Skip to content

Commit

Permalink
Merge pull request #37 from nazonoSAUNA/lensblur_cl
Browse files Browse the repository at this point in the history
要求バージョン再修正
  • Loading branch information
ePi5131 authored Jun 15, 2022
2 parents e52b1ed + de57b8f commit d662b82
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 21 deletions.
23 changes: 12 additions & 11 deletions patch/clprogram.cl
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ kernel void LensBlur_Media(global char* dst, global char* src, int obj_w, int ob
int sum_cb = 0;
int sum_cr = 0;
int sum_a = 0;

int cor_sum = 0;

int offset = (x + left + (y + top) * obj_line) * 8;
Expand All @@ -713,10 +714,10 @@ kernel void LensBlur_Media(global char* dst, global char* src, int obj_w, int ob
cor_a = 4096;
}
cor_sum += cor_a;
sum_y += *(float*)&src[offset2] * (float)cor_a;
sum_y += *(global float*)&src[offset2] * (float)cor_a;
sum_cb += src[offset2 + 4] * cor_a;
sum_cr += src[offset2 + 5] * cor_a;
sum_a += *(short*)&src[offset2 + 6] * cor_a >> 12;
sum_a += *(global short*)&src[offset2 + 6] * cor_a >> 12;
}
sqr += 1 + xx * 2;
offset2 += 8;
Expand All @@ -726,16 +727,16 @@ kernel void LensBlur_Media(global char* dst, global char* src, int obj_w, int ob

dst += (x + y * obj_line) * 8;
if (0 < sum_a) {
*(float*)dst = sum_y / (float)sum_a;
*(global float*)dst = sum_y / (float)sum_a;
dst[4] = (char)(((sum_a >> 1) + sum_cb) / sum_a);
dst[5] = (char)(((sum_a >> 1) + sum_cr) / sum_a);
*(short*)&dst[6] = (short)round((float)sum_a * (4096.0f / (float)cor_sum));
*(global short*)&dst[6] = (short)round((float)sum_a * (4096.0f / (float)cor_sum));
} else {
*(int*)dst = 0;
*(int*)&dst[4] = 0;
*(global int*)dst = 0;
*(global int*)&dst[4] = 0;
}
}

)" R"(
kernel void LensBlur_Filter(global char* dst, global char* src, int scene_w, int scene_h, int scene_line,
int range, int rangep05_sqr, int range_t3m1, int rangem1_sqr) {

Expand Down Expand Up @@ -768,8 +769,8 @@ kernel void LensBlur_Filter(global char* dst, global char* src, int scene_w, int
} else {
cor_a = 4096;
}
tofloat[0] = *(short*)&src[offset2];
tofloat[1] = *(short*)&src[offset2 + 2];
tofloat[0] = *(global short*)&src[offset2];
tofloat[1] = *(global short*)&src[offset2 + 2];
sum_y += *(float*)tofloat * (float)cor_a;
sum_cb += src[offset2 + 4] * cor_a;
sum_cr += src[offset2 + 5] * cor_a;
Expand All @@ -783,8 +784,8 @@ kernel void LensBlur_Filter(global char* dst, global char* src, int scene_w, int

dst += (x + y * scene_line) * 6;
*(float*)tofloat = sum_y / (float)sum_a;
*(short*)&dst[0] = tofloat[0];
*(short*)&dst[2] = tofloat[1];
*(global short*)&dst[0] = tofloat[0];
*(global short*)&dst[2] = tofloat[1];
dst[4] = (char)(((sum_a >> 1) + sum_cb) / sum_a);
dst[5] = (char)(((sum_a >> 1) + sum_cr) / sum_a);
}
20 changes: 10 additions & 10 deletions patch/patch_fast_cl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,10 +751,10 @@ kernel void LensBlur_Media(global char* dst, global char* src, int obj_w, int ob
cor_a = 4096;
}
cor_sum += cor_a;
sum_y += *(float*)&src[offset2] * (float)cor_a;
sum_y += *(global float*)&src[offset2] * (float)cor_a;
sum_cb += src[offset2 + 4] * cor_a;
sum_cr += src[offset2 + 5] * cor_a;
sum_a += *(short*)&src[offset2 + 6] * cor_a >> 12;
sum_a += *(global short*)&src[offset2 + 6] * cor_a >> 12;
}
sqr += 1 + xx * 2;
offset2 += 8;
Expand All @@ -764,13 +764,13 @@ kernel void LensBlur_Media(global char* dst, global char* src, int obj_w, int ob
dst += (x + y * obj_line) * 8;
if (0 < sum_a) {
*(float*)dst = sum_y / (float)sum_a;
*(global float*)dst = sum_y / (float)sum_a;
dst[4] = (char)(((sum_a >> 1) + sum_cb) / sum_a);
dst[5] = (char)(((sum_a >> 1) + sum_cr) / sum_a);
*(short*)&dst[6] = (short)round((float)sum_a * (4096.0f / (float)cor_sum));
*(global short*)&dst[6] = (short)round((float)sum_a * (4096.0f / (float)cor_sum));
} else {
*(int*)dst = 0;
*(int*)&dst[4] = 0;
*(global int*)dst = 0;
*(global int*)&dst[4] = 0;
}
}
)" R"(
Expand Down Expand Up @@ -806,8 +806,8 @@ kernel void LensBlur_Filter(global char* dst, global char* src, int scene_w, int
} else {
cor_a = 4096;
}
tofloat[0] = *(short*)&src[offset2];
tofloat[1] = *(short*)&src[offset2 + 2];
tofloat[0] = *(global short*)&src[offset2];
tofloat[1] = *(global short*)&src[offset2 + 2];
sum_y += *(float*)tofloat * (float)cor_a;
sum_cb += src[offset2 + 4] * cor_a;
sum_cr += src[offset2 + 5] * cor_a;
Expand All @@ -821,8 +821,8 @@ kernel void LensBlur_Filter(global char* dst, global char* src, int scene_w, int
dst += (x + y * scene_line) * 6;
*(float*)tofloat = sum_y / (float)sum_a;
*(short*)&dst[0] = tofloat[0];
*(short*)&dst[2] = tofloat[1];
*(global short*)&dst[0] = tofloat[0];
*(global short*)&dst[2] = tofloat[1];
dst[4] = (char)(((sum_a >> 1) + sum_cb) / sum_a);
dst[5] = (char)(((sum_a >> 1) + sum_cr) / sum_a);
}
Expand Down

0 comments on commit d662b82

Please sign in to comment.