Merge pull request !8101 from pengyongrong/stacktags/v1.1.0
| @@ -39,7 +39,7 @@ __kernel void Sigmoid(__read_only image2d_t input, __write_only image2d_t output | |||||
| const int last_c4) { | const int last_c4) { | ||||
| int X = get_global_id(0); | int X = get_global_id(0); | ||||
| int Y = get_global_id(1); | int Y = get_global_id(1); | ||||
| if (X >= img_shape.x || Y >= img_shape.y) return; | |||||
| if (X >= img_shape.x || Y >= img_shape.y || c4 == 0) return; | |||||
| int C4 = X % c4; | int C4 = X % c4; | ||||
| FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y)); | FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y)); | ||||
| if (C4 < c4 - 1) { | if (C4 < c4 - 1) { | ||||
| @@ -30,9 +30,7 @@ __kernel void Convolution(__read_only image2d_t input, __write_only image2d_t ou | |||||
| const int strideW = kernel_stride.w; | const int strideW = kernel_stride.w; | ||||
| const int padTop = pad.x; | const int padTop = pad.x; | ||||
| const int padBottom = pad.y; | |||||
| const int padLeft = pad.z; | const int padLeft = pad.z; | ||||
| const int padRight = pad.w; | |||||
| const int dilationH = dilation.x; | const int dilationH = dilation.x; | ||||
| const int dilationW = dilation.y; | const int dilationW = dilation.y; | ||||
| @@ -74,7 +72,7 @@ __kernel void Convolution(__read_only image2d_t input, __write_only image2d_t ou | |||||
| } | } | ||||
| } | } | ||||
| if (bias) { | |||||
| if (bias != 0) { | |||||
| out_c4 = out_c4 + bias[co_slice]; | out_c4 = out_c4 + bias[co_slice]; | ||||
| } | } | ||||
| @@ -116,7 +114,7 @@ __kernel void Winograd4x4To36(__read_only image2d_t input, __write_only image2d_ | |||||
| return; | return; | ||||
| } | } | ||||
| int IH = input_shape.y, IW = input_shape.z; | |||||
| int IW = input_shape.z; | |||||
| int TILE_X = UP_DIV(IW, 4); | int TILE_X = UP_DIV(IW, 4); | ||||
| int tile_x = tile_xy % TILE_X; | int tile_x = tile_xy % TILE_X; | ||||
| int tile_y = tile_xy / TILE_X; | int tile_y = tile_xy / TILE_X; | ||||
| @@ -229,7 +227,6 @@ __kernel void Winograd36To4x4(__read_only image2d_t input, __write_only image2d_ | |||||
| int TILE_XY = input_shape.z; | int TILE_XY = input_shape.z; | ||||
| int SLICES = input_shape.w; | int SLICES = input_shape.w; | ||||
| int OH = output_shape.y; | |||||
| int OW = output_shape.z; | int OW = output_shape.z; | ||||
| if (tile_xy >= TILE_XY || row >= 4 || slice >= SLICES) { | if (tile_xy >= TILE_XY || row >= 4 || slice >= SLICES) { | ||||
| @@ -257,7 +254,7 @@ __kernel void Winograd36To4x4(__read_only image2d_t input, __write_only image2d_ | |||||
| acc += AtM_row[y] * At[idx]; | acc += AtM_row[y] * At[idx]; | ||||
| } | } | ||||
| if (bias) { | |||||
| if (bias != 0) { | |||||
| acc += bias[slice]; | acc += bias[slice]; | ||||
| } | } | ||||
| @@ -51,13 +51,10 @@ __kernel void reshape_NC4HW4(__read_only image2d_t src_data, __write_only image2 | |||||
| int4 dst_size) { | int4 dst_size) { | ||||
| int X = get_global_id(0); | int X = get_global_id(0); | ||||
| int Y = get_global_id(1); | int Y = get_global_id(1); | ||||
| int CO4 = UP_DIV(dst_size.z, C4NUM); | |||||
| int CO4_rem = dst_size.z % C4NUM; | |||||
| if (X >= dst_size.x || Y > dst_size.y) { | if (X >= dst_size.x || Y > dst_size.y) { | ||||
| return; | return; | ||||
| } | } | ||||
| int CI4 = UP_DIV(src_size.x, C4NUM); | int CI4 = UP_DIV(src_size.x, C4NUM); | ||||
| int CI4_rem = src_size.x % C4NUM; | |||||
| int in_img_x = CI4 * src_size.y; | int in_img_x = CI4 * src_size.y; | ||||
| int gcnt = X + dst_size.x * Y; | int gcnt = X + dst_size.x * Y; | ||||
| WRITE_IMAGE(dst_data, (int2)(X, Y), READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x))); | WRITE_IMAGE(dst_data, (int2)(X, Y), READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x))); | ||||
| @@ -43,8 +43,8 @@ std::vector<float> SoftmaxOpenCLKernel::GetMaskForLastChannel(int channels) { | |||||
| } | } | ||||
| int SoftmaxOpenCLKernel::InitGlobalSize() { | int SoftmaxOpenCLKernel::InitGlobalSize() { | ||||
| size_t global_x, global_y, global_z; | |||||
| global_z = 1; | |||||
| size_t global_x, global_y; | |||||
| const size_t global_z = 1; | |||||
| if (axis_ == 1) { | if (axis_ == 1) { | ||||
| global_x = UP_DIV(nhwc_shape_[3], C4NUM); | global_x = UP_DIV(nhwc_shape_[3], C4NUM); | ||||
| global_y = nhwc_shape_[2]; | global_y = nhwc_shape_[2]; | ||||
| @@ -38,7 +38,7 @@ struct OpenCLToFormatParameter { | |||||
| struct Image2DInfo { | struct Image2DInfo { | ||||
| explicit Image2DInfo(const lite::Tensor *tensor) { | explicit Image2DInfo(const lite::Tensor *tensor) { | ||||
| if (tensor) { | |||||
| if (tensor != nullptr) { | |||||
| auto shape = tensor->shape(); | auto shape = tensor->shape(); | ||||
| if (shape.size() == 1) { | if (shape.size() == 1) { | ||||
| N = shape[0]; | N = shape[0]; | ||||