| @@ -39,9 +39,9 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | |||
| __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, | |||
| __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad, | |||
| int2 dilation, int act_type, float alpha) { | |||
| int BlockH = 1; | |||
| int BlockW = 1; | |||
| int BlockC = 1; | |||
| const int BlockH = 1; | |||
| const int BlockW = 1; | |||
| const int BlockC = 1; | |||
| DEFINE_ARGS; | |||
| int oh0 = oh + 0; | |||
| @@ -102,9 +102,9 @@ __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t | |||
| __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, | |||
| __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad, | |||
| int2 dilation, int act_type, float alpha) { | |||
| int BlockH = 2; | |||
| int BlockW = 1; | |||
| int BlockC = 1; | |||
| const int BlockH = 2; | |||
| const int BlockW = 1; | |||
| const int BlockC = 1; | |||
| DEFINE_ARGS; | |||
| int oh0 = oh + 0; | |||
| @@ -189,9 +189,9 @@ __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t | |||
| __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, | |||
| __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad, | |||
| int2 dilation, int act_type, float alpha) { | |||
| int BlockH = 2; | |||
| int BlockW = 1; | |||
| int BlockC = 2; | |||
| const int BlockH = 2; | |||
| const int BlockW = 1; | |||
| const int BlockC = 2; | |||
| DEFINE_ARGS; | |||
| int oh0 = oh + 0; | |||
| @@ -312,9 +312,9 @@ __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t | |||
| __kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, | |||
| __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad, | |||
| int2 dilation, int act_type, float alpha) { | |||
| int BlockH = 2; | |||
| int BlockW = 2; | |||
| int BlockC = 2; | |||
| const int BlockH = 2; | |||
| const int BlockW = 2; | |||
| const int BlockC = 2; | |||
| DEFINE_ARGS; | |||
| int oh0 = oh + 0; | |||
| @@ -7,7 +7,7 @@ __kernel void ComputeMeanVarDim1NHWC4(__read_only image2d_t src_data, __global F | |||
| int4 in_shape, int normalized_shape_size) { | |||
| int X = get_global_id(0); // n*h | |||
| int Y = get_global_id(1); // w | |||
| if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0) { | |||
| if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0 || normalized_shape_size == 0) { | |||
| return; | |||
| } | |||
| int n = X / in_shape.y; | |||
| @@ -41,8 +41,8 @@ using mindspore::schema::PrimitiveType_FullConnection; | |||
| namespace mindspore::kernel { | |||
| constexpr size_t CI_TILE = C4NUM; | |||
| constexpr size_t CO_TILE = C4NUM; | |||
| const size_t CI_TILE = C4NUM; | |||
| const size_t CO_TILE = C4NUM; | |||
| int Conv2DOpenCLKernel::CheckSpecs() { | |||
| if (in_tensors_.size() != 2 && in_tensors_.size() != 3) { | |||
| @@ -164,12 +164,12 @@ int Conv2DOpenCLKernel::Prepare() { | |||
| } | |||
| int Conv2DOpenCLKernel::GenerateWinogradFilter() { | |||
| constexpr float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000, | |||
| 0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000, | |||
| 0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000}; | |||
| constexpr float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702, | |||
| 1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808, | |||
| 1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000}; | |||
| const float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000, | |||
| 0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000, | |||
| 0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000}; | |||
| const float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702, | |||
| 1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808, | |||
| 1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000}; | |||
| auto weight_tensor = in_tensors_.at(1); | |||
| auto origin_weight_fp32 = reinterpret_cast<float *>(weight_tensor->data_c()); | |||
| @@ -366,11 +366,11 @@ void Conv2DOpenCLKernel::SetGlobalLocal() { | |||
| size_t global_h = batch_size_ * UP_DIV(OH_, block_size_.H); | |||
| size_t global_w = UP_DIV(OW_, block_size_.W); | |||
| size_t global_c = UP_DIV(CO_SLICES_, block_size_.C); | |||
| constexpr int local_c_max = 16; | |||
| constexpr int local_hw_max = 256; | |||
| constexpr int OH_threshold = 100; | |||
| constexpr int OW_threshold = 100; | |||
| constexpr int OC_threshold = 64; | |||
| const int local_c_max = 16; | |||
| const int local_hw_max = 256; | |||
| const int OH_threshold = 100; | |||
| const int OW_threshold = 100; | |||
| const int OC_threshold = 64; | |||
| size_t local_c = GetMaxDivisor(global_c, local_c_max); | |||
| local_c = std::max<size_t>(local_c, 1); | |||
| size_t local_hw = local_hw_max / local_c; | |||
| @@ -1,3 +1,3 @@ | |||
| mobilenet_v1_1.0_224.tflite 2.5 | |||
| mobilenet_v2_1.0_224.tflite 2.5 | |||
| squeezenet.tflite 2.5 | |||
| inception_v3.tflite 1 | |||