codeclean for layernorm and conv

5 years ago · 36bb147844
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl
@@ -39,9 +39,9 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP
 __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
                            __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
                            int2 dilation, int act_type, float alpha) {
  int BlockH = 1;
  int BlockW = 1;
  int BlockC = 1;
  const int BlockH = 1;
  const int BlockW = 1;
  const int BlockC = 1;
  DEFINE_ARGS;

  int oh0 = oh + 0;
@@ -102,9 +102,9 @@ __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t
 __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
                            __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
                            int2 dilation, int act_type, float alpha) {
  int BlockH = 2;
  int BlockW = 1;
  int BlockC = 1;
  const int BlockH = 2;
  const int BlockW = 1;
  const int BlockC = 1;
  DEFINE_ARGS;

  int oh0 = oh + 0;
@@ -189,9 +189,9 @@ __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t
 __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
                            __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
                            int2 dilation, int act_type, float alpha) {
  int BlockH = 2;
  int BlockW = 1;
  int BlockC = 2;
  const int BlockH = 2;
  const int BlockW = 1;
  const int BlockC = 2;
  DEFINE_ARGS;

  int oh0 = oh + 0;
@@ -312,9 +312,9 @@ __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t
 __kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
                            __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
                            int2 dilation, int act_type, float alpha) {
  int BlockH = 2;
  int BlockW = 2;
  int BlockC = 2;
  const int BlockH = 2;
  const int BlockW = 2;
  const int BlockC = 2;
  DEFINE_ARGS;

  int oh0 = oh + 0;
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/layer_norm.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/layer_norm.cl
@@ -7,7 +7,7 @@ __kernel void ComputeMeanVarDim1NHWC4(__read_only image2d_t src_data, __global F
                                      int4 in_shape, int normalized_shape_size) {
  int X = get_global_id(0);  // n*h
  int Y = get_global_id(1);  // w
  if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0) {
  if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0 || normalized_shape_size == 0) {
    return;
  }
  int n = X / in_shape.y;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc
@@ -41,8 +41,8 @@ using mindspore::schema::PrimitiveType_FullConnection;

 namespace mindspore::kernel {

 constexpr size_t CI_TILE = C4NUM;
 constexpr size_t CO_TILE = C4NUM;
 const size_t CI_TILE = C4NUM;
 const size_t CO_TILE = C4NUM;

 int Conv2DOpenCLKernel::CheckSpecs() {
  if (in_tensors_.size() != 2 && in_tensors_.size() != 3) {
@@ -164,12 +164,12 @@ int Conv2DOpenCLKernel::Prepare() {
 }

 int Conv2DOpenCLKernel::GenerateWinogradFilter() {
  constexpr float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000,  1.0000000000, 1.0000000000,  0.0000000000,
                          0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
                          0.0000000000, 0.4999999702, 0.4999999702,  1.9999998808, 1.9999998808,  1.0000000000};
  constexpr float G[] = {1.0000000000, 0.0000000000,  0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
                         1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
                         1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};
  const float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000,  1.0000000000, 1.0000000000,  0.0000000000,
                      0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
                      0.0000000000, 0.4999999702, 0.4999999702,  1.9999998808, 1.9999998808,  1.0000000000};
  const float G[] = {1.0000000000, 0.0000000000,  0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
                     1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
                     1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};

  auto weight_tensor = in_tensors_.at(1);
  auto origin_weight_fp32 = reinterpret_cast<float *>(weight_tensor->data_c());
@@ -366,11 +366,11 @@ void Conv2DOpenCLKernel::SetGlobalLocal() {
    size_t global_h = batch_size_ * UP_DIV(OH_, block_size_.H);
    size_t global_w = UP_DIV(OW_, block_size_.W);
    size_t global_c = UP_DIV(CO_SLICES_, block_size_.C);
    constexpr int local_c_max = 16;
    constexpr int local_hw_max = 256;
    constexpr int OH_threshold = 100;
    constexpr int OW_threshold = 100;
    constexpr int OC_threshold = 64;
    const int local_c_max = 16;
    const int local_hw_max = 256;
    const int OH_threshold = 100;
    const int OW_threshold = 100;
    const int OC_threshold = 64;
    size_t local_c = GetMaxDivisor(global_c, local_c_max);
    local_c = std::max<size_t>(local_c, 1);
    size_t local_hw = local_hw_max / local_c;
--- a/mindspore/lite/test/models_npu.cfg
+++ b/mindspore/lite/test/models_npu.cfg
@@ -1,3 +1,3 @@
 mobilenet_v1_1.0_224.tflite 2.5
 mobilenet_v2_1.0_224.tflite 2.5
 squeezenet.tflite 2.5
 inception_v3.tflite 1