fix depthwise CreateImage2d bug

5 years ago · e1e10981d0
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc
@@ -48,13 +48,12 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() {
    return RET_ERROR;
  }
  auto *param = reinterpret_cast<ArgMinMaxParameter *>(this->op_parameter_);
  param->dims_size_ = in_tensors_[0]->shape().size();
  param->axis_ = (param->axis_ + param->dims_size_) % param->dims_size_;
  if (param->axis_ < 0 || param->axis_ >= param->dims_size_) {
    MS_LOG(ERROR) << "Invalid axis " << param->axis_;
  auto dims_size = in_tensors_[0]->shape().size();
  auto axis = (param->axis_ + dims_size) % dims_size;
  if (axis < 0 || axis >= dims_size) {
    MS_LOG(ERROR) << "Invalid axis " << axis;
    return RET_ERROR;
  }
  param->get_max_ = (Type() == PrimitiveType_ArgMax);
  return RET_OK;
 }

@@ -77,10 +76,10 @@ void ArgMinMaxOpenCLKernel::SetConstArgs() {

 void ArgMinMaxOpenCLKernel::SetGlobalLocal() {
  auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
  auto in_shape = in_tensors_[0]->shape();
  im_in_ = GpuTensorInfo(in_tensors_[0]);
  std::vector<size_t> in_shape = {im_in_.N, im_in_.H, im_in_.W, im_in_.C};
  auto in_shape_align = in_shape;
  in_shape_align[3] = UP_ROUND(in_shape[3], C4NUM);
  im_in_ = GpuTensorInfo(in_tensors_[0]);
  auto out_shape_align = in_shape_align;
  out_shape_align.at(param->axis_) = param->axis_ == 3 ? UP_ROUND(param->topk_, C4NUM) : param->topk_;
  int reduce_len = GetUpPow2(in_shape.at(param->axis_));
@@ -92,7 +91,7 @@ void ArgMinMaxOpenCLKernel::SetGlobalLocal() {
  src_size_ = {std::accumulate(in_shape.begin() + param->axis_ + 1, in_shape.end(), 1, std::multiplies<int>()),
               std::accumulate(in_shape.begin(), in_shape.begin() + param->axis_, 1, std::multiplies<int>()),
               std::accumulate(in_shape.begin() + param->axis_, in_shape.end(), 1, std::multiplies<int>()),
               in_shape.at(param->axis_)};
               static_cast<int>(in_shape.at(param->axis_))};
  strides_ = {
    std::accumulate(in_shape_align.begin() + param->axis_ + 1, in_shape_align.end(), 1, std::multiplies<int>()),
    std::accumulate(in_shape_align.begin() + param->axis_, in_shape_align.end(), 1, std::multiplies<int>()),
@@ -145,6 +144,12 @@ int ArgMinMaxOpenCLKernel::Prepare() {
  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
 #endif

  auto *param = reinterpret_cast<ArgMinMaxParameter *>(this->op_parameter_);
  param->dims_size_ = in_tensors_[0]->shape().size();
  param->axis_ = (param->axis_ + param->dims_size_) % param->dims_size_;
  param->axis_ = (4 - param->dims_size_) + param->axis_;
  param->get_max_ = (Type() == PrimitiveType_ArgMax);

  InitWeights();
  SetGlobalLocal();
  SetConstArgs();
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
@@ -118,67 +118,77 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
    int alignment = ocl_runtime_->GetImagePitchAlignment();
    plane_out = UP_ROUND(plane_out, alignment) * C4NUM;
    pack_weight_size = plane_out * CO4;
    auto shape = in_tensors_[1]->shape();
    size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
    img_size = {(size_t)plane_out / C4NUM, (size_t)shape[0] * CO4, img_dtype};
    img_size = {(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype};
  }
  if (is_fp16) {
    packed_weight_ = allocator->Malloc(pack_weight_size * sizeof(int16_t), img_size);
    packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true);
    if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) {
      std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
      PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
    } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) {
      std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
      PackNCHWToNC4HW4<float, float16_t>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
    } else {  // int8 or int16
      std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
      PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
    }
  } else {
    packed_weight_ = allocator->Malloc(pack_weight_size * sizeof(float), img_size);
    packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true);
    if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) {
      std::function<float(float)> to_dtype = [](float x) -> float { return x; };
      PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
    } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) {
      std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
      PackNCHWToNC4HW4<float16_t, float>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
    } else {  // int8 or int16
      std::function<float(float)> to_dtype = [](float x) -> float { return x; };
      PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
  pack_weight_size = is_fp16 ? pack_weight_size * sizeof(int16_t) : pack_weight_size * sizeof(float);
  auto ConvertFilter = [](void *src, void *dst, TypeId src_type, TypeId dst_type, size_t plane_in, size_t plane_out,
                          size_t channel) {
    if (dst_type == kNumberTypeFloat16) {
      if (src_type == kNumberTypeFloat16) {
        std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
        PackNCHWToNC4HW4<int16_t, int16_t>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
      } else if (src_type == kNumberTypeFloat32) {
        std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
        PackNCHWToNC4HW4<float, float16_t>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
      } else {  // int8 or int16
        std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
        PackNCHWToNC4HW4<int16_t, int16_t>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
      }
    } else {
      if (src_type == kNumberTypeFloat32) {
        std::function<float(float)> to_dtype = [](float x) -> float { return x; };
        PackNCHWToNC4HW4<float, float>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
      } else if (src_type == kNumberTypeFloat16) {
        std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
        PackNCHWToNC4HW4<float16_t, float>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
      } else {  // int8 or int16
        std::function<float(float)> to_dtype = [](float x) -> float { return x; };
        PackNCHWToNC4HW4<float, float>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
      }
    }
  }
  allocator->UnmapBuffer(packed_weight_);
  };
  std::vector<char> temp_filter(pack_weight_size);
  auto src_type = in_tensors_.at(kWeightIndex)->data_type();
  auto dst_type = is_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32;
  ConvertFilter(origin_weight, temp_filter.data(), src_type, dst_type, plane_in, plane_out, out_info.C);
  packed_weight_ = allocator->Malloc(pack_weight_size, img_size, temp_filter.data());
  FreeDequantedWeight();

  size_t dtype_size = sizeof(float);
  if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
    dtype_size = sizeof(int16_t);
  if (packed_weight_ == nullptr) {
    return RET_ERROR;
  }
  bias_data_ = allocator->Malloc(C4NUM * CO4 * dtype_size);
  bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true);
  size_t up_co_size = C4NUM * CO4 * dtype_size;
  memset(bias_data_, 0, up_co_size);
  if (in_tensors_.size() == kInputSize2) {
    auto ori_bias = in_tensors_.at(kBiasIndex)->data_c();
    if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) {
      float16_t *bias_ptr = static_cast<float16_t *>(bias_data_);
      for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
        bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(ori_bias)[i]);

  auto ConvertBias = [](void *src, void *dst, size_t size, size_t dtype_size, TypeId src_type, TypeId dst_type) {
    if (dst_type == kNumberTypeFloat16 && src_type == kNumberTypeFloat32) {
      float16_t *bias_ptr = static_cast<float16_t *>(dst);
      for (size_t i = 0; i < size; ++i) {
        bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(src)[i]);
      }
    } else if (!is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
      float32_t *bias_ptr = static_cast<float32_t *>(bias_data_);
      for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
        bias_ptr[i] = static_cast<float32_t>(static_cast<float16_t *>(ori_bias)[i]);
    } else if (dst_type == kNumberTypeFloat32 && src_type == kNumberTypeFloat16) {
      float32_t *bias_ptr = static_cast<float32_t *>(dst);
      for (size_t i = 0; i < size; ++i) {
        bias_ptr[i] = static_cast<float32_t>(static_cast<float16_t *>(src)[i]);
      }
    } else {
      memcpy(bias_data_, ori_bias, out_info.C * dtype_size);
      memcpy(dst, src, size * dtype_size);
    }
  } else {
    MS_ASSERT(in_tensors_.size() == kInputSize1);
  };
  size_t dtype_size = sizeof(float);
  if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
    dtype_size = sizeof(int16_t);
  }
  std::vector<char> temp_bias(pack_weight_size, 0);
  if (in_tensors_.size() == 3) {
    src_type = in_tensors_.at(kBiasIndex)->data_type();
    dst_type = is_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32;
    auto element_size = in_tensors_.at(kBiasIndex)->ElementsNum();
    ConvertBias(in_tensors_.at(kBiasIndex)->data_c(), temp_bias.data(), element_size, dtype_size, src_type, dst_type);
  }
  size_t bias_size = C4NUM * CO4 * dtype_size;
  bias_data_ = allocator->Malloc(bias_size, {}, temp_bias.data());
  if (bias_data_ == nullptr) {
    return RET_ERROR;
  }
  allocator->UnmapBuffer(bias_data_);
  return mindspore::lite::RET_OK;
 }
 void DepthwiseConv2dOpenCLKernel::SetConstArgs() {
--- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
@@ -107,14 +107,20 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img
  }
  if (*image == nullptr) {
    delete *buffer;
    MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
    MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
    return nullptr;
  }
  if (ret != CL_SUCCESS) {
    delete *buffer;
    delete *image;
    MS_LOG(ERROR) << "Create OpenCL Image2D  (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
    return nullptr;
  }
  MS_LOG(DEBUG) << "Malloc a new Image2D, width=" << img_size[0] << ", height=" << img_size[1];
  void *host_ptr = nullptr;
  if (is_map) {
    std::vector<size_t> region{img_size[0], img_size[1], 1};
    host_ptr = ocl_runtime_->MapBuffer(**image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
    host_ptr = ocl_runtime_->MapBuffer(**image, true, CL_MAP_READ | CL_MAP_WRITE, region);
    if (host_ptr == nullptr) {
      delete *buffer;
      delete *image;
@@ -340,7 +346,7 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
    std::vector<size_t> region{mem_buf->img_size[0], mem_buf->img_size[1], 1};
    cl::Image2D *image = static_cast<cl::Image2D *>(mem_buf->image_ptr_);
    MS_ASSERT(image);
    new_host_ptr = ocl_runtime_->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
    new_host_ptr = ocl_runtime_->MapBuffer(*image, sync, CL_MAP_READ | CL_MAP_WRITE, region);
  }
  if (new_host_ptr == nullptr) {
    UnLock();
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc
@@ -185,4 +185,19 @@ TEST_F(TestOpenCL_ArgMinMax, axis3topk2value) {
    TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
  }
 }
 // ArgMax over axis 1 of a {1, 2, 14} input with topk = 1; the expected buffer holds the winning index per column.
 TEST_F(TestOpenCL_ArgMinMax, axis1topk1index) {
  // Operator configuration handed to CreateParameter.
  schema::PrimitiveType op_type = schema::PrimitiveType_ArgMax;
  int reduce_axis = 1;
  int top_k = 1;
  bool emit_value = false;

  // Two rows of 14 values; the reduced axis has length 2, so every expected entry is either 0 or 1.
  std::vector<int> in_dims = {1, 2, 14};
  std::vector<int> out_dims = {1, 14};
  float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1,  50, 30, 45, 25, 50,
                        30, 10, 20, 30, 40, 90, 20, 11, 15, 1,  50, 30, 45, 25};
  float output_data[] = {1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};

  // Run once with fp16 disabled, then once with it enabled; a fresh parameter is created for each run.
  const bool fp16_modes[] = {false, true};
  for (bool use_fp16 : fp16_modes) {
    auto *arg_param = CreateParameter(op_type, reduce_axis, top_k, emit_value);
    // NOTE(review): the trailing 1e-1, 1e-1, true are presumably tolerances and a flag - confirm against TestMain.
    TestMain({{in_dims, input_data, VAR}}, {out_dims, output_data}, arg_param, use_fp16, 1e-1, 1e-1, true);
  }
 }
 }  // namespace mindspore::lite::opencl::test
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
@@ -58,22 +58,24 @@ TEST_F(TestOpenCL_DepthwiseConv2d, NoPad) {
  std::vector<int> output_shape = {1, 2, 2, 4};
  std::vector<int> weight_shape = {1, kernel_h, kernel_w, output_shape.back()};
  std::vector<int> bias_shape = {output_shape.back()};
  float input_data[] = {0.5488135,  0.0202184,  0.45615032, 0.31542835, 0.71518934, 0.83261985, 0.56843394, 0.36371076,
                        0.60276335, 0.77815676, 0.0187898,  0.57019675, 0.5448832,  0.87001216, 0.6176355,  0.43860152,
                        0.4236548,  0.9786183,  0.6120957,  0.9883738,  0.6458941,  0.7991586,  0.616934,   0.10204481,
                        0.4375872,  0.46147937, 0.94374806, 0.20887676, 0.891773,   0.7805292,  0.6818203,  0.16130951,
                        0.96366274, 0.11827443, 0.3595079,  0.6531083,  0.3834415,  0.639921,   0.43703195, 0.2532916,
                        0.79172504, 0.14335328, 0.6976312,  0.46631077, 0.5288949,  0.9446689,  0.06022547, 0.2444256,
                        0.56804454, 0.5218483,  0.6667667,  0.15896958, 0.92559665, 0.41466194, 0.67063785, 0.11037514,
                        0.07103606, 0.2645556,  0.21038257, 0.6563296,  0.0871293,  0.7742337,  0.12892629, 0.13818295};
  float input_data[] = {
    0.5488135,  0.71518934, 0.60276335, 0.5448832,  0.4236548,  0.6458941,  0.4375872,  0.891773,
    0.96366274, 0.3834415,  0.79172504, 0.5288949,  0.56804454, 0.92559665, 0.07103606, 0.0871293,
    0.0202184,  0.83261985, 0.77815676, 0.87001216, 0.9786183,  0.7991586,  0.46147937, 0.7805292,
    0.11827443, 0.639921,   0.14335328, 0.9446689,  0.5218483,  0.41466194, 0.2645556,  0.7742337,
    0.45615032, 0.56843394, 0.0187898,  0.6176355,  0.6120957,  0.616934,   0.94374806, 0.6818203,
    0.3595079,  0.43703195, 0.6976312,  0.06022547, 0.6667667,  0.67063785, 0.21038257, 0.12892629,
    0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738,  0.10204481, 0.20887676, 0.16130951,
    0.6531083,  0.2532916,  0.46631077, 0.2444256,  0.15896958, 0.11037514, 0.6563296,  0.13818295,
  };
  float bias_data[] = {0, 0, 0, 0};
  float weight_data[] = {0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449,  0.09609841, 0.97645944, 0.4686512,
                         0.9767611,  0.6048455,  0.7392636,  0.03918779, 0.28280696, 0.12019656, 0.2961402,  0.11872772,
                         0.31798318, 0.41426298, 0.06414749, 0.6924721,  0.56660146, 0.2653895,  0.5232481,  0.09394051,
                         0.5759465,  0.9292962,  0.31856894, 0.6674104,  0.13179787, 0.7163272,  0.2894061,  0.18319136,
                         0.5865129,  0.02010755, 0.82894003, 0.00469548};
  float output_data[] = {3.3848767, 1.4446403, 1.8428744, 1.3194335, 2.5873442, 2.1384869, 2.04022,  1.1872686,
                         2.2294958, 1.6570128, 2.465089,  1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
  float output_data[] = {2.9720426, 1.890834,  2.3618119, 2.3867798, 2.5666943, 1.6261611, 2.0977764, 1.6445805,
                         2.462798,  1.6643658, 1.6861027, 1.8428761, 2.5156446, 1.5366757, 1.6767557, 1.6905226};

  for (auto fp16_enable : {false, true}) {
    auto *param = CreateParameter(kernel_h, kernel_w, stride_h, stride_w, pad_u, pad_d, pad_l, pad_r, dilation_h,
@@ -132,4 +134,117 @@ TEST_F(TestOpenCL_DepthwiseConv2d, Pad) {
  }
 }

 // Depthwise conv with a 2x2 kernel, stride 1, dilation 1 and zero padding: {1, 4, 4, 4} input -> {1, 3, 3, 4} output.
 TEST_F(TestOpenCL_DepthwiseConv2d, NoPad1) {
  // Kernel geometry.
  int filter_h = 2;
  int filter_w = 2;
  int step_h = 1;
  int step_w = 1;
  int dilate_h = 1;
  int dilate_w = 1;
  // No padding on any side.
  int pad_top = 0;
  int pad_bottom = 0;
  int pad_left = 0;
  int pad_right = 0;
  ActType activation = ActType_No;

  // Tensor shapes; the weight and bias shapes reuse the last output dimension.
  std::vector<int> input_shape = {1, 4, 4, 4};
  std::vector<int> output_shape = {1, 3, 3, 4};
  int out_channels = output_shape.back();
  std::vector<int> weight_shape = {1, filter_h, filter_w, out_channels};
  std::vector<int> bias_shape = {out_channels};

  float input_data[] = {0.5488135,  0.71518934, 0.60276335, 0.5448832,  0.4236548,  0.6458941,  0.4375872,  0.891773,
                        0.96366274, 0.3834415,  0.79172504, 0.5288949,  0.56804454, 0.92559665, 0.07103606, 0.0871293,
                        0.0202184,  0.83261985, 0.77815676, 0.87001216, 0.9786183,  0.7991586,  0.46147937, 0.7805292,
                        0.11827443, 0.639921,   0.14335328, 0.9446689,  0.5218483,  0.41466194, 0.2645556,  0.7742337,
                        0.45615032, 0.56843394, 0.0187898,  0.6176355,  0.6120957,  0.616934,   0.94374806, 0.6818203,
                        0.3595079,  0.43703195, 0.6976312,  0.06022547, 0.6667667,  0.67063785, 0.21038257, 0.12892629,
                        0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738,  0.10204481, 0.20887676, 0.16130951,
                        0.6531083,  0.2532916,  0.46631077, 0.2444256,  0.15896958, 0.11037514, 0.6563296,  0.13818295};
  float bias_data[] = {0, 0, 0, 0};
  float weight_data[] = {0.19658236, 0.36872517, 0.82099323, 0.09710128, 0.83794491, 0.09609841,
                         0.97645947, 0.4686512,  0.97676109, 0.60484552, 0.73926358, 0.03918779,
                         0.28280696, 0.12019656, 0.2961402,  0.11872772};
  float output_data[] = {0.3757235,  1.8489048,  1.4467758,  0.6116009,  1.2535334, 1.6583176, 1.2530621,  0.6590755,
                         0.5466661,  1.22944,    0.93263525, 0.5317252,  0.7987474, 1.618667,  1.090071,   0.60372007,
                         0.773425,   1.5383728,  1.262479,   0.54334986, 0.5755667, 1.3171062, 0.82401496, 0.39336145,
                         0.6703031,  0.9385749,  1.018886,   0.40566355, 1.1277528, 0.7773028, 1.5164642,  0.27685273,
                         0.86816025, 0.72971237, 1.1791146,  0.12131907};

  // Run once with fp16 disabled, then once with it enabled; a fresh parameter is created for each run.
  const bool fp16_modes[] = {false, true};
  for (bool use_fp16 : fp16_modes) {
    // The fp16 run is given a looser first tolerance argument (1e-2) than the fp32 run (1e-5).
    double first_tol = 1e-5;
    if (use_fp16) {
      first_tol = 1e-2;
    }
    auto *conv_param = CreateParameter(filter_h, filter_w, step_h, step_w, pad_top, pad_bottom, pad_left, pad_right,
                                       dilate_h, dilate_w, activation, input_shape.back());
    TestMain({{input_shape, input_data, VAR},
              {weight_shape, weight_data, CONST_TENSOR},
              {bias_shape, bias_data, CONST_TENSOR}},
             {output_shape, output_data}, conv_param, use_fp16, first_tol, 1e-1, true);
  }
 }
 // Depthwise conv with a 3x3 kernel, stride 1, dilation 1 and padding 1 on every side: {1, 5, 5, 6} in and out.
 TEST_F(TestOpenCL_DepthwiseConv2d, Pad1) {
  // Kernel geometry.
  int filter_h = 3;
  int filter_w = 3;
  int step_h = 1;
  int step_w = 1;
  int dilate_h = 1;
  int dilate_w = 1;
  // One element of padding on each side.
  int pad_top = 1;
  int pad_bottom = 1;
  int pad_left = 1;
  int pad_right = 1;
  ActType activation = ActType_No;

  // Tensor shapes; the weight and bias shapes reuse the last output dimension (6 channels, not a multiple of 4).
  std::vector<int> input_shape = {1, 5, 5, 6};
  std::vector<int> output_shape = {1, 5, 5, 6};
  int out_channels = output_shape.back();
  std::vector<int> weight_shape = {1, filter_h, filter_w, out_channels};
  std::vector<int> bias_shape = {out_channels};

  float input_data[] = {
    0.5488135,  0.71518934, 0.60276335, 0.5448832,  0.4236548,  0.6458941,  0.4375872,  0.891773,   0.96366274,
    0.3834415,  0.79172504, 0.5288949,  0.56804454, 0.92559665, 0.07103606, 0.0871293,  0.0202184,  0.83261985,
    0.77815676, 0.87001216, 0.9786183,  0.7991586,  0.46147937, 0.7805292,  0.11827443, 0.639921,   0.14335328,
    0.9446689,  0.5218483,  0.41466194, 0.2645556,  0.7742337,  0.45615032, 0.56843394, 0.0187898,  0.6176355,
    0.6120957,  0.616934,   0.94374806, 0.6818203,  0.3595079,  0.43703195, 0.6976312,  0.06022547, 0.6667667,
    0.67063785, 0.21038257, 0.12892629, 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738,  0.10204481,
    0.20887676, 0.16130951, 0.6531083,  0.2532916,  0.46631077, 0.2444256,  0.15896958, 0.11037514, 0.6563296,
    0.13818295, 0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449,  0.09609841, 0.97645944, 0.4686512,
    0.9767611,  0.6048455,  0.7392636,  0.03918779, 0.28280696, 0.12019656, 0.2961402,  0.11872772, 0.31798318,
    0.41426298, 0.06414749, 0.6924721,  0.56660146, 0.2653895,  0.5232481,  0.09394051, 0.5759465,  0.9292962,
    0.31856894, 0.6674104,  0.13179787, 0.7163272,  0.2894061,  0.18319136, 0.5865129,  0.02010755, 0.82894003,
    0.00469548, 0.6778165,  0.27000797, 0.735194,   0.96218854, 0.24875315, 0.57615733, 0.5920419,  0.5722519,
    0.22308163, 0.952749,   0.44712538, 0.84640867, 0.6994793,  0.29743695, 0.81379783, 0.39650574, 0.8811032,
    0.5812729,  0.8817354,  0.6925316,  0.7252543,  0.50132436, 0.95608366, 0.6439902,  0.42385504, 0.6063932,
    0.0191932,  0.30157483, 0.66017354, 0.2900776,  0.6180154,  0.4287687,  0.13547407, 0.29828233, 0.5699649,
    0.59087276, 0.57432526, 0.6532008,  0.65210325, 0.43141845, 0.8965466,  0.36756188, 0.43586493, 0.89192337,
    0.806194,   0.7038886,  0.10022689, 0.9194826,  0.7142413,  0.998847};
  float weight_data[] = {0.1494483,  0.86812606, 0.16249293, 0.61555956, 0.12381998, 0.84800823, 0.80731896, 0.56910074,
                         0.4071833,  0.069167,   0.69742877, 0.45354268, 0.7220556,  0.86638233, 0.97552151, 0.85580334,
                         0.01171408, 0.35997806, 0.72999056, 0.17162968, 0.52103661, 0.05433799, 0.19999652, 0.01852179,
                         0.7936977,  0.22392469, 0.34535168, 0.92808129, 0.7044144,  0.03183893, 0.16469416, 0.6214784,
                         0.57722859, 0.23789282, 0.934214,   0.61396596, 0.5356328,  0.58990998, 0.73012203, 0.311945,
                         0.39822106, 0.20984375, 0.18619301, 0.94437239, 0.7395508,  0.49045881, 0.22741463, 0.25435648,
                         0.05802916, 0.43441663, 0.31179588, 0.69634349, 0.37775184, 0.17960368};
  float bias_data[] = {0, 0, 0, 0, 0, 0};
  float output_data[] = {
    0.8388255,  1.7207233,  0.56646764, 1.50962,   0.6184657,  0.7572999, 1.7197044,  2.8834608, 1.0304408,  1.5622743,
    0.95027775, 1.1451806,  2.0191956,  2.9541533, 1.1799709,  1.6366025, 1.3484346,  1.0071151, 1.3740869,  2.1602216,
    1.0846798,  1.7810996,  1.6170096,  0.6889053, 0.8671698,  1.4957678, 0.68065727, 1.0596768, 0.9761665,  0.38881996,
    1.524128,   2.2121127,  1.1506181,  1.330961,  1.8186853,  0.9094476, 2.3777275,  2.5568333, 1.8321692,  1.8297466,
    2.069798,   1.3701197,  2.7548862,  2.0871775, 2.3611763,  1.5387508, 1.6725919,  1.2565864, 2.6130712,  2.0915375,
    1.2955335,  1.6571269,  1.7603228,  1.3315495, 1.0005323,  1.0135669, 1.2701392,  1.8230836, 1.6048919,  1.4224635,
    1.4651375,  1.0251865,  1.0325887,  1.2355556, 1.3313429,  0.6756204, 2.602416,   2.1827717, 1.4354478,  1.6628273,
    2.0171032,  1.0299077,  2.6085434,  1.3310422, 2.1677747,  2.457499,  2.6715999,  1.0225507, 2.5822947,  2.1068158,
    1.6401942,  2.5422354,  2.6937182,  1.3813802, 1.1241511,  1.273326,  1.2024405,  1.4564767, 2.016776,   1.0182433,
    1.228782,   0.83329916, 1.033041,   1.3280122, 1.9437144,  0.6729013, 2.438968,   2.3275855, 2.289177,   1.4376242,
    2.4595368,  1.325891,   2.018128,   2.676854,  1.9685578,  1.8240746, 2.3104675,  1.4958379, 2.474168,   2.6657124,
    1.6738743,  2.336092,   2.3048637,  1.802324,  1.7594845,  1.6022205, 1.2564734,  1.8977238, 1.6991055,  1.8674731,
    0.47793916, 1.2031221,  0.6579696,  1.0724078, 0.96408695, 0.5074543, 1.2399375,  1.410824,  0.56263226, 1.3138686,
    1.4859737,  0.7219256,  1.3437214,  2.0015993, 1.0472497,  1.064316,  1.7359762,  0.9249617, 1.2835678,  2.1866667,
    0.92954785, 2.005947,   1.8761289,  1.2612648, 1.2410495,  1.263778,  0.54638237, 1.8269669, 1.3152003,  0.7890457};

  // Run once with fp16 disabled, then once with it enabled; a fresh parameter is created for each run.
  const bool fp16_modes[] = {false, true};
  for (bool use_fp16 : fp16_modes) {
    // The fp16 run is given a looser first tolerance argument (1e-2) than the fp32 run (1e-5).
    double first_tol = 1e-5;
    if (use_fp16) {
      first_tol = 1e-2;
    }
    auto *conv_param = CreateParameter(filter_h, filter_w, step_h, step_w, pad_top, pad_bottom, pad_left, pad_right,
                                       dilate_h, dilate_w, activation, input_shape.back());
    TestMain({{input_shape, input_data, VAR},
              {weight_shape, weight_data, CONST_TENSOR},
              {bias_shape, bias_data, CONST_TENSOR}},
             {output_shape, output_data}, conv_param, use_fp16, first_tol, 1e-1, true);
  }
 }
 }  // namespace mindspore::lite::opencl::test