Browse Source

fix depthwise CreateImage2d bug

tags/v1.2.0-rc1
wandongdong 5 years ago
parent
commit
e1e10981d0
5 changed files with 223 additions and 72 deletions
  1. +13
    -8
      mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc
  2. +61
    -51
      mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
  3. +9
    -3
      mindspore/lite/src/runtime/opencl/opencl_allocator.cc
  4. +15
    -0
      mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc
  5. +125
    -10
      mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc

+ 13
- 8
mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc View File

@@ -48,13 +48,12 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() {
return RET_ERROR;
}
auto *param = reinterpret_cast<ArgMinMaxParameter *>(this->op_parameter_);
param->dims_size_ = in_tensors_[0]->shape().size();
param->axis_ = (param->axis_ + param->dims_size_) % param->dims_size_;
if (param->axis_ < 0 || param->axis_ >= param->dims_size_) {
MS_LOG(ERROR) << "Invalid axis " << param->axis_;
auto dims_size = in_tensors_[0]->shape().size();
auto axis = (param->axis_ + dims_size) % dims_size;
if (axis < 0 || axis >= dims_size) {
MS_LOG(ERROR) << "Invalid axis " << axis;
return RET_ERROR;
}
param->get_max_ = (Type() == PrimitiveType_ArgMax);
return RET_OK;
}

@@ -77,10 +76,10 @@ void ArgMinMaxOpenCLKernel::SetConstArgs() {

void ArgMinMaxOpenCLKernel::SetGlobalLocal() {
auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
auto in_shape = in_tensors_[0]->shape();
im_in_ = GpuTensorInfo(in_tensors_[0]);
std::vector<size_t> in_shape = {im_in_.N, im_in_.H, im_in_.W, im_in_.C};
auto in_shape_align = in_shape;
in_shape_align[3] = UP_ROUND(in_shape[3], C4NUM);
im_in_ = GpuTensorInfo(in_tensors_[0]);
auto out_shape_align = in_shape_align;
out_shape_align.at(param->axis_) = param->axis_ == 3 ? UP_ROUND(param->topk_, C4NUM) : param->topk_;
int reduce_len = GetUpPow2(in_shape.at(param->axis_));
@@ -92,7 +91,7 @@ void ArgMinMaxOpenCLKernel::SetGlobalLocal() {
src_size_ = {std::accumulate(in_shape.begin() + param->axis_ + 1, in_shape.end(), 1, std::multiplies<int>()),
std::accumulate(in_shape.begin(), in_shape.begin() + param->axis_, 1, std::multiplies<int>()),
std::accumulate(in_shape.begin() + param->axis_, in_shape.end(), 1, std::multiplies<int>()),
in_shape.at(param->axis_)};
static_cast<int>(in_shape.at(param->axis_))};
strides_ = {
std::accumulate(in_shape_align.begin() + param->axis_ + 1, in_shape_align.end(), 1, std::multiplies<int>()),
std::accumulate(in_shape_align.begin() + param->axis_, in_shape_align.end(), 1, std::multiplies<int>()),
@@ -145,6 +144,12 @@ int ArgMinMaxOpenCLKernel::Prepare() {
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
#endif

auto *param = reinterpret_cast<ArgMinMaxParameter *>(this->op_parameter_);
param->dims_size_ = in_tensors_[0]->shape().size();
param->axis_ = (param->axis_ + param->dims_size_) % param->dims_size_;
param->axis_ = (4 - param->dims_size_) + param->axis_;
param->get_max_ = (Type() == PrimitiveType_ArgMax);

InitWeights();
SetGlobalLocal();
SetConstArgs();


+ 61
- 51
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc View File

@@ -118,67 +118,77 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
int alignment = ocl_runtime_->GetImagePitchAlignment();
plane_out = UP_ROUND(plane_out, alignment) * C4NUM;
pack_weight_size = plane_out * CO4;
auto shape = in_tensors_[1]->shape();
size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
img_size = {(size_t)plane_out / C4NUM, (size_t)shape[0] * CO4, img_dtype};
img_size = {(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype};
}
if (is_fp16) {
packed_weight_ = allocator->Malloc(pack_weight_size * sizeof(int16_t), img_size);
packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true);
if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) {
std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
} else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) {
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNCHWToNC4HW4<float, float16_t>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
} else { // int8 or int16
std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
}
} else {
packed_weight_ = allocator->Malloc(pack_weight_size * sizeof(float), img_size);
packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true);
if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) {
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
} else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) {
std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
PackNCHWToNC4HW4<float16_t, float>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
} else { // int8 or int16
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype);
pack_weight_size = is_fp16 ? pack_weight_size * sizeof(int16_t) : pack_weight_size * sizeof(float);
auto ConvertFilter = [](void *src, void *dst, TypeId src_type, TypeId dst_type, size_t plane_in, size_t plane_out,
size_t channel) {
if (dst_type == kNumberTypeFloat16) {
if (src_type == kNumberTypeFloat16) {
std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
PackNCHWToNC4HW4<int16_t, int16_t>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
} else if (src_type == kNumberTypeFloat32) {
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNCHWToNC4HW4<float, float16_t>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
} else { // int8 or int16
std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
PackNCHWToNC4HW4<int16_t, int16_t>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
}
} else {
if (src_type == kNumberTypeFloat32) {
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNCHWToNC4HW4<float, float>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
} else if (src_type == kNumberTypeFloat16) {
std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
PackNCHWToNC4HW4<float16_t, float>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
} else { // int8 or int16
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNCHWToNC4HW4<float, float>(src, dst, 1, plane_in, plane_out, channel, to_dtype);
}
}
}
allocator->UnmapBuffer(packed_weight_);
};
std::vector<char> temp_filter(pack_weight_size);
auto src_type = in_tensors_.at(kWeightIndex)->data_type();
auto dst_type = is_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32;
ConvertFilter(origin_weight, temp_filter.data(), src_type, dst_type, plane_in, plane_out, out_info.C);
packed_weight_ = allocator->Malloc(pack_weight_size, img_size, temp_filter.data());
FreeDequantedWeight();

size_t dtype_size = sizeof(float);
if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
dtype_size = sizeof(int16_t);
if (packed_weight_ == nullptr) {
return RET_ERROR;
}
bias_data_ = allocator->Malloc(C4NUM * CO4 * dtype_size);
bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true);
size_t up_co_size = C4NUM * CO4 * dtype_size;
memset(bias_data_, 0, up_co_size);
if (in_tensors_.size() == kInputSize2) {
auto ori_bias = in_tensors_.at(kBiasIndex)->data_c();
if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) {
float16_t *bias_ptr = static_cast<float16_t *>(bias_data_);
for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(ori_bias)[i]);

auto ConvertBias = [](void *src, void *dst, size_t size, size_t dtype_size, TypeId src_type, TypeId dst_type) {
if (dst_type == kNumberTypeFloat16 && src_type == kNumberTypeFloat32) {
float16_t *bias_ptr = static_cast<float16_t *>(dst);
for (size_t i = 0; i < size; ++i) {
bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(src)[i]);
}
} else if (!is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
float32_t *bias_ptr = static_cast<float32_t *>(bias_data_);
for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
bias_ptr[i] = static_cast<float32_t>(static_cast<float16_t *>(ori_bias)[i]);
} else if (dst_type == kNumberTypeFloat32 && src_type == kNumberTypeFloat16) {
float32_t *bias_ptr = static_cast<float32_t *>(dst);
for (size_t i = 0; i < size; ++i) {
bias_ptr[i] = static_cast<float32_t>(static_cast<float16_t *>(src)[i]);
}
} else {
memcpy(bias_data_, ori_bias, out_info.C * dtype_size);
memcpy(dst, src, size * dtype_size);
}
} else {
MS_ASSERT(in_tensors_.size() == kInputSize1);
};
size_t dtype_size = sizeof(float);
if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
dtype_size = sizeof(int16_t);
}
std::vector<char> temp_bias(pack_weight_size, 0);
if (in_tensors_.size() == 3) {
src_type = in_tensors_.at(kBiasIndex)->data_type();
dst_type = is_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32;
auto element_size = in_tensors_.at(kBiasIndex)->ElementsNum();
ConvertBias(in_tensors_.at(kBiasIndex)->data_c(), temp_bias.data(), element_size, dtype_size, src_type, dst_type);
}
size_t bias_size = C4NUM * CO4 * dtype_size;
bias_data_ = allocator->Malloc(bias_size, {}, temp_bias.data());
if (bias_data_ == nullptr) {
return RET_ERROR;
}
allocator->UnmapBuffer(bias_data_);
return mindspore::lite::RET_OK;
}
void DepthwiseConv2dOpenCLKernel::SetConstArgs() {


+ 9
- 3
mindspore/lite/src/runtime/opencl/opencl_allocator.cc View File

@@ -107,14 +107,20 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img
}
if (*image == nullptr) {
delete *buffer;
MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
return nullptr;
}
if (ret != CL_SUCCESS) {
delete *buffer;
delete *image;
MS_LOG(ERROR) << "Create OpenCL Image2D (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
return nullptr;
}
MS_LOG(DEBUG) << "Malloc a new Image2D, width=" << img_size[0] << ", height=" << img_size[1];
void *host_ptr = nullptr;
if (is_map) {
std::vector<size_t> region{img_size[0], img_size[1], 1};
host_ptr = ocl_runtime_->MapBuffer(**image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
host_ptr = ocl_runtime_->MapBuffer(**image, true, CL_MAP_READ | CL_MAP_WRITE, region);
if (host_ptr == nullptr) {
delete *buffer;
delete *image;
@@ -340,7 +346,7 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
std::vector<size_t> region{mem_buf->img_size[0], mem_buf->img_size[1], 1};
cl::Image2D *image = static_cast<cl::Image2D *>(mem_buf->image_ptr_);
MS_ASSERT(image);
new_host_ptr = ocl_runtime_->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
new_host_ptr = ocl_runtime_->MapBuffer(*image, sync, CL_MAP_READ | CL_MAP_WRITE, region);
}
if (new_host_ptr == nullptr) {
UnLock();


+ 15
- 0
mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc View File

@@ -185,4 +185,19 @@ TEST_F(TestOpenCL_ArgMinMax, axis3topk2value) {
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
}
}
// ArgMax over axis 1 of a {1, 2, 14} tensor with topk = 1, emitting indices instead of values.
// Every expected entry is the row (0 or 1) that holds the larger of the two values in that column.
TEST_F(TestOpenCL_ArgMinMax, axis1topk1index) {
  // Tensor geometry and payload.
  std::vector<int> input_shape = {1, 2, 14};
  std::vector<int> output_shape = {1, 14};
  float input_data[] = {
    10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50,  // row 0
    30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25,  // row 1
  };
  float output_data[] = {1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0};

  // Operator configuration.
  schema::PrimitiveType op_type = schema::PrimitiveType_ArgMax;
  int reduce_axis = 1;
  int top_k = 1;
  bool emit_values = false;

  // Run the same case once in fp32 and once in fp16.
  for (bool use_fp16 : {false, true}) {
    auto *param = CreateParameter(op_type, reduce_axis, top_k, emit_values);
    TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, use_fp16, 1e-1, 1e-1, true);
  }
}
} // namespace mindspore::lite::opencl::test

+ 125
- 10
mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc View File

@@ -58,22 +58,24 @@ TEST_F(TestOpenCL_DepthwiseConv2d, NoPad) {
std::vector<int> output_shape = {1, 2, 2, 4};
std::vector<int> weight_shape = {1, kernel_h, kernel_w, output_shape.back()};
std::vector<int> bias_shape = {output_shape.back()};
float input_data[] = {0.5488135, 0.0202184, 0.45615032, 0.31542835, 0.71518934, 0.83261985, 0.56843394, 0.36371076,
0.60276335, 0.77815676, 0.0187898, 0.57019675, 0.5448832, 0.87001216, 0.6176355, 0.43860152,
0.4236548, 0.9786183, 0.6120957, 0.9883738, 0.6458941, 0.7991586, 0.616934, 0.10204481,
0.4375872, 0.46147937, 0.94374806, 0.20887676, 0.891773, 0.7805292, 0.6818203, 0.16130951,
0.96366274, 0.11827443, 0.3595079, 0.6531083, 0.3834415, 0.639921, 0.43703195, 0.2532916,
0.79172504, 0.14335328, 0.6976312, 0.46631077, 0.5288949, 0.9446689, 0.06022547, 0.2444256,
0.56804454, 0.5218483, 0.6667667, 0.15896958, 0.92559665, 0.41466194, 0.67063785, 0.11037514,
0.07103606, 0.2645556, 0.21038257, 0.6563296, 0.0871293, 0.7742337, 0.12892629, 0.13818295};
float input_data[] = {
0.5488135, 0.71518934, 0.60276335, 0.5448832, 0.4236548, 0.6458941, 0.4375872, 0.891773,
0.96366274, 0.3834415, 0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606, 0.0871293,
0.0202184, 0.83261985, 0.77815676, 0.87001216, 0.9786183, 0.7991586, 0.46147937, 0.7805292,
0.11827443, 0.639921, 0.14335328, 0.9446689, 0.5218483, 0.41466194, 0.2645556, 0.7742337,
0.45615032, 0.56843394, 0.0187898, 0.6176355, 0.6120957, 0.616934, 0.94374806, 0.6818203,
0.3595079, 0.43703195, 0.6976312, 0.06022547, 0.6667667, 0.67063785, 0.21038257, 0.12892629,
0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481, 0.20887676, 0.16130951,
0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296, 0.13818295,
};
float bias_data[] = {0, 0, 0, 0};
float weight_data[] = {0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512,
0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772,
0.31798318, 0.41426298, 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051,
0.5759465, 0.9292962, 0.31856894, 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.18319136,
0.5865129, 0.02010755, 0.82894003, 0.00469548};
float output_data[] = {3.3848767, 1.4446403, 1.8428744, 1.3194335, 2.5873442, 2.1384869, 2.04022, 1.1872686,
2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
float output_data[] = {2.9720426, 1.890834, 2.3618119, 2.3867798, 2.5666943, 1.6261611, 2.0977764, 1.6445805,
2.462798, 1.6643658, 1.6861027, 1.8428761, 2.5156446, 1.5366757, 1.6767557, 1.6905226};
for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(kernel_h, kernel_w, stride_h, stride_w, pad_u, pad_d, pad_l, pad_r, dilation_h,
@@ -132,4 +134,117 @@ TEST_F(TestOpenCL_DepthwiseConv2d, Pad) {
}
}
// Depthwise convolution with a 2x2 filter, stride 1, dilation 1 and no padding:
// a {1, 4, 4, 4} input produces a {1, 3, 3, 4} output. The bias is all zeros.
TEST_F(TestOpenCL_DepthwiseConv2d, NoPad1) {
  // Filter geometry.
  int filter_h = 2;
  int filter_w = 2;
  int step_h = 1;
  int step_w = 1;
  int dilate_h = 1;
  int dilate_w = 1;
  // No padding on any side.
  int pad_top = 0;
  int pad_bottom = 0;
  int pad_left = 0;
  int pad_right = 0;
  ActType activation = ActType_No;

  // Tensor shapes; the weight and bias channel count follows the output's last dimension.
  std::vector<int> input_shape = {1, 4, 4, 4};
  std::vector<int> output_shape = {1, 3, 3, 4};
  std::vector<int> weight_shape = {1, filter_h, filter_w, output_shape.back()};
  std::vector<int> bias_shape = {output_shape.back()};

  float input_data[] = {
    0.5488135, 0.71518934, 0.60276335, 0.5448832, 0.4236548, 0.6458941, 0.4375872, 0.891773,
    0.96366274, 0.3834415, 0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606, 0.0871293,
    0.0202184, 0.83261985, 0.77815676, 0.87001216, 0.9786183, 0.7991586, 0.46147937, 0.7805292,
    0.11827443, 0.639921, 0.14335328, 0.9446689, 0.5218483, 0.41466194, 0.2645556, 0.7742337,
    0.45615032, 0.56843394, 0.0187898, 0.6176355, 0.6120957, 0.616934, 0.94374806, 0.6818203,
    0.3595079, 0.43703195, 0.6976312, 0.06022547, 0.6667667, 0.67063785, 0.21038257, 0.12892629,
    0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481, 0.20887676, 0.16130951,
    0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296, 0.13818295
  };
  float weight_data[] = {
    0.19658236, 0.36872517, 0.82099323, 0.09710128, 0.83794491, 0.09609841,
    0.97645947, 0.4686512, 0.97676109, 0.60484552, 0.73926358, 0.03918779,
    0.28280696, 0.12019656, 0.2961402, 0.11872772
  };
  float bias_data[] = {0, 0, 0, 0};
  float output_data[] = {
    0.3757235, 1.8489048, 1.4467758, 0.6116009, 1.2535334, 1.6583176, 1.2530621, 0.6590755,
    0.5466661, 1.22944, 0.93263525, 0.5317252, 0.7987474, 1.618667, 1.090071, 0.60372007,
    0.773425, 1.5383728, 1.262479, 0.54334986, 0.5755667, 1.3171062, 0.82401496, 0.39336145,
    0.6703031, 0.9385749, 1.018886, 0.40566355, 1.1277528, 0.7773028, 1.5164642, 0.27685273,
    0.86816025, 0.72971237, 1.1791146, 0.12131907
  };

  // Run once in fp32 and once in fp16; the fp16 pass uses a looser first tolerance.
  for (bool use_fp16 : {false, true}) {
    auto *param = CreateParameter(filter_h, filter_w, step_h, step_w, pad_top, pad_bottom, pad_left, pad_right,
                                  dilate_h, dilate_w, activation, input_shape.back());
    const auto first_tolerance = use_fp16 ? 1e-2 : 1e-5;
    TestMain({{input_shape, input_data, VAR},
              {weight_shape, weight_data, CONST_TENSOR},
              {bias_shape, bias_data, CONST_TENSOR}},
             {output_shape, output_data}, param, use_fp16, first_tolerance, 1e-1, true);
  }
}
// Depthwise convolution with a 3x3 filter, stride 1, dilation 1 and one element of padding on every side:
// a {1, 5, 5, 6} input keeps its shape in the output. The channel count (6) is not a multiple of 4.
// The bias is all zeros.
TEST_F(TestOpenCL_DepthwiseConv2d, Pad1) {
  // Filter geometry.
  int filter_h = 3;
  int filter_w = 3;
  int step_h = 1;
  int step_w = 1;
  int dilate_h = 1;
  int dilate_w = 1;
  // One element of padding on each border.
  int pad_top = 1;
  int pad_bottom = 1;
  int pad_left = 1;
  int pad_right = 1;
  ActType activation = ActType_No;

  // Tensor shapes; the weight and bias channel count follows the output's last dimension.
  std::vector<int> input_shape = {1, 5, 5, 6};
  std::vector<int> output_shape = {1, 5, 5, 6};
  std::vector<int> weight_shape = {1, filter_h, filter_w, output_shape.back()};
  std::vector<int> bias_shape = {output_shape.back()};

  float input_data[] = {
    0.5488135, 0.71518934, 0.60276335, 0.5448832, 0.4236548, 0.6458941, 0.4375872, 0.891773, 0.96366274,
    0.3834415, 0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606, 0.0871293, 0.0202184, 0.83261985,
    0.77815676, 0.87001216, 0.9786183, 0.7991586, 0.46147937, 0.7805292, 0.11827443, 0.639921, 0.14335328,
    0.9446689, 0.5218483, 0.41466194, 0.2645556, 0.7742337, 0.45615032, 0.56843394, 0.0187898, 0.6176355,
    0.6120957, 0.616934, 0.94374806, 0.6818203, 0.3595079, 0.43703195, 0.6976312, 0.06022547, 0.6667667,
    0.67063785, 0.21038257, 0.12892629, 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481,
    0.20887676, 0.16130951, 0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296,
    0.13818295, 0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512,
    0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, 0.31798318,
    0.41426298, 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962,
    0.31856894, 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.18319136, 0.5865129, 0.02010755, 0.82894003,
    0.00469548, 0.6778165, 0.27000797, 0.735194, 0.96218854, 0.24875315, 0.57615733, 0.5920419, 0.5722519,
    0.22308163, 0.952749, 0.44712538, 0.84640867, 0.6994793, 0.29743695, 0.81379783, 0.39650574, 0.8811032,
    0.5812729, 0.8817354, 0.6925316, 0.7252543, 0.50132436, 0.95608366, 0.6439902, 0.42385504, 0.6063932,
    0.0191932, 0.30157483, 0.66017354, 0.2900776, 0.6180154, 0.4287687, 0.13547407, 0.29828233, 0.5699649,
    0.59087276, 0.57432526, 0.6532008, 0.65210325, 0.43141845, 0.8965466, 0.36756188, 0.43586493, 0.89192337,
    0.806194, 0.7038886, 0.10022689, 0.9194826, 0.7142413, 0.998847
  };
  float weight_data[] = {
    0.1494483, 0.86812606, 0.16249293, 0.61555956, 0.12381998, 0.84800823, 0.80731896, 0.56910074,
    0.4071833, 0.069167, 0.69742877, 0.45354268, 0.7220556, 0.86638233, 0.97552151, 0.85580334,
    0.01171408, 0.35997806, 0.72999056, 0.17162968, 0.52103661, 0.05433799, 0.19999652, 0.01852179,
    0.7936977, 0.22392469, 0.34535168, 0.92808129, 0.7044144, 0.03183893, 0.16469416, 0.6214784,
    0.57722859, 0.23789282, 0.934214, 0.61396596, 0.5356328, 0.58990998, 0.73012203, 0.311945,
    0.39822106, 0.20984375, 0.18619301, 0.94437239, 0.7395508, 0.49045881, 0.22741463, 0.25435648,
    0.05802916, 0.43441663, 0.31179588, 0.69634349, 0.37775184, 0.17960368
  };
  float bias_data[] = {0, 0, 0, 0, 0, 0};
  float output_data[] = {
    0.8388255, 1.7207233, 0.56646764, 1.50962, 0.6184657, 0.7572999, 1.7197044, 2.8834608, 1.0304408, 1.5622743,
    0.95027775, 1.1451806, 2.0191956, 2.9541533, 1.1799709, 1.6366025, 1.3484346, 1.0071151, 1.3740869, 2.1602216,
    1.0846798, 1.7810996, 1.6170096, 0.6889053, 0.8671698, 1.4957678, 0.68065727, 1.0596768, 0.9761665, 0.38881996,
    1.524128, 2.2121127, 1.1506181, 1.330961, 1.8186853, 0.9094476, 2.3777275, 2.5568333, 1.8321692, 1.8297466,
    2.069798, 1.3701197, 2.7548862, 2.0871775, 2.3611763, 1.5387508, 1.6725919, 1.2565864, 2.6130712, 2.0915375,
    1.2955335, 1.6571269, 1.7603228, 1.3315495, 1.0005323, 1.0135669, 1.2701392, 1.8230836, 1.6048919, 1.4224635,
    1.4651375, 1.0251865, 1.0325887, 1.2355556, 1.3313429, 0.6756204, 2.602416, 2.1827717, 1.4354478, 1.6628273,
    2.0171032, 1.0299077, 2.6085434, 1.3310422, 2.1677747, 2.457499, 2.6715999, 1.0225507, 2.5822947, 2.1068158,
    1.6401942, 2.5422354, 2.6937182, 1.3813802, 1.1241511, 1.273326, 1.2024405, 1.4564767, 2.016776, 1.0182433,
    1.228782, 0.83329916, 1.033041, 1.3280122, 1.9437144, 0.6729013, 2.438968, 2.3275855, 2.289177, 1.4376242,
    2.4595368, 1.325891, 2.018128, 2.676854, 1.9685578, 1.8240746, 2.3104675, 1.4958379, 2.474168, 2.6657124,
    1.6738743, 2.336092, 2.3048637, 1.802324, 1.7594845, 1.6022205, 1.2564734, 1.8977238, 1.6991055, 1.8674731,
    0.47793916, 1.2031221, 0.6579696, 1.0724078, 0.96408695, 0.5074543, 1.2399375, 1.410824, 0.56263226, 1.3138686,
    1.4859737, 0.7219256, 1.3437214, 2.0015993, 1.0472497, 1.064316, 1.7359762, 0.9249617, 1.2835678, 2.1866667,
    0.92954785, 2.005947, 1.8761289, 1.2612648, 1.2410495, 1.263778, 0.54638237, 1.8269669, 1.3152003, 0.7890457
  };

  // Run once in fp32 and once in fp16; the fp16 pass uses a looser first tolerance.
  for (bool use_fp16 : {false, true}) {
    auto *param = CreateParameter(filter_h, filter_w, step_h, step_w, pad_top, pad_bottom, pad_left, pad_right,
                                  dilate_h, dilate_w, activation, input_shape.back());
    const auto first_tolerance = use_fp16 ? 1e-2 : 1e-5;
    TestMain({{input_shape, input_data, VAR},
              {weight_shape, weight_data, CONST_TENSOR},
              {bias_shape, bias_data, CONST_TENSOR}},
             {output_shape, output_data}, param, use_fp16, first_tolerance, 1e-1, true);
  }
}
} // namespace mindspore::lite::opencl::test

Loading…
Cancel
Save