From 5c65180d0a258c131248213777298ccccab5eef4 Mon Sep 17 00:00:00 2001 From: wandongdong Date: Thu, 19 Nov 2020 18:33:26 -0800 Subject: [PATCH] fix setargs bug for prelu and fp16 bug for argminmax --- .../runtime/kernel/opencl/kernel/argminmax.cc | 3 +- .../src/runtime/kernel/opencl/kernel/prelu.cc | 2 +- .../kernel/opencl/subgraph_opencl_kernel.cc | 4 +-- .../src/runtime/opencl/opencl_allocator.cc | 28 +++++++++++-------- .../src/runtime/opencl/opencl_allocator.h | 4 +-- .../runtime/kernel/opencl/argminmax_tests.cc | 24 ++++++++-------- 6 files changed, 35 insertions(+), 30 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc index 8939f9955d..615f693933 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc @@ -80,7 +80,8 @@ void ArgMinMaxOpenCLKernel::SetGlobalLocal() { auto out_shape_align = in_shape_align; out_shape_align.at(param->axis_) = param->axis_ == 3 ? UP_ROUND(param->topk_, C4NUM) : param->topk_; int reduce_len = GetUpPow2(in_shape.at(param->axis_)); - cus_size_ = {reduce_len, static_cast(im_in_.RowPitch() / C4NUM), 1, 1}; + int dtype_size = in_tensors_[0]->data_type() == kNumberTypeFloat16 ? sizeof(int16_t) : sizeof(float); + cus_size_ = {reduce_len, static_cast(im_in_.RowPitch() / dtype_size), 1, 1}; cus_size_.s[2] = UP_ROUND(im_in_.width * C4NUM, cus_size_.s[1]) - im_in_.width * C4NUM; cus_size_.s[3] = im_in_.W * UP_ROUND(param->topk_, C4NUM); cus_size_.s[3] = UP_ROUND(cus_size_.s[3], cus_size_.s[1]) - cus_size_.s[3]; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index 2864267312..63efb37838 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -125,7 +125,7 @@ int PReluOpenCLKernel::Run() { if (weight_is_scalar) { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_); } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF); } ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index f0c9ea01f1..b7b8bd4b97 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -211,14 +211,14 @@ int SubGraphOpenCLKernel::Init() { } nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end()); + UpdateTensorDataType(); + ret = SubGraphKernel::Prepare(); if (ret != RET_OK) { MS_LOG(ERROR) << "OpenCL prepare fail"; return ret; } - UpdateTensorDataType(); - MallocTensorWithReuse(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index 6703c29aa1..998399a1a9 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -83,7 +83,7 @@ void *OpenCLAllocator::CreateBuffer(size_t size, void *data, size_t flags, cl::B } void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector &img_size, void *data, size_t flags, - cl::Buffer **buffer, cl::Image2D **image) { + bool is_map, cl::Buffer **buffer, cl::Image2D **image) { cl_int ret = CL_SUCCESS; cl::ImageFormat image_format(CL_RGBA, img_size[2]); if (data == nullptr) { @@ -99,16 +99,19 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector &img return nullptr; } MS_LOG(DEBUG) << "Malloc a new Image2D, width=" << img_size[0] << ", height=" << img_size[1]; - std::vector region{img_size[0], img_size[1], 1}; - void *host_ptr = ocl_runtime_->MapBuffer(**image, 0, CL_MAP_READ | CL_MAP_WRITE, region); - if (host_ptr == nullptr) { - delete *buffer; - delete *image; - MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr; - return nullptr; + void *host_ptr = nullptr; + if (is_map) { + std::vector region{img_size[0], img_size[1], 1}; + host_ptr = ocl_runtime_->MapBuffer(**image, 0, CL_MAP_READ | CL_MAP_WRITE, region); + if (host_ptr == nullptr) { + delete *buffer; + delete *image; + MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr; + return nullptr; + } + cl::Memory *mem = *image; + ocl_runtime_->UnmapBuffer(*mem, host_ptr); } - cl::Memory *mem = *image; - ocl_runtime_->UnmapBuffer(*mem, host_ptr); return host_ptr; } @@ -149,11 +152,12 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector &img_size, } } if (!img_size.empty()) { - host_ptr = CreateImage2D(size, img_size, data, flags, &buffer, &image); - if (host_ptr == nullptr) { + void *host_ptr_im = CreateImage2D(size, img_size, data, flags, data != nullptr, &buffer, &image); + if (data != nullptr && host_ptr_im == nullptr) { UnLock(); return nullptr; } + host_ptr = (data != nullptr) ? host_ptr_im : host_ptr; } } MemBuf *mem_buf = new (std::nothrow) MemBuf; diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/opencl/opencl_allocator.h index ad579524d6..b1ebafbeb1 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.h +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.h @@ -72,8 +72,8 @@ class OpenCLAllocator : public Allocator { void UnLock(); void *MinimumFit(size_t size, const std::vector &img_size); void *CreateBuffer(size_t size, void *data, size_t flags, cl::Buffer **buffer); - void *CreateImage2D(size_t size, const std::vector &img_size, void *data, size_t flags, cl::Buffer **buffer, - cl::Image2D **image); + void *CreateImage2D(size_t size, const std::vector &img_size, void *data, size_t flags, bool is_map, + cl::Buffer **buffer, cl::Image2D **image); struct MemBuf { size_t size_; void *device_ptr_; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc index 070eff9a08..9ed0cd3d79 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc @@ -44,7 +44,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis0topk2index) { std::vector output_shape = {2, 2, 2, 2}; float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; float output_data[] = {0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 2, 2}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -59,7 +59,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis0topk2value) { std::vector output_shape = {2, 2, 2, 2}; float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; float output_data[] = {100, 25, 40, 50, 18, 80, 34, 35, 55, 20, 5, 15, 11, 12, 15, 16}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -75,7 +75,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis1topk2index) { float input_data[] = {100, 2, 200, 4, 50, 6, 11, 12, 13, 34, 35, 36, 9, 6, 17, 10, 20, 30, 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23}; float output_data[] = {0, 1, 0, 1, 0, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1, 0, 0, 0, 1, 1, 0}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -92,7 +92,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis1topk2value) { 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23}; float output_data[] = {100, 12, 200, 34, 50, 36, 11, 6, 17, 10, 35, 30, 18, 80, 90, 40, 22, 120, 10, 20, 30, 10, 11, 60}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -109,7 +109,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis2topk1index) { 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12}; float output_data[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -121,12 +121,13 @@ TEST_F(TestOpenCL_ArgMinMax, axis2topk2value) { int topk = 2; bool out_value = true; std::vector input_shape = {2, 2, 3, 5}; - std::vector output_shape = {1, 2, 2, 5}; + std::vector output_shape = {2, 2, 2, 5}; float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; - float output_data[] = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50}; - for (auto fp16_enable : {false}) { + float output_data[] = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50, + 30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50}; + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -144,7 +145,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis2topk2index) { 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; float output_data[] = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -161,7 +162,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis3topk2index) { 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; float output_data[] = {4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } @@ -179,10 +180,9 @@ TEST_F(TestOpenCL_ArgMinMax, axis3topk2value) { 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; float output_data[] = {90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45}; - for (auto fp16_enable : {false}) { + for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(type, axis, topk, out_value); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } } - } // namespace mindspore::lite::opencl::test