diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc index 50604ee3ef..549b420081 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc @@ -120,7 +120,10 @@ int Conv2DOpenCLKernel::Prepare() { winograd_mem1_ = allocator->Malloc(size, {width, height, img_dtype}); } - InitWeights(); + auto ret = InitWeights(); + if (ret != RET_OK) { + return ret; + } SetGlobalLocal(); SetConstArgs(); return RET_OK; @@ -256,8 +259,16 @@ int Conv2DOpenCLKernel::InitBias() { } int Conv2DOpenCLKernel::InitWeights() { + if (!in_tensors_.at(1)->IsConst()) { + MS_LOG(ERROR) << "Conv2D don't support non-constant filter yet."; + return RET_ERROR; + } InitFilter(); if (has_bias_) { + if (!in_tensors_.at(2)->IsConst()) { + MS_LOG(ERROR) << "Conv2D don't support non-constant bias yet."; + return RET_ERROR; + } InitBias(); } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index 82544a5750..60bd36d7ea 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -52,7 +52,10 @@ int Conv2dTransposeOpenCLKernel::Prepare() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif - InitWeights(); + auto ret = InitWeights(); + if (ret != RET_OK) { + return ret; + } SetGlobalLocal(); SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; @@ -102,6 +105,10 @@ void Conv2dTransposeOpenCLKernel::SetConstArgs() { } int Conv2dTransposeOpenCLKernel::InitWeights() { + if (!in_tensors_.at(1)->IsConst()) { + MS_LOG(ERROR) << "Conv2dTranspose don't support non-constant filter yet."; + return RET_ERROR; + } ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_); int ci = 
in_tensors_[0]->shape()[3]; int co = out_tensors_[0]->shape()[3]; @@ -171,6 +178,10 @@ int Conv2dTransposeOpenCLKernel::InitWeights() { bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); memset(bias_, 0x00, div_co * C4NUM * data_size); if (in_tensors_.size() >= 3) { + if (!in_tensors_.at(2)->IsConst()) { + MS_LOG(ERROR) << "Conv2dTranspose don't support non-constant bias yet."; + return RET_ERROR; + } auto bias_dtype = in_tensors_[2]->data_type(); if (bias_dtype == kNumberTypeFloat32 && enable_fp16_) { for (int i = 0; i < co; i++) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 7a9d100f1d..81cb66a5cf 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -73,7 +73,10 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif - InitWeights(); + auto ret = InitWeights(); + if (ret != RET_OK) { + return ret; + } SetGlobalLocal(); SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done! 
mem type=" << static_cast<int>(out_mem_type_); @@ -81,6 +84,10 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { } int DepthwiseConv2dOpenCLKernel::InitWeights() { + if (!in_tensors_.at(1)->IsConst()) { + MS_LOG(ERROR) << "DepthwiseConv2d don't support non-constant filter yet."; + return RET_ERROR; + } auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); auto allocator = ocl_runtime_->GetAllocator(); bool is_fp16 = ocl_runtime_->GetFp16Enable(); @@ -122,6 +129,10 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() { allocator->UnmapBuffer(packed_weight_); if (in_tensors_.size() == kInputSize2) { + if (!in_tensors_.at(2)->IsConst()) { + MS_LOG(ERROR) << "DepthwiseConv2d don't support non-constant bias yet."; + return RET_ERROR; + } size_t dtype_size = sizeof(float); if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) { dtype_size = sizeof(int16_t); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index e79a7156e1..7367ce7e4e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -81,7 +81,10 @@ int FullConnectionOpenCLKernel::Prepare() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif - InitWeights(); + auto ret = InitWeights(); + if (ret != RET_OK) { + return ret; + } SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; @@ -89,6 +92,10 @@ int FullConnectionOpenCLKernel::Prepare() { } int FullConnectionOpenCLKernel::InitWeights() { + if (!in_tensors_.at(kWeightIndex)->IsConst()) { + MS_LOG(ERROR) << "FullConnection don't support non-constant filter yet."; + return RET_ERROR; + } auto allocator = ocl_runtime_->GetAllocator(); int ci = inShape.C; int ci4 = UP_DIV(ci, C4NUM); @@ -96,7 +103,6 @@ int FullConnectionOpenCLKernel::InitWeights() { int co4 = UP_DIV(co, 
C4NUM); int h = inShape.H; int w = inShape.W; - size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(h * w * ci4 * co4 * C4NUM * C4NUM * dtype_size); padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); @@ -162,6 +168,10 @@ int FullConnectionOpenCLKernel::InitWeights() { bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); memset(bias_, 0x00, co4 * C4NUM * dtype_size); if (in_tensors_.size() >= 3) { + if (!in_tensors_.at(2)->IsConst()) { + MS_LOG(ERROR) << "FullConnection don't support non-constant bias yet."; + return RET_ERROR; + } if (in_tensors_[2]->data_type() == kNumberTypeFloat32 && enable_fp16_) { for (int i = 0; i < co; i++) { reinterpret_cast<float16_t *>(bias_)[i] = reinterpret_cast<float *>(in_tensors_[2]->data_c())[i]; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index b113244491..c220679314 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -64,7 +64,10 @@ int MatMulOpenCLKernel::Prepare() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif - InitWeights(); + auto ret = InitWeights(); + if (ret != RET_OK) { + return ret; + } SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; @@ -73,6 +76,10 @@ int MatMulOpenCLKernel::Prepare() { int MatMulOpenCLKernel::InitWeights() { // ABMCI @ ABCICO = ABMCO + if (!in_tensors_.at(kWeightIndex)->IsConst()) { + MS_LOG(ERROR) << "Matmul don't support non-constant filter yet."; + return RET_ERROR; + } auto allocator = ocl_runtime_->GetAllocator(); int ci = inShape[3]; int ci4 = UP_DIV(ci, C4NUM); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h index 037ccc10df..f4a0785fe9 100644 --- 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h @@ -39,8 +39,6 @@ class PoolingOpenCLKernel : public OpenCLKernel { private: PoolingParameter *parameter_; - std::vector<size_t> local_size_; - std::vector<size_t> global_size_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index fdb8e274a5..e79f689d27 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -245,12 +245,12 @@ class OpenCLKernel : public LiteKernel { candidate_z = GenerateLocalByGlobal(global_size_[2]); } for (auto x : candidate_x) { - if (x < max_work_items[0]) { + if (x <= max_work_items[0]) { for (auto y : candidate_y) { - if (y < max_work_items[1]) { + if (y <= max_work_items[1]) { for (auto z : candidate_z) { auto group_size = x * y * z; - if (z < max_work_items[2] && group_size < max_workgroup_size && group_size > MIN_WORKGROUP_SIZE) { + if (z <= max_work_items[2] && group_size <= max_workgroup_size && group_size >= MIN_WORKGROUP_SIZE) { BaseTuningParameter tuning_param = BaseTuningParameter(); tuning_param.local_size = {x, y, z}; tuning_params.push_back(tuning_param); @@ -341,11 +341,11 @@ class OpenCLKernel : public LiteKernel { static std::set<size_t> GenerateLocalByGlobal(size_t global_i) { std::set<size_t> local_ = {}; int index = 1; - while (index < global_i) { + while (index <= global_i) { local_.insert(index); index *= 2; } - for (size_t i = 1; i < 16; i++) { + for (size_t i = 1; i <= 16; i++) { if (global_i % i == 0) { local_.insert(i); }