diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc index 3e4a081e70..e5ff86048a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc @@ -54,7 +54,7 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() { MS_LOG(ERROR) << "Invalid axis " << param->axis_; return RET_ERROR; } - param->get_max_ = (op_parameter_->type_ == PrimitiveType_ArgMax); + param->get_max_ = (Type() == PrimitiveType_ArgMax); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index c579672fd1..78e866ee7d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -38,25 +38,6 @@ using mindspore::schema::PrimitiveType_Eltwise; namespace mindspore::kernel { -std::set SupportedOpenCLArithmetics = {PrimitiveType_Mul, - PrimitiveType_Add, - PrimitiveType_Sub, - PrimitiveType_Div, - PrimitiveType_LogicalAnd, - PrimitiveType_LogicalOr, - PrimitiveType_Maximum, - PrimitiveType_Minimum, - PrimitiveType_FloorDiv, - PrimitiveType_FloorMod, - PrimitiveType_SquaredDifference, - PrimitiveType_Equal, - PrimitiveType_NotEqual, - PrimitiveType_Less, - PrimitiveType_LessEqual, - PrimitiveType_Greater, - PrimitiveType_GreaterEqual, - PrimitiveType_Eltwise}; - int ArithmeticOpenCLKernel::CheckSpecs() { if (in_tensors_.size() != 2 || out_tensors_.size() != 1) { MS_LOG(ERROR) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size(); @@ -67,8 +48,8 @@ int ArithmeticOpenCLKernel::CheckSpecs() { MS_LOG(ERROR) << "Broadcasting don't support N > 1"; return RET_ERROR; } - if (SupportedOpenCLArithmetics.count(static_cast(op_parameter_->type_)) == 0) { - MS_LOG(ERROR) << "UnSupported Operator: " << schema::EnumNamesPrimitiveType()[op_parameter_->type_]; + if 
(!IsArithmetic(Type())) { + MS_LOG(ERROR) << "UnSupported Operator: " << schema::EnumNamePrimitiveType(Type()); return RET_ERROR; } if (!(param->activation_type_ == ActivationType_NO_ACTIVATION || param->activation_type_ == ActivationType_RELU || @@ -201,7 +182,7 @@ int ArithmeticOpenCLKernel::Prepare() { auto *param = reinterpret_cast(op_parameter_); element_flag_ = !param->broadcasting_; kernel_name_ = param->broadcasting_ ? "BroadcastNHWC4" : "Element"; - kernel_name_ += schema::EnumNamesPrimitiveType()[op_parameter_->type_]; + kernel_name_ += schema::EnumNamePrimitiveType(Type()); if (param->activation_type_ == ActivationType_RELU) { activation_min_ = 0.f; } else if (param->activation_type_ == ActivationType_RELU6) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 3b78a33ff1..642ac24bc2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -25,75 +25,18 @@ using mindspore::kernel::KERNEL_ARCH::kGPU; using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; -using mindspore::schema::PrimitiveType_Abs; -using mindspore::schema::PrimitiveType_Ceil; -using mindspore::schema::PrimitiveType_Cos; -using mindspore::schema::PrimitiveType_Exp; -using mindspore::schema::PrimitiveType_Floor; -using mindspore::schema::PrimitiveType_Log; -using mindspore::schema::PrimitiveType_LogicalNot; -using mindspore::schema::PrimitiveType_Neg; -using mindspore::schema::PrimitiveType_Round; -using mindspore::schema::PrimitiveType_Rsqrt; -using mindspore::schema::PrimitiveType_Sin; -using mindspore::schema::PrimitiveType_Sqrt; -using mindspore::schema::PrimitiveType_Square; namespace mindspore::kernel { -void ArithmeticSelfOpenCLKernel::GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param) { - MS_ASSERT(kernel_name); - 
MS_ASSERT(param); - switch (param->op_parameter_.type_) { - case PrimitiveType_Abs: - kernel_name[0] += "_ElementAbs"; - break; - case PrimitiveType_Cos: - kernel_name[0] += "_ElementCos"; - break; - case PrimitiveType_Exp: - kernel_name[0] += "_ElementExp"; - break; - case PrimitiveType_Log: - kernel_name[0] += "_ElementLog"; - break; - case PrimitiveType_Square: - kernel_name[0] += "_ElementSquare"; - break; - case PrimitiveType_Sqrt: - kernel_name[0] += "_ElementSqrt"; - break; - case PrimitiveType_Rsqrt: - kernel_name[0] += "_ElementRsqrt"; - break; - case PrimitiveType_Sin: - kernel_name[0] += "_ElementSin"; - break; - case PrimitiveType_LogicalNot: - kernel_name[0] += "_ElementLogicalNot"; - break; - case PrimitiveType_Floor: - kernel_name[0] += "_ElementFloor"; - break; - case PrimitiveType_Ceil: - kernel_name[0] += "_ElementCeil"; - break; - case PrimitiveType_Round: - kernel_name[0] += "_ElementRound"; - break; - case PrimitiveType_Neg: - kernel_name[0] += "_ElementNeg"; - break; - default: - break; - } -} - int ArithmeticSelfOpenCLKernel::CheckSpecs() { if (in_tensors_.size() != 1 || out_tensors_.size() != 1) { MS_LOG(ERROR) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size(); return RET_ERROR; } + if (!IsArithmeticSelf(Type())) { + MS_LOG(ERROR) << "UnSupported Operator: " << schema::EnumNamePrimitiveType(Type()); + return RET_ERROR; + } if (in_tensors_[0]->shape().size() != 4 && in_tensors_[0]->shape().size() != 2) { MS_LOG(ERROR) << " only support dim = 4 or 2 but your dim = " << in_tensors_[0]->shape().size(); return RET_ERROR; @@ -101,11 +44,6 @@ int ArithmeticSelfOpenCLKernel::CheckSpecs() { return RET_OK; } -void ArithmeticSelfOpenCLKernel::SetConstArgs() { - int arg_cn = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); -} - void ArithmeticSelfGetWorkGroup(const std::vector &global, std::vector *local, int max_size) { const int max_divider = 8; const int max_x = 4, max_y = 8; @@ -142,27 +80,20 @@ void 
ArithmeticSelfOpenCLKernel::SetGlobalLocal() { } int ArithmeticSelfOpenCLKernel::Prepare() { - auto param = reinterpret_cast(this->op_parameter_); - std::string kernel_name = "ArithmeticSelf"; - GetKernelName(&kernel_name, param); - kernel_name += "_NHWC4"; + std::string kernel_name = "ArithmeticSelf_Element" + std::string(schema::EnumNamePrimitiveType(Type())) + "_NHWC4"; MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; - std::set build_options; - std::string source = arithmeticself_source; std::string program_name = "ArithmeticSelf"; - ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, arithmeticself_source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); SetGlobalLocal(); SetConstArgs(); - return RET_OK; } int ArithmeticSelfOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } @@ -180,5 +111,18 @@ REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Sin, OpenCLKernelCreator) REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Sqrt, OpenCLKernelCreator) REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Square, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Abs, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Ceil, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Cos, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Exp, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Floor, 
OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Log, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LogicalNot, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Round, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Rsqrt, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Sin, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Neg, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Sqrt, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Square, OpenCLKernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h index 93512e1fef..f6c1c8ed11 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h @@ -22,6 +22,21 @@ #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "nnacl/arithmetic_self_parameter.h" +using mindspore::schema::PrimitiveType_Abs; +using mindspore::schema::PrimitiveType_Ceil; +using mindspore::schema::PrimitiveType_Cos; +using mindspore::schema::PrimitiveType_Eltwise; +using mindspore::schema::PrimitiveType_Exp; +using mindspore::schema::PrimitiveType_Floor; +using mindspore::schema::PrimitiveType_Log; +using mindspore::schema::PrimitiveType_LogicalNot; +using mindspore::schema::PrimitiveType_Neg; +using mindspore::schema::PrimitiveType_Round; +using mindspore::schema::PrimitiveType_Rsqrt; +using mindspore::schema::PrimitiveType_Sin; +using mindspore::schema::PrimitiveType_Sqrt; +using mindspore::schema::PrimitiveType_Square; + namespace mindspore::kernel { class ArithmeticSelfOpenCLKernel : public OpenCLKernel { @@ -35,13 +50,12 @@ class ArithmeticSelfOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() 
override; - void SetConstArgs() override; + void SetConstArgs() override { ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_); } void SetGlobalLocal() override; int Run() override; private: - void GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param); cl_int4 output_shape_ = {}; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc index 453ce0035c..18ac32c0f4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc @@ -57,17 +57,26 @@ int Conv2DOpenCLKernel::CheckSpecs() { MS_LOG(ERROR) << "Conv2D only supports 4D input Tensor but get " << in_tensors_.front()->shape().size() << "D."; return RET_ERROR; } - if (in_tensors_[1]->shape().size() != 4) { - MS_LOG(ERROR) << "Conv2D only supports 4D filter Tensor but get " << in_tensors_[1]->shape().size() << "D."; + if (in_tensors_.at(1)->shape().size() != 4) { + MS_LOG(ERROR) << "Conv2D only supports 4D filter Tensor but get " << in_tensors_.at(1)->shape().size() << "D."; return RET_ERROR; } if (out_tensors_.front()->shape().size() != 4) { MS_LOG(ERROR) << "Conv2D only supports 4D output Tensor but get " << out_tensors_.front()->shape().size() << "D."; return RET_ERROR; } - if (param_->act_type_ != ActType_No && param_->act_type_ != ActType_Relu && param_->act_type_ != ActType_Relu6) { - MS_LOG(ERROR) << "Unsupported activation type " << param_->act_type_; - return RET_ERROR; + // for fusion: ActivationType_LEAKY_RELU ActivationType_TANH + switch (static_cast(param_->act_type_)) { + case ActType_No: + case ActType_Relu: + case ActType_Relu6: + case ActivationType_LEAKY_RELU: + case ActivationType_TANH: + break; + default: { + MS_LOG(ERROR) << "Unsupported activation type " << param_->act_type_; + return RET_ERROR; + } } return RET_OK; } @@ -154,9 +163,11 @@ int Conv2DOpenCLKernel::GenerateWinogradFilter() { 1.0000000000, -0.7071067691, 
0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808, 1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000}; - auto weight_tensor = in_tensors_[1]; + auto weight_tensor = in_tensors_.at(1); auto origin_weight_fp32 = reinterpret_cast(weight_tensor->data_c()); + MS_ASSERT(origin_weight_fp32); auto origin_weight_fp16 = reinterpret_cast(weight_tensor->data_c()); + MS_ASSERT(origin_weight_fp16); std::function access_func; if (weight_tensor->data_type() == kNumberTypeFloat32) { access_func = [=](int idx) { return origin_weight_fp32[idx]; }; @@ -216,7 +227,7 @@ int Conv2DOpenCLKernel::InitFilter() { if (use_winograd_) { GenerateWinogradFilter(); } else { - auto weight_tensor = in_tensors_[1]; + auto weight_tensor = in_tensors_.at(1); if (weight_tensor->data_type() == kNumberTypeFloat16) { if (use_fp16_) { ConvertConvWeight4DTo7D(weight_tensor->data_c(), packed_weight_, CO_, KH_, KW_, CI_, @@ -244,7 +255,7 @@ int Conv2DOpenCLKernel::InitBias() { auto allocator = ocl_runtime_->GetAllocator(); // align bias from C to C4 - auto bias_tensor = in_tensors_[2]; + auto bias_tensor = in_tensors_.at(2); size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_; packed_bias_ = allocator->Malloc(packed_bias_size); @@ -256,6 +267,7 @@ int Conv2DOpenCLKernel::InitBias() { } else { auto packed_bias_fp32 = reinterpret_cast(packed_bias_); auto origin_bias_fp16 = reinterpret_cast(bias_tensor->data_c()); + MS_ASSERT(origin_bias_fp16); for (int i = 0; i < CO_; ++i) { packed_bias_fp32[i] = static_cast(origin_bias_fp16[i]); } @@ -264,6 +276,7 @@ int Conv2DOpenCLKernel::InitBias() { if (use_fp16_) { auto packed_bias_fp16 = reinterpret_cast(packed_bias_); auto origin_bias_fp32 = reinterpret_cast(bias_tensor->data_c()); + MS_ASSERT(origin_bias_fp32); for (int i = 0; i < CO_; ++i) { packed_bias_fp16[i] = static_cast(origin_bias_fp32[i]); } @@ -456,6 +469,7 @@ int Conv2DOpenCLKernel::Tune() { } int Conv2DOpenCLKernel::Run() { + 
MS_LOG(DEBUG) << this->name() << " Running!"; if (use_winograd_) { ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()); ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_); @@ -474,6 +488,9 @@ int Conv2DOpenCLKernel::Run() { bool UseFcReplaceConv(const std::vector &inputs, const std::vector &outputs, ConvParameter *param) { + MS_ASSERT(param); + MS_ASSERT(!inputs.empty()); + MS_ASSERT(!outputs.empty()); auto input_shape = inputs.front()->shape(); - auto output_shape = inputs.front()->shape(); + auto output_shape = outputs.front()->shape(); // IH=1 IW=1 OH=1 OW=1 diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index 58b0078da1..4d5685c12d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -51,10 +51,12 @@ int FullConnectionOpenCLKernel::CheckSpecs() { MS_LOG(ERROR) << "fullconnection only support 2d output shape or 4d output but H=W=1"; return RET_ERROR; } - if (param->act_type_ != ActType_No && param->act_type_ != ActType_Relu && param->act_type_ != ActType_Relu6) { + // for fusion: ActivationType_TANH + if (param->act_type_ != ActType_No && param->act_type_ != ActType_Relu && param->act_type_ != ActType_Relu6 && + static_cast(param->act_type_) != ActivationType_TANH) { MS_LOG(ERROR) << "Unsupported activation type " << param->act_type_; return RET_ERROR; } N_ = out_gpu_info.N; CO_ = out_gpu_info.C; auto intensor_shape = GpuTensorInfo(in_tensors_[0]); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc
b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc new file mode 100644 index 0000000000..032108a94f --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc @@ -0,0 +1,16 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/runtime/kernel/opencl/kernel/fusion_eltwise.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h new file mode 100644 index 0000000000..b316c4f5d4 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h @@ -0,0 +1,20 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FUSION_ELTWISE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FUSION_ELTWISE_H_ + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FUSION_ELTWISE_H_ diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc index b8a0506866..c607bf2c44 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc @@ -35,6 +35,7 @@ namespace mindspore::kernel { int PadOpenCLKernel::CheckSpecs() { auto param = reinterpret_cast(op_parameter_); + MS_ASSERT(param); if (in_tensors_.size() != 1) { MS_LOG(ERROR) << "Pad only support 1 input Tensor."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index a8bd2f5529..972ec3096f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -77,17 +77,13 @@ int PReluOpenCLKernel::InitWeights() { int PReluOpenCLKernel::CheckSpecs() { if (in_tensors_.size() != 2 || out_tensors_.size() != 1) { - MS_LOG(ERROR) << "PRelu Only supported in_tensors_.size=2 and out_tensors_.size()= 2 but your in_tensors_.size = " - << in_tensors_.size() << "out_tensors_.size()=: " << out_tensors_.size(); + MS_LOG(ERROR) << "PRelu Only supported in_tensors_.size=2 and out_tensors_.size()=1 but your in_tensors_.size=" + << in_tensors_.size() << " out_tensors_.size()=" << out_tensors_.size(); return RET_ERROR; } - GpuTensorInfo img_info_in_tensors0(in_tensors_[0]); - GpuTensorInfo img_info_in_tensors1(in_tensors_[1]); - auto weight_tensor = in_tensors_.at(1); - auto in_tensor_channel = img_info_in_tensors0.C; - auto weight_channel = img_info_in_tensors1.C; - + auto in_tensor_channel = GpuTensorInfo(in_tensors_[0]).C; + auto weight_channel = GpuTensorInfo(in_tensors_[1]).C; if 
(weight_channel != 1 && weight_channel != in_tensor_channel) { MS_LOG(ERROR) << "PRelu weight must be equal with in_teneors channel size, but your weight size is " << weight_channel << " and your input channel size is " << in_tensor_channel; @@ -160,6 +156,6 @@ int PReluOpenCLKernel::Run() { return mindspore::lite::RET_OK; } -REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_PReLU, OpenCLKernelCreator); -REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_PReLU, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_PReLU, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_PReLU, OpenCLKernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index 58f608d819..7d3bb70a9e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -149,7 +149,7 @@ int ScaleOpenCLKernel::InitWeights() { return RET_OK; } -int ScaleOpenCLKernel::Init() { +int ScaleOpenCLKernel::Prepare() { std::string kernel_name; auto *scale_param = reinterpret_cast(op_parameter_); auto in_tensor = in_tensors_.at(0); @@ -250,25 +250,6 @@ int ScaleOpenCLKernel::Run() { return RET_OK; } -kernel::LiteKernel *OpenCLScaleKernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const lite::InnerContext *ctx, const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { - auto *kernel = new (std::nothrow) ScaleOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); - if (kernel == nullptr) { - MS_LOG(ERROR) << "Create OpenCL Scale kernel failed!"; - free(opParameter); - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: Scale"; - delete kernel; - return nullptr; - } - return kernel; -} - -REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Scale, 
OpenCLScaleKernelCreator) -REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Scale, OpenCLScaleKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Scale, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Scale, OpenCLKernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h index 958d2489dc..2792068b01 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h @@ -31,7 +31,7 @@ class ScaleOpenCLKernel : public OpenCLKernel { ~ScaleOpenCLKernel() override; int CheckSpecs() override; - int Init() override; + int Prepare() override; int Run() override; int InitWeights() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc index a92f84b621..865939da0e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -34,11 +34,21 @@ using mindspore::schema::PrimitiveType_StridedSlice; namespace mindspore::kernel { int StridedSliceOpenCLKernel::CheckSpecs() { - const std::string kernel_name = op_parameter_->type_ == PrimitiveType_Slice ? "Slice" : "StridedSlice"; - if (in_tensors_.size() != 1) { - MS_LOG(ERROR) << kernel_name + " only supports 1 input Tensor."; + if (Type() == PrimitiveType_Slice) { + if (in_tensors_.size() != 3) { + MS_LOG(ERROR) << "Slice only supports 3 input Tensor."; + return RET_ERROR; + } + } else if (Type() == PrimitiveType_StridedSlice) { + if (in_tensors_.size() != 4) { + MS_LOG(ERROR) << "StridedSlice only supports 4 input Tensor."; + return RET_ERROR; + } + } else { + MS_LOG(ERROR) << "Type error."; return RET_ERROR; } + const std::string kernel_name = Type() == PrimitiveType_Slice ? 
"Slice" : "StridedSlice"; if (out_tensors_.size() != 1) { MS_LOG(ERROR) << kernel_name + " only supports 1 output Tensor."; return RET_ERROR; @@ -78,8 +88,9 @@ int StridedSliceOpenCLKernel::InitConstArgs() { static_cast(output_info.W), static_cast(output_info.C)}; io_slices_ = {static_cast(input_info.Slice), static_cast(output_info.Slice)}; - if (op_parameter_->type_ == PrimitiveType_Slice) { + if (Type() == PrimitiveType_Slice) { auto param = reinterpret_cast(op_parameter_); + MS_ASSERT(param); Broadcast2GpuShape(begin_.s, param->begin_, param->param_length_, 0); Broadcast2GpuShape(size_.s, param->size_, param->param_length_, -1); for (int i = 0; i < 4; ++i) { @@ -101,6 +112,7 @@ int StridedSliceOpenCLKernel::InitConstArgs() { } } else { auto param = reinterpret_cast(op_parameter_); + MS_ASSERT(param); cl_int4 end = input_shape_; Broadcast2GpuShape(begin_.s, param->begins_, param->num_axes_, 0); Broadcast2GpuShape(stride_.s, param->strides_, param->num_axes_, 1); @@ -179,8 +191,8 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() { int StridedSliceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 93db7888c4..3fb4e0a9da 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -47,7 +47,7 @@ void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num) { auto *W = dst + 2; auto *C = dst + 3; if (src_num == 1) { - *N = src[0]; + *C = src[0]; } else if (src_num == 2) { *N = src[0]; *C = src[1]; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index e09619fb63..8b5cce77a8 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -115,21 +115,22 @@ int OpenCLSubGraph::GenToFormatOp(const std::vector &in_tensors, ReplaceOutTensorAndKernelToNull(in_tensors, in_kernels, mem_type); for (size_t i = 0; i < in_tensors.size(); ++i) { + auto *in_tensor = in_tensors.at(i); auto dst_format = (mem_type == MemType::IMG) ? schema::Format::Format_NHWC4 : schema::Format::Format_NHWC; auto src_format = (mem_type == MemType::IMG) ? 
schema::Format::Format_NHWC : schema::Format::Format_NHWC4; - auto *new_tensor = new (std::nothrow) lite::Tensor(); + auto *new_tensor = new (std::nothrow) + lite::Tensor(in_tensor->data_type(), in_tensor->shape(), in_tensor->format(), lite::Tensor::VAR); MS_ASSERT(new_tensor); if (new_tensor == nullptr) { MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!"; return RET_ERROR; } - new_tensor->CopyTensor(*in_tensors[i]); if (mem_type == MemType::IMG) { new_tensor->set_format(dst_format); - in_tensors[i]->set_format(src_format); + in_tensor->set_format(src_format); } else { new_tensor->set_format(src_format); - in_tensors[i]->set_format(dst_format); + in_tensor->set_format(dst_format); } out_tensors->emplace_back(new_tensor); @@ -153,11 +154,11 @@ int OpenCLSubGraph::GenToFormatOp(const std::vector &in_tensors, out_parameters->emplace_back(parameter); LiteKernel *in_convert_op = nullptr; if (mem_type == MemType::IMG) { - in_convert_op = lite::GetOpenCLKernel({in_tensors[i]}, {new_tensor}, reinterpret_cast(parameter), - context_, desc); + in_convert_op = + lite::GetOpenCLKernel({in_tensor}, {new_tensor}, reinterpret_cast(parameter), context_, desc); } else { - in_convert_op = lite::GetOpenCLKernel({new_tensor}, {in_tensors[i]}, reinterpret_cast(parameter), - context_, desc); + in_convert_op = + lite::GetOpenCLKernel({new_tensor}, {in_tensor}, reinterpret_cast(parameter), context_, desc); } MS_ASSERT(in_convert_op); if (in_convert_op == nullptr) { @@ -169,7 +170,7 @@ int OpenCLSubGraph::GenToFormatOp(const std::vector &in_tensors, return RET_ERROR; } - ReplaceOutTensorAndKernelToConvert(in_tensors.at(i), in_kernels.at(i), new_tensor, in_convert_op, mem_type); + ReplaceOutTensorAndKernelToConvert(in_tensor, in_kernels.at(i), new_tensor, in_convert_op, mem_type); // replace in_tensor of inner kernel which use out tensor if (mem_type == MemType::BUF) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.cc b/mindspore/lite/src/runtime/kernel/opencl/utils.cc index 
185461c6db..fcf75ff30d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.cc @@ -45,6 +45,32 @@ kernel::LiteKernel *GetOpenCLKernel(const std::vector &in_tensors, con namespace mindspore::kernel { +const std::set ArithmeticPrimitives = {schema::PrimitiveType_Mul, + schema::PrimitiveType_Add, + schema::PrimitiveType_Sub, + schema::PrimitiveType_Div, + schema::PrimitiveType_LogicalAnd, + schema::PrimitiveType_LogicalOr, + schema::PrimitiveType_Maximum, + schema::PrimitiveType_Minimum, + schema::PrimitiveType_FloorDiv, + schema::PrimitiveType_FloorMod, + schema::PrimitiveType_SquaredDifference, + schema::PrimitiveType_Equal, + schema::PrimitiveType_NotEqual, + schema::PrimitiveType_Less, + schema::PrimitiveType_LessEqual, + schema::PrimitiveType_Greater, + schema::PrimitiveType_GreaterEqual, + schema::PrimitiveType_Eltwise}; + +const std::set ArithmeticSelfPrimitives = { + schema::PrimitiveType_Abs, schema::PrimitiveType_Ceil, schema::PrimitiveType_Cos, + schema::PrimitiveType_Exp, schema::PrimitiveType_Floor, schema::PrimitiveType_Log, + schema::PrimitiveType_LogicalNot, schema::PrimitiveType_Round, schema::PrimitiveType_Rsqrt, + schema::PrimitiveType_Sin, schema::PrimitiveType_Neg, schema::PrimitiveType_Sqrt, + schema::PrimitiveType_Square}; + std::string GetActDefines() { static std::string act_defines = "#define ActivationType_RELU " + std::to_string(ActivationType_RELU) + "\n#define ActivationType_RELU6 " + std::to_string(ActivationType_RELU6) + diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.h b/mindspore/lite/src/runtime/kernel/opencl/utils.h index 0971b88913..fbcf5552eb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.h +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.h @@ -19,6 +19,7 @@ #include #include +#include #include "CL/cl2.hpp" #include "src/common/log_adapter.h" #include "nnacl/op_base.h" @@ -34,6 +35,12 @@ kernel::LiteKernel *GetOpenCLKernel(const std::vector 
&in_tensors, con namespace mindspore::kernel { +// for fusion +extern const std::set ArithmeticPrimitives; +extern const std::set ArithmeticSelfPrimitives; +inline bool IsArithmetic(schema::PrimitiveType type) { return ArithmeticPrimitives.count(type); } +inline bool IsArithmeticSelf(schema::PrimitiveType type) { return ArithmeticSelfPrimitives.count(type); } + std::string GetActDefines(); int GetUpPow2(int n); diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index 8beaa6054a..ccbdc1c23b 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -51,6 +51,7 @@ void *OpenCLAllocator::MinimumFit(size_t size, const std::vector &img_si bool is_match{mem_buf->img_size.size() == img_size.size()}; for (int i = 0; i < img_size.size() && is_match; ++i) { is_match &= img_size[i] == mem_buf->img_size[i]; + is_match &= mem_buf->device_ptr_ != nullptr; } if (is_match) { free_list_.erase(iter);