From: @wangdongxu6
Reviewed-by: @hangangqiang, @ddwsky
Signed-off-by: @ddwsky
tags/v1.1.0
@@ -54,7 +54,7 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() {
     MS_LOG(ERROR) << "Invalid axis " << param->axis_;
     return RET_ERROR;
   }
-  param->get_max_ = (op_parameter_->type_ == PrimitiveType_ArgMax);
+  param->get_max_ = (Type() == PrimitiveType_ArgMax);
   return RET_OK;
 }
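Note: the `Type()` accessor used here (and throughout the rest of this change) replaces raw reads of `op_parameter_->type_`. Its definition is not part of this diff; a minimal sketch of the assumed shape, on the kernel base class:

// Sketch (assumption): a typed accessor over the raw int stored in OpParameter.
schema::PrimitiveType Type() const {
  return static_cast<schema::PrimitiveType>(op_parameter_->type_);
}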
@@ -38,25 +38,6 @@ using mindspore::schema::PrimitiveType_Eltwise;
 namespace mindspore::kernel {
-std::set<schema::PrimitiveType> SupportedOpenCLArithmetics = {PrimitiveType_Mul,
-                                                              PrimitiveType_Add,
-                                                              PrimitiveType_Sub,
-                                                              PrimitiveType_Div,
-                                                              PrimitiveType_LogicalAnd,
-                                                              PrimitiveType_LogicalOr,
-                                                              PrimitiveType_Maximum,
-                                                              PrimitiveType_Minimum,
-                                                              PrimitiveType_FloorDiv,
-                                                              PrimitiveType_FloorMod,
-                                                              PrimitiveType_SquaredDifference,
-                                                              PrimitiveType_Equal,
-                                                              PrimitiveType_NotEqual,
-                                                              PrimitiveType_Less,
-                                                              PrimitiveType_LessEqual,
-                                                              PrimitiveType_Greater,
-                                                              PrimitiveType_GreaterEqual,
-                                                              PrimitiveType_Eltwise};
 int ArithmeticOpenCLKernel::CheckSpecs() {
   if (in_tensors_.size() != 2 || out_tensors_.size() != 1) {
     MS_LOG(ERROR) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size();
@@ -67,8 +48,8 @@ int ArithmeticOpenCLKernel::CheckSpecs() {
     MS_LOG(ERROR) << "Broadcasting don't support N > 1";
     return RET_ERROR;
   }
-  if (SupportedOpenCLArithmetics.count(static_cast<schema::PrimitiveType>(op_parameter_->type_)) == 0) {
-    MS_LOG(ERROR) << "UnSupported Operator: " << schema::EnumNamesPrimitiveType()[op_parameter_->type_];
+  if (!IsArithmetic(Type())) {
+    MS_LOG(ERROR) << "UnSupported Operator: " << schema::EnumNamePrimitiveType(Type());
     return RET_ERROR;
   }
   if (!(param->activation_type_ == ActivationType_NO_ACTIVATION || param->activation_type_ == ActivationType_RELU ||
@@ -201,7 +182,7 @@ int ArithmeticOpenCLKernel::Prepare() {
   auto *param = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
   element_flag_ = !param->broadcasting_;
   kernel_name_ = param->broadcasting_ ? "BroadcastNHWC4" : "Element";
-  kernel_name_ += schema::EnumNamesPrimitiveType()[op_parameter_->type_];
+  kernel_name_ += schema::EnumNamePrimitiveType(Type());
   if (param->activation_type_ == ActivationType_RELU) {
     activation_min_ = 0.f;
   } else if (param->activation_type_ == ActivationType_RELU6) {
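Note: both hunks above also switch from indexing the flatbuffers name table to the generated single-value lookup. Assuming the usual flatc output, the two helpers look roughly like this; the point of the change is that `EnumNamePrimitiveType(Type())` takes a typed enum instead of indexing a bare array with an unchecked int:

// Sketch of the flatbuffers-generated helpers (assumed shape):
inline const char *const *EnumNamesPrimitiveType();  // bare name table
inline const char *EnumNamePrimitiveType(PrimitiveType e) {
  const size_t index = static_cast<size_t>(e);  // value constrained by the enum type
  return EnumNamesPrimitiveType()[index];
}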
@@ -25,75 +25,18 @@ using mindspore::kernel::KERNEL_ARCH::kGPU;
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_Abs;
-using mindspore::schema::PrimitiveType_Ceil;
-using mindspore::schema::PrimitiveType_Cos;
-using mindspore::schema::PrimitiveType_Exp;
-using mindspore::schema::PrimitiveType_Floor;
-using mindspore::schema::PrimitiveType_Log;
-using mindspore::schema::PrimitiveType_LogicalNot;
-using mindspore::schema::PrimitiveType_Neg;
-using mindspore::schema::PrimitiveType_Round;
-using mindspore::schema::PrimitiveType_Rsqrt;
-using mindspore::schema::PrimitiveType_Sin;
-using mindspore::schema::PrimitiveType_Sqrt;
-using mindspore::schema::PrimitiveType_Square;
 namespace mindspore::kernel {
-void ArithmeticSelfOpenCLKernel::GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param) {
-  MS_ASSERT(kernel_name);
-  MS_ASSERT(param);
-  switch (param->op_parameter_.type_) {
-    case PrimitiveType_Abs:
-      kernel_name[0] += "_ElementAbs";
-      break;
-    case PrimitiveType_Cos:
-      kernel_name[0] += "_ElementCos";
-      break;
-    case PrimitiveType_Exp:
-      kernel_name[0] += "_ElementExp";
-      break;
-    case PrimitiveType_Log:
-      kernel_name[0] += "_ElementLog";
-      break;
-    case PrimitiveType_Square:
-      kernel_name[0] += "_ElementSquare";
-      break;
-    case PrimitiveType_Sqrt:
-      kernel_name[0] += "_ElementSqrt";
-      break;
-    case PrimitiveType_Rsqrt:
-      kernel_name[0] += "_ElementRsqrt";
-      break;
-    case PrimitiveType_Sin:
-      kernel_name[0] += "_ElementSin";
-      break;
-    case PrimitiveType_LogicalNot:
-      kernel_name[0] += "_ElementLogicalNot";
-      break;
-    case PrimitiveType_Floor:
-      kernel_name[0] += "_ElementFloor";
-      break;
-    case PrimitiveType_Ceil:
-      kernel_name[0] += "_ElementCeil";
-      break;
-    case PrimitiveType_Round:
-      kernel_name[0] += "_ElementRound";
-      break;
-    case PrimitiveType_Neg:
-      kernel_name[0] += "_ElementNeg";
-      break;
-    default:
-      break;
-  }
-}
 int ArithmeticSelfOpenCLKernel::CheckSpecs() {
   if (in_tensors_.size() != 1 || out_tensors_.size() != 1) {
     MS_LOG(ERROR) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size();
     return RET_ERROR;
   }
+  if (!IsArithmeticSelf(Type())) {
+    MS_LOG(ERROR) << "UnSupported Operator: " << schema::EnumNamePrimitiveType(Type());
+    return RET_ERROR;
+  }
   if (in_tensors_[0]->shape().size() != 4 && in_tensors_[0]->shape().size() != 2) {
     MS_LOG(ERROR) << " only support dim = 4 or 2 but your dim = " << in_tensors_[0]->shape().size();
     return RET_ERROR;
@@ -101,11 +44,6 @@ int ArithmeticSelfOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
-void ArithmeticSelfOpenCLKernel::SetConstArgs() {
-  int arg_cn = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
-}
 void ArithmeticSelfGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) {
   const int max_divider = 8;
   const int max_x = 4, max_y = 8;
@@ -142,27 +80,20 @@ void ArithmeticSelfOpenCLKernel::SetGlobalLocal() {
 }
 int ArithmeticSelfOpenCLKernel::Prepare() {
   auto param = reinterpret_cast<ArithmeticSelfParameter *>(this->op_parameter_);
-  std::string kernel_name = "ArithmeticSelf";
-  GetKernelName(&kernel_name, param);
-  kernel_name += "_NHWC4";
+  std::string kernel_name = "ArithmeticSelf_Element" + std::string(schema::EnumNamePrimitiveType(Type())) + "_NHWC4";
   MS_LOG(DEBUG) << "execute kernel name : " << kernel_name;
-  std::set<std::string> build_options;
-  std::string source = arithmeticself_source;
   std::string program_name = "ArithmeticSelf";
-  ocl_runtime_->LoadSource(program_name, source);
-  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
+  ocl_runtime_->LoadSource(program_name, arithmeticself_source);
+  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
   SetGlobalLocal();
   SetConstArgs();
   return RET_OK;
 }
 int ArithmeticSelfOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
-  int arg_cn = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
   ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
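Note: with the name derived from the enum, the deleted per-op switch collapses into a single concatenation. Worked example for an Abs node:

// For Type() == PrimitiveType_Abs:
//   schema::EnumNamePrimitiveType(Type())  ->  "Abs"
//   kernel_name                            ->  "ArithmeticSelf_ElementAbs_NHWC4"
// i.e. the same string the removed GetKernelName switch built via "_ElementAbs".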
@@ -180,5 +111,18 @@ REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Sin, OpenCLKernelCreator<Arit
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Neg, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Sqrt, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Square, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Abs, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Ceil, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Cos, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Exp, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Floor, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Log, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LogicalNot, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Round, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Rsqrt, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Sin, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Neg, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Sqrt, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Square, OpenCLKernelCreator<ArithmeticSelfOpenCLKernel>)
 }  // namespace mindspore::kernel
@@ -22,6 +22,21 @@
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
 #include "nnacl/arithmetic_self_parameter.h"
+using mindspore::schema::PrimitiveType_Abs;
+using mindspore::schema::PrimitiveType_Ceil;
+using mindspore::schema::PrimitiveType_Cos;
+using mindspore::schema::PrimitiveType_Eltwise;
+using mindspore::schema::PrimitiveType_Exp;
+using mindspore::schema::PrimitiveType_Floor;
+using mindspore::schema::PrimitiveType_Log;
+using mindspore::schema::PrimitiveType_LogicalNot;
+using mindspore::schema::PrimitiveType_Neg;
+using mindspore::schema::PrimitiveType_Round;
+using mindspore::schema::PrimitiveType_Rsqrt;
+using mindspore::schema::PrimitiveType_Sin;
+using mindspore::schema::PrimitiveType_Sqrt;
+using mindspore::schema::PrimitiveType_Square;
 namespace mindspore::kernel {
 class ArithmeticSelfOpenCLKernel : public OpenCLKernel {
@@ -35,13 +50,12 @@ class ArithmeticSelfOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  void SetConstArgs() override { ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_); }
   void SetGlobalLocal() override;
   int Run() override;

  private:
-  void GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param);
   cl_int4 output_shape_ = {};
 };
@@ -57,17 +57,26 @@ int Conv2DOpenCLKernel::CheckSpecs() {
     MS_LOG(ERROR) << "Conv2D only supports 4D input Tensor but get " << in_tensors_.front()->shape().size() << "D.";
     return RET_ERROR;
   }
-  if (in_tensors_[1]->shape().size() != 4) {
-    MS_LOG(ERROR) << "Conv2D only supports 4D filter Tensor but get " << in_tensors_[1]->shape().size() << "D.";
+  if (in_tensors_.at(1)->shape().size() != 4) {
+    MS_LOG(ERROR) << "Conv2D only supports 4D filter Tensor but get " << in_tensors_.at(1)->shape().size() << "D.";
     return RET_ERROR;
   }
   if (out_tensors_.front()->shape().size() != 4) {
     MS_LOG(ERROR) << "Conv2D only supports 4D output Tensor but get " << out_tensors_.front()->shape().size() << "D.";
     return RET_ERROR;
   }
-  if (param_->act_type_ != ActType_No && param_->act_type_ != ActType_Relu && param_->act_type_ != ActType_Relu6) {
-    MS_LOG(ERROR) << "Unsupported activation type " << param_->act_type_;
-    return RET_ERROR;
+  // for fusion: ActivationType_LEAKY_RELU ActivationType_TANH
+  switch (static_cast<int>(param_->act_type_)) {
+    case ActType_No:
+    case ActType_Relu:
+    case ActType_Relu6:
+    case ActivationType_LEAKY_RELU:
+    case ActivationType_TANH:
+      break;
+    default: {
+      MS_LOG(ERROR) << "Unsupported activation type " << param_->act_type_;
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
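Note: the switch compares through `static_cast<int>` because `act_type_` is an nnacl `ActType`, while the fusion pass stores `schema::ActivationType` labels in the same field; an int switch can carry case labels from both unscoped enums. A self-contained illustration of the pattern (enum values here are hypothetical, the real ones live in nnacl and the schema; like the real code, it relies on the two enums not assigning the same value to different activations):

// Hypothetical enums standing in for ActType / schema::ActivationType:
enum ActType { ActType_No = 0, ActType_Relu = 1, ActType_Relu6 = 3 };
enum ActivationType { ActivationType_LEAKY_RELU = 8, ActivationType_TANH = 9 };

bool ActSupported(int act) {
  switch (act) {  // one int switch accepts labels from both enums
    case ActType_No:
    case ActType_Relu:
    case ActType_Relu6:
    case ActivationType_LEAKY_RELU:
    case ActivationType_TANH:
      return true;
    default:
      return false;
  }
}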
@@ -154,9 +163,11 @@ int Conv2DOpenCLKernel::GenerateWinogradFilter() {
                          1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
                          1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};
-  auto weight_tensor = in_tensors_[1];
+  auto weight_tensor = in_tensors_.at(1);
   auto origin_weight_fp32 = reinterpret_cast<float *>(weight_tensor->data_c());
+  MS_ASSERT(origin_weight_fp32);
   auto origin_weight_fp16 = reinterpret_cast<float16_t *>(weight_tensor->data_c());
+  MS_ASSERT(origin_weight_fp16);
   std::function<float(int)> access_func;
   if (weight_tensor->data_type() == kNumberTypeFloat32) {
     access_func = [=](int idx) { return origin_weight_fp32[idx]; };
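Note: GenerateWinogradFilter reads the origin weights through a `std::function<float(int)>`, so one transform loop serves both dtypes. The fp32 branch is shown above; the fp16 branch is assumed symmetric:

// Sketch of the dtype dispatch (fp16 branch assumed to mirror the fp32 one):
std::function<float(int)> access_func;
if (weight_tensor->data_type() == kNumberTypeFloat32) {
  access_func = [=](int idx) { return origin_weight_fp32[idx]; };
} else {
  access_func = [=](int idx) { return static_cast<float>(origin_weight_fp16[idx]); };
}
// The Winograd transform only ever calls access_func(i), independent of storage
// type, which is why both raw pointers are asserted non-null up front.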
@@ -216,7 +227,7 @@ int Conv2DOpenCLKernel::InitFilter() {
   if (use_winograd_) {
     GenerateWinogradFilter();
   } else {
-    auto weight_tensor = in_tensors_[1];
+    auto weight_tensor = in_tensors_.at(1);
     if (weight_tensor->data_type() == kNumberTypeFloat16) {
       if (use_fp16_) {
         ConvertConvWeight4DTo7D<float16_t, float16_t>(weight_tensor->data_c(), packed_weight_, CO_, KH_, KW_, CI_,
@@ -244,7 +255,7 @@ int Conv2DOpenCLKernel::InitBias() {
   auto allocator = ocl_runtime_->GetAllocator();
   // align bias from C to C4
-  auto bias_tensor = in_tensors_[2];
+  auto bias_tensor = in_tensors_.at(2);
   size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_;
   packed_bias_ = allocator->Malloc(packed_bias_size);
@@ -256,6 +267,7 @@ int Conv2DOpenCLKernel::InitBias() {
     } else {
       auto packed_bias_fp32 = reinterpret_cast<float *>(packed_bias_);
       auto origin_bias_fp16 = reinterpret_cast<float16_t *>(bias_tensor->data_c());
+      MS_ASSERT(origin_bias_fp16);
       for (int i = 0; i < CO_; ++i) {
         packed_bias_fp32[i] = static_cast<float>(origin_bias_fp16[i]);
       }
@@ -264,6 +276,7 @@ int Conv2DOpenCLKernel::InitBias() {
     if (use_fp16_) {
       auto packed_bias_fp16 = reinterpret_cast<float16_t *>(packed_bias_);
       auto origin_bias_fp32 = reinterpret_cast<float *>(bias_tensor->data_c());
+      MS_ASSERT(origin_bias_fp32);
       for (int i = 0; i < CO_; ++i) {
         packed_bias_fp16[i] = static_cast<float16_t>(origin_bias_fp32[i]);
       }
@@ -456,6 +469,7 @@ int Conv2DOpenCLKernel::Tune() {
 }
 int Conv2DOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
+
   if (use_winograd_) {
     ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c());
     ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_);
@@ -474,6 +488,9 @@ int Conv2DOpenCLKernel::Run() {
 bool UseFcReplaceConv(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                       ConvParameter *param) {
+  MS_ASSERT(param);
+  MS_ASSERT(!inputs.empty());
+  MS_ASSERT(!outputs.empty());
   auto input_shape = inputs.front()->shape();
   auto output_shape = outputs.front()->shape();
   // IH=1 IW=1 OH=1 OW=1
@@ -51,10 +51,24 @@ int FullConnectionOpenCLKernel::CheckSpecs() {
     MS_LOG(ERROR) << "fullconnection only support 2d output shape or 4d output but H=W=1";
     return RET_ERROR;
   }
-  if (param->act_type_ != ActType_No && param->act_type_ != ActType_Relu && param->act_type_ != ActType_Relu6) {
+  // for fusion: ActivationType_TANH
+  if (param->act_type_ != ActType_No && param->act_type_ != ActType_Relu && param->act_type_ != ActType_Relu6 &&
+      static_cast<schema::ActivationType>(param->act_type_) != ActivationType_TANH) {
     MS_LOG(ERROR) << "Unsupported activation type " << param->act_type_;
     return RET_ERROR;
   }
+  // for fusion: ActivationType_TANH
+  switch (static_cast<int>(param->act_type_)) {
+    case ActType_No:
+    case ActType_Relu:
+    case ActType_Relu6:
+    case ActivationType_TANH:
+      break;
+    default: {
+      MS_LOG(ERROR) << "Unsupported activation type " << param->act_type_;
+      return RET_ERROR;
+    }
+  }
   N_ = out_gpu_info.N;
   CO_ = out_gpu_info.C;
   auto intensor_shape = GpuTensorInfo(in_tensors_[0]);
@@ -0,0 +1,16 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/opencl/kernel/fusion_eltwise.h"
@@ -0,0 +1,20 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FUSION_ELTWISE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FUSION_ELTWISE_H_
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FUSION_ELTWISE_H_
@@ -35,6 +35,7 @@ namespace mindspore::kernel {
 int PadOpenCLKernel::CheckSpecs() {
   auto param = reinterpret_cast<PadParameter *>(op_parameter_);
+  MS_ASSERT(param);
   if (in_tensors_.size() != 1) {
     MS_LOG(ERROR) << "Pad only support 1 input Tensor.";
     return RET_ERROR;
@@ -77,17 +77,13 @@ int PReluOpenCLKernel::InitWeights() {
 int PReluOpenCLKernel::CheckSpecs() {
   if (in_tensors_.size() != 2 || out_tensors_.size() != 1) {
-    MS_LOG(ERROR) << "PRelu Only supported in_tensors_.size=2 and out_tensors_.size()= 2 but your in_tensors_.size = "
-                  << in_tensors_.size() << "out_tensors_.size()=: " << out_tensors_.size();
+    MS_LOG(ERROR) << "PRelu Only supported in_tensors_.size=2 and out_tensors_.size()=1 but your in_tensors_.size="
+                  << in_tensors_.size() << " out_tensors_.size()=" << out_tensors_.size();
     return RET_ERROR;
   }
-  GpuTensorInfo img_info_in_tensors0(in_tensors_[0]);
-  GpuTensorInfo img_info_in_tensors1(in_tensors_[1]);
-  auto in_tensor_channel = img_info_in_tensors0.C;
-  auto weight_channel = img_info_in_tensors1.C;
+  auto weight_tensor = in_tensors_.at(1);
+  auto in_tensor_channel = GpuTensorInfo(in_tensors_[0]).C;
+  auto weight_channel = GpuTensorInfo(in_tensors_[1]).C;
   if (weight_channel != 1 && weight_channel != in_tensor_channel) {
     MS_LOG(ERROR) << "PRelu weight must be equal with in_teneors channel size, but your weight size is "
                   << weight_channel << " and your input channel size is " << in_tensor_channel;
@@ -160,6 +156,6 @@ int PReluOpenCLKernel::Run() {
   return mindspore::lite::RET_OK;
 }
-REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_PReLU, OpenCLKernelCreator<PReluOpenCLKernel>);
-REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_PReLU, OpenCLKernelCreator<PReluOpenCLKernel>);
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_PReLU, OpenCLKernelCreator<PReluOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_PReLU, OpenCLKernelCreator<PReluOpenCLKernel>)
 }  // namespace mindspore::kernel
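Note: dropping the trailing semicolons makes sense if `REG_KERNEL` expands to a complete definition. A sketch of a registrar macro with that property (hypothetical expansion; the real macro lives in the kernel registry headers):

// Hypothetical shape of a self-terminating registration macro:
#define REG_KERNEL(arch, dtype, type, creator) \
  static mindspore::lite::KernelRegistrar g_##arch##_##dtype##_##type##_reg(arch, dtype, type, creator);

With this shape, writing `REG_KERNEL(...);` leaves a stray empty declaration at namespace scope, which pedantic builds flag (e.g. -Wextra-semi).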
@@ -149,7 +149,7 @@ int ScaleOpenCLKernel::InitWeights() {
   return RET_OK;
 }
-int ScaleOpenCLKernel::Init() {
+int ScaleOpenCLKernel::Prepare() {
   std::string kernel_name;
   auto *scale_param = reinterpret_cast<const ScaleParameter *>(op_parameter_);
   auto in_tensor = in_tensors_.at(0);
@@ -250,25 +250,6 @@ int ScaleOpenCLKernel::Run() {
   return RET_OK;
 }
-kernel::LiteKernel *OpenCLScaleKernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                             const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                             const lite::InnerContext *ctx, const kernel::KernelKey &desc,
-                                             const mindspore::lite::PrimitiveC *primitive) {
-  auto *kernel = new (std::nothrow) ScaleOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "Create OpenCL Scale kernel failed!";
-    free(opParameter);
-    return nullptr;
-  }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed, name: Scale";
-    delete kernel;
-    return nullptr;
-  }
-  return kernel;
-}
-REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Scale, OpenCLScaleKernelCreator)
-REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Scale, OpenCLScaleKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Scale, OpenCLKernelCreator<ScaleOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Scale, OpenCLKernelCreator<ScaleOpenCLKernel>)
 }  // namespace mindspore::kernel
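Note: the hand-written creator above is replaced by the shared `OpenCLKernelCreator<T>` template, whose definition is not in this diff. Mirroring the deleted function (with Init() renamed to the new Prepare()), its assumed shape is roughly:

// Assumed sketch of the shared creator template:
template <typename T>
kernel::LiteKernel *OpenCLKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                        const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                        const mindspore::lite::PrimitiveC *primitive) {
  auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "Create OpenCL kernel failed!";
    free(opParameter);
    return nullptr;
  }
  if (kernel->CheckSpecs() != RET_OK || kernel->Prepare() != RET_OK) {
    MS_LOG(ERROR) << "Init OpenCL kernel failed!";
    delete kernel;
    return nullptr;
  }
  return kernel;
}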
@@ -31,7 +31,7 @@ class ScaleOpenCLKernel : public OpenCLKernel {
   ~ScaleOpenCLKernel() override;
   int CheckSpecs() override;
-  int Init() override;
+  int Prepare() override;
   int Run() override;
   int InitWeights() override;
@@ -34,11 +34,21 @@ using mindspore::schema::PrimitiveType_StridedSlice;
 namespace mindspore::kernel {
 int StridedSliceOpenCLKernel::CheckSpecs() {
-  const std::string kernel_name = op_parameter_->type_ == PrimitiveType_Slice ? "Slice" : "StridedSlice";
-  if (in_tensors_.size() != 1) {
-    MS_LOG(ERROR) << kernel_name + " only supports 1 input Tensor.";
+  if (Type() == PrimitiveType_Slice) {
+    if (in_tensors_.size() != 3) {
+      MS_LOG(ERROR) << "Slice only supports 3 input Tensor.";
+      return RET_ERROR;
+    }
+  } else if (Type() == PrimitiveType_StridedSlice) {
+    if (in_tensors_.size() != 4) {
+      MS_LOG(ERROR) << "StridedSlice only supports 4 input Tensor.";
+      return RET_ERROR;
+    }
+  } else {
+    MS_LOG(ERROR) << "Type error.";
     return RET_ERROR;
   }
+  const std::string kernel_name = Type() == PrimitiveType_Slice ? "Slice" : "StridedSlice";
   if (out_tensors_.size() != 1) {
     MS_LOG(ERROR) << kernel_name + " only supports 1 output Tensor.";
     return RET_ERROR;
@@ -78,8 +88,9 @@ int StridedSliceOpenCLKernel::InitConstArgs() {
                   static_cast<cl_int>(output_info.W), static_cast<cl_int>(output_info.C)};
   io_slices_ = {static_cast<cl_int>(input_info.Slice), static_cast<cl_int>(output_info.Slice)};
-  if (op_parameter_->type_ == PrimitiveType_Slice) {
+  if (Type() == PrimitiveType_Slice) {
     auto param = reinterpret_cast<SliceParameter *>(op_parameter_);
+    MS_ASSERT(param);
     Broadcast2GpuShape(begin_.s, param->begin_, param->param_length_, 0);
     Broadcast2GpuShape(size_.s, param->size_, param->param_length_, -1);
     for (int i = 0; i < 4; ++i) {
@@ -101,6 +112,7 @@ int StridedSliceOpenCLKernel::InitConstArgs() {
     }
   } else {
     auto param = reinterpret_cast<StridedSliceParameter *>(op_parameter_);
+    MS_ASSERT(param);
     cl_int4 end = input_shape_;
     Broadcast2GpuShape(begin_.s, param->begins_, param->num_axes_, 0);
     Broadcast2GpuShape(stride_.s, param->strides_, param->num_axes_, 1);
@@ -179,8 +191,8 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() {
 int StridedSliceOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
   ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
@@ -47,7 +47,7 @@ void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num) {
   auto *W = dst + 2;
   auto *C = dst + 3;
   if (src_num == 1) {
-    *N = src[0];
+    *C = src[0];
   } else if (src_num == 2) {
     *N = src[0];
     *C = src[1];
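Note: this is a behavior fix. When a rank-1 operand is broadcast to the 4D GPU shape, the single extent now lands in the channel slot:

// src = {s}, src_num == 1, dst laid out as {N, H, W, C}:
//   before: dst = {s, default, default, default}   // scalar treated as batch
//   after:  dst = {default, default, default, s}   // scalar treated as channels
// matching NHWC broadcasting, where a rank-1 operand aligns with the C axis.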
@@ -115,21 +115,22 @@ int OpenCLSubGraph::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
   ReplaceOutTensorAndKernelToNull(in_tensors, in_kernels, mem_type);
   for (size_t i = 0; i < in_tensors.size(); ++i) {
+    auto *in_tensor = in_tensors.at(i);
     auto dst_format = (mem_type == MemType::IMG) ? schema::Format::Format_NHWC4 : schema::Format::Format_NHWC;
     auto src_format = (mem_type == MemType::IMG) ? schema::Format::Format_NHWC : schema::Format::Format_NHWC4;
-    auto *new_tensor = new (std::nothrow) lite::Tensor();
+    auto *new_tensor = new (std::nothrow)
+      lite::Tensor(in_tensor->data_type(), in_tensor->shape(), in_tensor->format(), lite::Tensor::VAR);
     MS_ASSERT(new_tensor);
     if (new_tensor == nullptr) {
       MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!";
       return RET_ERROR;
     }
-    new_tensor->CopyTensor(*in_tensors[i]);
     if (mem_type == MemType::IMG) {
       new_tensor->set_format(dst_format);
-      in_tensors[i]->set_format(src_format);
+      in_tensor->set_format(src_format);
     } else {
       new_tensor->set_format(src_format);
-      in_tensors[i]->set_format(dst_format);
+      in_tensor->set_format(dst_format);
     }
     out_tensors->emplace_back(new_tensor);
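Note: constructing the converter tensor with explicit dtype/shape/format replaces the default-construct-then-CopyTensor pattern, so the new tensor starts as a bare VAR tensor carrying only the metadata the format-conversion op needs. Side-by-side sketch (assuming CopyTensor duplicates all attributes of the source tensor):

// old: clone every attribute of the source tensor
auto *t = new (std::nothrow) lite::Tensor();
t->CopyTensor(*in_tensors[i]);
// new: metadata-only VAR tensor; its format is then overwritten just below
auto *t2 = new (std::nothrow)
  lite::Tensor(in_tensor->data_type(), in_tensor->shape(), in_tensor->format(), lite::Tensor::VAR);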
@@ -153,11 +154,11 @@ int OpenCLSubGraph::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
     out_parameters->emplace_back(parameter);
     LiteKernel *in_convert_op = nullptr;
     if (mem_type == MemType::IMG) {
-      in_convert_op = lite::GetOpenCLKernel({in_tensors[i]}, {new_tensor}, reinterpret_cast<OpParameter *>(parameter),
-                                            context_, desc);
+      in_convert_op =
+        lite::GetOpenCLKernel({in_tensor}, {new_tensor}, reinterpret_cast<OpParameter *>(parameter), context_, desc);
     } else {
-      in_convert_op = lite::GetOpenCLKernel({new_tensor}, {in_tensors[i]}, reinterpret_cast<OpParameter *>(parameter),
-                                            context_, desc);
+      in_convert_op =
+        lite::GetOpenCLKernel({new_tensor}, {in_tensor}, reinterpret_cast<OpParameter *>(parameter), context_, desc);
     }
     MS_ASSERT(in_convert_op);
     if (in_convert_op == nullptr) {
@@ -169,7 +170,7 @@ int OpenCLSubGraph::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
       return RET_ERROR;
     }
-    ReplaceOutTensorAndKernelToConvert(in_tensors.at(i), in_kernels.at(i), new_tensor, in_convert_op, mem_type);
+    ReplaceOutTensorAndKernelToConvert(in_tensor, in_kernels.at(i), new_tensor, in_convert_op, mem_type);
     // replace in_tensor of inner kernel which use out tensor
     if (mem_type == MemType::BUF) {
@@ -45,6 +45,32 @@ kernel::LiteKernel *GetOpenCLKernel(const std::vector<Tensor *> &in_tensors, con
 namespace mindspore::kernel {
+const std::set<schema::PrimitiveType> ArithmeticPrimitives = {schema::PrimitiveType_Mul,
+                                                              schema::PrimitiveType_Add,
+                                                              schema::PrimitiveType_Sub,
+                                                              schema::PrimitiveType_Div,
+                                                              schema::PrimitiveType_LogicalAnd,
+                                                              schema::PrimitiveType_LogicalOr,
+                                                              schema::PrimitiveType_Maximum,
+                                                              schema::PrimitiveType_Minimum,
+                                                              schema::PrimitiveType_FloorDiv,
+                                                              schema::PrimitiveType_FloorMod,
+                                                              schema::PrimitiveType_SquaredDifference,
+                                                              schema::PrimitiveType_Equal,
+                                                              schema::PrimitiveType_NotEqual,
+                                                              schema::PrimitiveType_Less,
+                                                              schema::PrimitiveType_LessEqual,
+                                                              schema::PrimitiveType_Greater,
+                                                              schema::PrimitiveType_GreaterEqual,
+                                                              schema::PrimitiveType_Eltwise};
+const std::set<schema::PrimitiveType> ArithmeticSelfPrimitives = {
+  schema::PrimitiveType_Abs,        schema::PrimitiveType_Ceil,  schema::PrimitiveType_Cos,
+  schema::PrimitiveType_Exp,        schema::PrimitiveType_Floor, schema::PrimitiveType_Log,
+  schema::PrimitiveType_LogicalNot, schema::PrimitiveType_Round, schema::PrimitiveType_Rsqrt,
+  schema::PrimitiveType_Sin,        schema::PrimitiveType_Neg,   schema::PrimitiveType_Sqrt,
+  schema::PrimitiveType_Square};
 std::string GetActDefines() {
   static std::string act_defines = "#define ActivationType_RELU " + std::to_string(ActivationType_RELU) +
                                    "\n#define ActivationType_RELU6 " + std::to_string(ActivationType_RELU6) +
@@ -19,6 +19,7 @@
 #include <string>
 #include <vector>
 #include <set>
+#include "CL/cl2.hpp"
 #include "src/common/log_adapter.h"
 #include "nnacl/op_base.h"
@@ -34,6 +35,12 @@ kernel::LiteKernel *GetOpenCLKernel(const std::vector<Tensor *> &in_tensors, con
 namespace mindspore::kernel {
+// for fusion
+extern const std::set<schema::PrimitiveType> ArithmeticPrimitives;
+extern const std::set<schema::PrimitiveType> ArithmeticSelfPrimitives;
+inline bool IsArithmetic(schema::PrimitiveType type) { return ArithmeticPrimitives.count(type); }
+inline bool IsArithmeticSelf(schema::PrimitiveType type) { return ArithmeticSelfPrimitives.count(type); }
 std::string GetActDefines();
 int GetUpPow2(int n);
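Note: these helpers back the CheckSpecs changes earlier in the diff; usage:

// As adopted in ArithmeticOpenCLKernel::CheckSpecs():
if (!IsArithmetic(Type())) {
  MS_LOG(ERROR) << "UnSupported Operator: " << schema::EnumNamePrimitiveType(Type());
  return RET_ERROR;
}
// std::set::count returns 0 or 1, so the implicit conversion to bool is exact.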
@@ -51,6 +51,7 @@ void *OpenCLAllocator::MinimumFit(size_t size, const std::vector<size_t> &img_si
   bool is_match{mem_buf->img_size.size() == img_size.size()};
   for (int i = 0; i < img_size.size() && is_match; ++i) {
     is_match &= img_size[i] == mem_buf->img_size[i];
+    is_match &= mem_buf->device_ptr_ != nullptr;
   }
   if (is_match) {
     free_list_.erase(iter);