From 3e9ebc745f2719991967be0883a36b73f19b1176 Mon Sep 17 00:00:00 2001 From: yangruoqi713 Date: Mon, 12 Apr 2021 17:38:04 +0800 Subject: [PATCH] [MSLITE][DEVELOP] rectify arm cpu fp16 conv register --- .../arm/base/group_convolution_creator.cc | 109 +++----- .../arm/base/group_convolution_creator.h | 28 +-- .../kernel/arm/fp16/convolution_1x1_fp16.cc | 22 +- .../kernel/arm/fp16/convolution_1x1_fp16.h | 8 +- .../kernel/arm/fp16/convolution_base_fp16.cc | 52 ---- .../kernel/arm/fp16/convolution_base_fp16.h | 57 ----- .../arm/fp16/convolution_delegate_fp16.cc | 235 +++--------------- .../arm/fp16/convolution_delegate_fp16.h | 12 +- .../arm/fp16/convolution_depthwise_fp16.cc | 31 +-- .../arm/fp16/convolution_depthwise_fp16.h | 14 +- .../convolution_depthwise_slidewindow_fp16.cc | 28 ++- .../convolution_depthwise_slidewindow_fp16.h | 14 +- .../kernel/arm/fp16/convolution_fp16.cc | 24 +- .../kernel/arm/fp16/convolution_fp16.h | 8 +- .../arm/fp16/convolution_winograd_fp16.cc | 38 +-- .../arm/fp16/convolution_winograd_fp16.h | 9 +- .../arm/fp16/deconvolution_depthwise_fp16.cc | 25 +- .../arm/fp16/deconvolution_depthwise_fp16.h | 9 +- .../kernel/arm/fp16/deconvolution_fp16.cc | 43 ++-- .../kernel/arm/fp16/deconvolution_fp16.h | 10 +- .../arm/fp16/deconvolution_winograd_fp16.cc | 30 +-- .../arm/fp16/deconvolution_winograd_fp16.h | 9 +- .../arm/fp32/convolution_delegate_fp32.cc | 32 ++- .../arm/int8/convolution_int8_creator.cc | 24 ++ 24 files changed, 273 insertions(+), 598 deletions(-) delete mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc delete mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc index 5e447781a5..23895fddd1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc @@ -15,10 +15,6 @@ */ #include "src/runtime/kernel/arm/base/group_convolution_creator.h" -#include "src/runtime/kernel/arm/base/group_convolution.h" -#include "src/runtime/kernel/arm/int8/convolution_int8_creator.h" -#include "src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h" -#include "src/runtime/kernel/arm/int8/group_convolution_int8.h" namespace mindspore::kernel { void CopyTensorQuantParam(lite::Tensor *dst, lite::Tensor *src) { @@ -37,15 +33,11 @@ ConvParameter *CreateNewConvParameter(ConvParameter *parameter) { return conv_parameter; } -void FreeMemory(ConvParameter *conv_param, const std::vector &new_inputs, - const std::vector &new_outputs) { - if (conv_param != nullptr) { - free(conv_param); - } - for (auto &in_tensor : new_inputs) { +void FreeMemory(const std::vector *new_inputs, const std::vector *new_outputs) { + for (auto &in_tensor : *new_inputs) { delete in_tensor; } - for (auto &out_tensor : new_outputs) { + for (auto &out_tensor : *new_outputs) { delete out_tensor; } } @@ -106,6 +98,7 @@ void GroupConvCreator::CopyQuantParam(std::vector *tensors) { CopyTensorQuantParam(tensors->at(j), origin_inputs_.at(j)); } } + bool GroupConvCreator::CheckIfValidPoint(void *ptr) { if (ptr == nullptr) { for (auto &sub_conv : group_convs_) { @@ -117,18 +110,17 @@ bool GroupConvCreator::CheckIfValidPoint(void *ptr) { } int GroupConvCreator::NewInputTensor(std::vector *tensors) { - auto in_tensor = CreateVarTensor( - {input_shape_, schema::Format_NHWC, origin_inputs_.at(0)->data_type(), lite::Tensor::Category::VAR, true}, - infered_); + auto in_tensor = + CreateVarTensor({input_shape_, schema::Format_NHWC, data_type_, lite::Tensor::Category::VAR, true}, infered_); if (!CheckIfValidPoint(in_tensor)) { return lite::RET_ERROR; } tensors->emplace_back(in_tensor); return lite::RET_OK; } + int GroupConvCreator::NewOutputTensor(std::vector *tensors, lite::Tensor *output) { - auto out_tensor = - CreateVarTensor({output_shape_, output->format(), output->data_type(), output->category(), false}, infered_); + auto out_tensor = CreateVarTensor({output_shape_, output->format(), data_type_, output->category(), false}, infered_); if (!CheckIfValidPoint(out_tensor)) { return lite::RET_ERROR; } @@ -153,6 +145,7 @@ int GroupConvCreator::NewConstTensor(std::vector *tensors, int g } return lite::RET_OK; } + void GroupConvCreator::SetShapeOfTensors() { int new_in_channel = origin_inputs_.at(kWeightIndex)->Channel(); int new_out_channel; @@ -176,71 +169,31 @@ void GroupConvCreator::SetShapeOfTensors() { } } -int GroupConvCreator::CreatGroupConv() { - for (int i = 0; i < conv_param_->group_; ++i) { - auto new_conv_parameter = CreateNewConvParameter(conv_param_); - if (!CheckIfValidPoint(new_conv_parameter)) { - return lite::RET_ERROR; - } - // create new input for each group - std::vector new_inputs; - if (NewInputTensor(&new_inputs) != lite::RET_OK) { - MS_LOG(ERROR) << "new input tensor failed."; - FreeMemory(new_conv_parameter, new_inputs, {}); - return lite::RET_ERROR; - } - // const tensor - if (NewConstTensor(&new_inputs, i) != lite::RET_OK) { - MS_LOG(ERROR) << "new const tensor failed."; - FreeMemory(new_conv_parameter, new_inputs, {}); +int GroupConvCreator::GetSingleConvParam(ConvParameter *conv_param, std::vector *new_inputs, + std::vector *new_outputs, int group_id) { + if (!CheckIfValidPoint(conv_param)) { + return lite::RET_ERROR; + } + // create new input for each group + if (NewInputTensor(new_inputs) != lite::RET_OK) { + MS_LOG(ERROR) << "new input tensor failed."; + FreeMemory(new_inputs, {}); + return lite::RET_ERROR; + } + // const tensor + if (NewConstTensor(new_inputs, group_id) != lite::RET_OK) { + MS_LOG(ERROR) << "new const tensor failed."; + FreeMemory(new_inputs, {}); + return lite::RET_ERROR; + } + // create new output tensor + for (auto &output : origin_outputs_) { + if (NewOutputTensor(new_outputs, output) != lite::RET_OK) { + MS_LOG(ERROR) << "new output tensor failed."; + FreeMemory(new_inputs, new_outputs); return lite::RET_ERROR; } - // create new output tensor - std::vector new_outputs; - for (auto &output : origin_outputs_) { - if (NewOutputTensor(&new_outputs, output) != lite::RET_OK) { - MS_LOG(ERROR) << "new output tensor failed."; - FreeMemory(new_conv_parameter, new_inputs, new_outputs); - return lite::RET_ERROR; - } - } - - if (is_quant_) { - CopyQuantParam(&new_inputs); - group_convs_.emplace_back(CpuConvInt8KernelSelect(new_inputs, new_outputs, - reinterpret_cast(new_conv_parameter), context_)); - } else { - group_convs_.emplace_back(new (std::nothrow) kernel::ConvolutionDelegateCPUKernel( - reinterpret_cast(new_conv_parameter), new_inputs, new_outputs, context_)); - } } return lite::RET_OK; } - -kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *op_parameter, - const lite::InnerContext *ctx) { - GroupConvCreator group_conv_creator(inputs, outputs, op_parameter, ctx, false); - group_conv_creator.SetShapeOfTensors(); - if (group_conv_creator.CreatGroupConv() != lite::RET_OK) { - MS_LOG(ERROR) << "Create fp32 group conv failed."; - return nullptr; - } - return new (std::nothrow) - GroupConvolutionCPUKernel(op_parameter, inputs, outputs, ctx, group_conv_creator.get_group_conv(), - reinterpret_cast(op_parameter)->group_); -} - -kernel::LiteKernel *CpuGroupConvInt8KernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *op_parameter, - const lite::InnerContext *ctx, int group) { - GroupConvCreator group_conv_creator(inputs, outputs, op_parameter, ctx, true); - group_conv_creator.SetShapeOfTensors(); - if (group_conv_creator.CreatGroupConv() != lite::RET_OK) { - MS_LOG(ERROR) << "Create int8 group conv failed."; - return nullptr; - } - return new (std::nothrow) - GroupConvolutionInt8CPUKernel(op_parameter, inputs, outputs, ctx, group_conv_creator.get_group_conv(), group); -} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.h b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.h index 5c0e616ede..c0f844626f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.h @@ -34,12 +34,12 @@ struct TensorInfo { class GroupConvCreator { public: GroupConvCreator(std::vector inputs, std::vector outputs, OpParameter *op_parameter, - const lite::InnerContext *ctx, bool is_quant) + const lite::InnerContext *ctx, bool is_quant, TypeId data_type) : origin_inputs_(std::move(inputs)), origin_outputs_(std::move(outputs)), - context_(ctx), infered_(op_parameter->infer_flag_), - is_quant_(is_quant) { + is_quant_(is_quant), + data_type_(data_type) { conv_param_ = reinterpret_cast(op_parameter); } @@ -47,15 +47,16 @@ class GroupConvCreator { public: void SetShapeOfTensors(); - int CreatGroupConv(); - std::vector get_group_conv() { return group_convs_; } + std::vector *get_group_conv() { return &group_convs_; } + void CopyQuantParam(std::vector *tensors); + int GetSingleConvParam(ConvParameter *conv_param, std::vector *new_inputs, + std::vector *new_outputs, int group_id); protected: void set_input_shape(const std::vector &shape) { input_shape_ = shape; } void set_output_shape(const std::vector &shape) { output_shape_ = shape; } void set_filter_shape(const std::vector &shape) { filter_shape_ = shape; } void set_bias_shape(const std::vector &shape) { bias_shape_ = shape; } - void CopyQuantParam(std::vector *tensors); bool CheckIfValidPoint(void *ptr); int NewInputTensor(std::vector *tensors); int NewConstTensor(std::vector *tensors, int group_id); @@ -69,20 +70,13 @@ class GroupConvCreator { std::vector output_shape_; std::vector filter_shape_; std::vector bias_shape_; - const lite::InnerContext *context_; ConvParameter *conv_param_; - bool infered_; - bool is_quant_; + bool infered_ = false; + bool is_quant_ = false; + TypeId data_type_; }; -LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *op_parameter, - const lite::InnerContext *ctx); - -LiteKernel *CpuGroupConvInt8KernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *op_parameter, - const lite::InnerContext *ctx, int group); - +ConvParameter *CreateNewConvParameter(ConvParameter *parameter); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_GROUP_CONVOLUTION_CREATOR_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index 09b6e20cd4..c47450f8e6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -88,12 +88,7 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() { MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; return RET_ERROR; } - if (origin_bias_data_type_ == kNumberTypeFloat16) { - memcpy(bias_data_, origin_bias_, output_channel * sizeof(float16_t)); - } else { - MS_LOG(ERROR) << "Conv1x1 only support fp16 weight"; - return RET_ERROR; - } + memcpy(bias_data_, origin_bias_, output_channel * sizeof(float16_t)); memset(reinterpret_cast(bias_data_) + bias_size, 0, size - bias_size); } @@ -105,8 +100,7 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() { return RET_ERROR; } memset(reinterpret_cast(weight_ptr_) + down_size, 0, size - down_size); - ColMajor2Row8MajorFp16(origin_weight_, weight_ptr_, input_channel, output_channel, - origin_weight_data_type_ == kNumberTypeFloat16); + ColMajor2Row8MajorFp16(origin_weight_, weight_ptr_, input_channel, output_channel, true); return RET_OK; } @@ -217,8 +211,12 @@ static int Convolution1x1Fp16RunHw(void *cdata, int task_id) { } int Convolution1x1FP16CPUKernel::Run() { - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - + auto input_data = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_data = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_data == nullptr || output_data == nullptr) { + MS_LOG(ERROR) << "Convolution1x1 Fp16 get null tensor data!"; + return RET_ERROR; + } pack_input_ = reinterpret_cast( ctx_->allocator->Malloc(matmul_param_->row_align_ * matmul_param_->deep_ * sizeof(float16_t))); if (pack_input_ == nullptr) { @@ -227,9 +225,9 @@ int Convolution1x1FP16CPUKernel::Run() { } for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { - output_ptr_ = execute_output_ + batch_index * matmul_param_->row_ * matmul_param_->col_; + output_ptr_ = output_data + batch_index * matmul_param_->row_ * matmul_param_->col_; float16_t *batch_in = - execute_input_ + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_; + input_data + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_; if (pre_trans_input_) { Conv1x1InputPack(batch_in, input_ptr_, conv_param_, sizeof(float16_t)); } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h index 680ba6ef40..a799abfc84 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h @@ -20,18 +20,18 @@ #include #include #include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" #include "src/common/utils.h" #include "nnacl/matmul_parameter.h" #include "nnacl/fp16/matmul_fp16.h" namespace mindspore::kernel { -class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class Convolution1x1FP16CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx, void *origin_weight, - void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), + void *origin_bias) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), origin_weight_(origin_weight), origin_bias_(origin_bias) {} ~Convolution1x1FP16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc deleted file mode 100644 index bdaf6cd6f0..0000000000 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" -#include "nnacl/fp16/cast_fp16.h" -#include "src/runtime/kernel/arm/fp16/common_fp16.h" -#include "include/errorcode.h" -#include "src/runtime/runtime_api.h" - -namespace mindspore::kernel { -using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_OK; -ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() { - if (fp16_weight_ != nullptr) { - free(fp16_weight_); - fp16_weight_ = nullptr; - } -} - -int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() { - auto input_tensor = in_tensors_.at(0); - auto output_tensor = out_tensors_.at(0); - execute_input_ = reinterpret_cast(input_tensor->data_c()); - execute_output_ = reinterpret_cast(output_tensor->data_c()); - return RET_OK; -} - -int ConvolutionBaseFP16CPUKernel::GetExecuteFilter(lite::Tensor *weight_tensor, void *origin_data) { - MS_ASSERT(origin_weight_data_type_ == kNumberTypeFloat32 || origin_weight_data_type_ == kNumberTypeFloat16); - if (origin_weight_data_type_ == kNumberTypeFloat32) { - MS_LOG(ERROR) << "Conv fp16 only support fp16 weight"; - return RET_ERROR; - } else { - execute_weight_ = reinterpret_cast(origin_data); - fp16_weight_ = nullptr; - } - return RET_OK; -} -} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h deleted file mode 100644 index 4dc7ddd755..0000000000 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_BASE_FP16_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_BASE_FP16_H_ - -#include -#include -#include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/base/convolution_base.h" -#include "src/common/utils.h" - -namespace mindspore::kernel { -class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { - public: - ConvolutionBaseFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx, - TypeId origin_weight_data_type, TypeId origin_bias_data_type) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - origin_weight_data_type_(origin_weight_data_type), - origin_bias_data_type_(origin_bias_data_type) {} - ~ConvolutionBaseFP16CPUKernel() override; - - int Init() override { return mindspore::lite::RET_OK; } - int ReSize() override { return mindspore::lite::RET_OK; } - int Run() override { return mindspore::lite::RET_OK; } - int RunImpl(int task_id) { return mindspore::lite::RET_OK; } - virtual int GetExecuteTensor(); - // origin_data may not be the same as the data in the weight tensor, - // because weight tensor has released data already. In this situation, - // origin_data is the pointer of another memory block. - virtual int GetExecuteFilter(lite::Tensor *weight_tensor, void *origin_data); - - protected: - float16_t *fp16_weight_ = nullptr; - float16_t *execute_input_ = nullptr; - float16_t *execute_weight_ = nullptr; - float16_t *execute_output_ = nullptr; - TypeId origin_weight_data_type_; - TypeId origin_bias_data_type_; -}; -} // namespace mindspore::kernel - -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_BASE_FP16_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc index 13c4862a76..4526900ca3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc @@ -22,6 +22,7 @@ #include "src/runtime/kernel/arm/fp16/group_convolution_fp16.h" #include "src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h" #include "src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h" +#include "src/runtime/kernel/arm/base/group_convolution_creator.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" @@ -96,8 +97,8 @@ int ConvolutionDelegateFP16CPUKernel::ReSize() { kernel::SetInputOutputShapeInfo(reinterpret_cast(op_parameter_), in_tensors_.front(), out_tensors_.front(), context_); if (fp16_conv_kernel_ == nullptr) { - fp16_conv_kernel_ = CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_, context_, origin_weight_, - origin_bias_, origin_weight_data_type_, origin_bias_data_type_); + fp16_conv_kernel_ = + CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_, context_, origin_weight_, origin_bias_); if (fp16_conv_kernel_ == nullptr) { MS_LOG(ERROR) << "Selecting execute kernel failed for conv_kernel, got a nullptr."; return RET_ERROR; @@ -108,29 +109,16 @@ int ConvolutionDelegateFP16CPUKernel::ReSize() { return fp16_conv_kernel_->ReSize(); } -ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) { - auto conv_parameter = reinterpret_cast(malloc(sizeof(ConvParameter))); - if (conv_parameter == nullptr) { - MS_LOG(ERROR) << "Malloc new conv parameter failed."; - return nullptr; - } - memcpy(conv_parameter, parameter, sizeof(ConvParameter)); - return conv_parameter; -} - kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const InnerContext *ctx, void *origin_weight, void *origin_bias, - TypeId origin_weight_data_type, TypeId origin_bias_data_type) { + const InnerContext *ctx) { MS_ASSERT(opParameter != nullptr); auto conv_param = reinterpret_cast(opParameter); kernel::LiteKernel *kernel = nullptr; if (conv_param->input_channel_ < 32) { - kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel( - opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); + kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(opParameter, inputs, outputs, ctx); } else { - kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel( - opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); + kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx); } if (kernel == nullptr) { MS_LOG(ERROR) << "kernel is nullptr."; @@ -142,27 +130,22 @@ kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, - const lite::InnerContext *ctx, void *origin_weight, void *origin_bias, - TypeId origin_weight_data_type, TypeId origin_bias_data_type) { + const lite::InnerContext *ctx, void *origin_weight, void *origin_bias) { auto conv_param = reinterpret_cast(op_parameter); bool use_winograd = false; int out_unit; CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param); kernel::LiteKernel *kernel = nullptr; - if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { - kernel = CpuConvDwFp16KernelCreator(inputs, outputs, op_parameter, ctx, origin_weight, origin_bias, - origin_weight_data_type, origin_bias_data_type); - } else if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) { - kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel( - op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); - } else if (use_winograd) { + if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) { kernel = new (std::nothrow) - kernel::ConvolutionWinogradFP16CPUKernel(op_parameter, inputs, outputs, ctx, out_unit, origin_weight, origin_bias, - origin_weight_data_type, origin_bias_data_type); + kernel::Convolution1x1FP16CPUKernel(op_parameter, inputs, outputs, ctx, origin_weight, origin_bias); + } else if (use_winograd) { + kernel = new (std::nothrow) kernel::ConvolutionWinogradFP16CPUKernel(op_parameter, inputs, outputs, ctx, out_unit, + origin_weight, origin_bias); } else { - kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel( - op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); + kernel = new (std::nothrow) + kernel::ConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, origin_weight, origin_bias); } // Once kernel is selected, init func will invoke InitWeightAndBias auto ret = kernel->Init(); @@ -174,194 +157,54 @@ kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector &i return kernel; } -void FreeMemoryFp16(const std::vector &group_convs, const std::vector &new_inputs, - const std::vector &new_outputs) { - for (auto sub_conv : group_convs) { - delete sub_conv; - } - for (auto in_tensor : new_inputs) { - delete in_tensor; - } - for (auto out_tensor : new_outputs) { - delete out_tensor; - } -} - -static lite::Tensor *CreateInputTensorFp16(TypeId data_type, const std::vector &in_shape, bool infered_flag) { - auto in_tensor = new (std::nothrow) lite::Tensor(data_type, in_shape, Format_NHWC, lite::Tensor::Category::VAR); - if (in_tensor == nullptr) { - MS_LOG(ERROR) << "new in_tensor failed."; - return nullptr; - } - if (infered_flag) { - auto ret = in_tensor->MallocData(); - if (ret != RET_OK) { - delete in_tensor; - MS_LOG(ERROR) << "in tensor malloc failed."; - return nullptr; - } - } - return in_tensor; -} - -static lite::Tensor *CreateConstTensorFp16(lite::Tensor *tensor, const std::vector &shape, const int index) { - auto new_tensor = - new (std::nothrow) lite::Tensor(tensor->data_type(), shape, Format_NHWC, lite::Tensor::Category::CONST_TENSOR); - if (new_tensor == nullptr) { - MS_LOG(ERROR) << "Create new_tensor failed."; - return nullptr; - } - auto ret = new_tensor->MallocData(); - if (ret != RET_OK) { - delete new_tensor; - MS_LOG(ERROR) << "Malloc new_tensor failed."; - return nullptr; - } - memcpy(new_tensor->data_c(), reinterpret_cast(tensor->data_c()) + index * new_tensor->Size(), - new_tensor->Size()); - return new_tensor; -} - -static lite::Tensor *CreateOutputTensorFp16(const std::vector &out_shape, - const std::vector &outputs, bool infered_flag, int index) { - auto out_tensor = new (std::nothrow) lite::Tensor(); - if (out_tensor == nullptr) { - MS_LOG(ERROR) << "new tmp_out_tensor failed."; - return nullptr; - } - out_tensor->set_data_type(mindspore::kNumberTypeFloat16); - out_tensor->set_format(outputs.at(index)->format()); - if (infered_flag) { - out_tensor->set_shape(out_shape); - auto ret = out_tensor->MallocData(); - if (ret != RET_OK) { - delete out_tensor; - MS_LOG(ERROR) << "out_tensor malloc data failed."; - return nullptr; - } - } - return out_tensor; -} - -kernel::LiteKernel *CreateDelegateConvFp16(const std::vector &inputs, - const std::vector &outputs, OpParameter *op_parameter, - const InnerContext *ctx) { - auto weight_data_type = inputs.at(1)->data_type(); - if (weight_data_type != kNumberTypeFloat16) { - MS_LOG(ERROR) << "Convfp16 only support fp16 weight"; - return nullptr; - } - TypeId bias_data_type = kTypeUnknown; - if (inputs.size() == 3) { - bias_data_type = inputs.at(2)->data_type(); - } - return new (std::nothrow) - kernel::ConvolutionDelegateFP16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); -} - kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, const InnerContext *ctx) { - bool infer_flag = op_parameter->infer_flag_; auto conv_param = reinterpret_cast(op_parameter); - // update new shape info for each sub kernel - int new_in_channel = inputs.at(kWeightIndex)->Channel(); - int new_out_channel = 0; - if (conv_param->group_ == 0) { - MS_LOG(ERROR) << "Divisor 'group' cannot be 0."; - return nullptr; - } else { - new_out_channel = inputs.at(kWeightIndex)->Batch() / conv_param->group_; - } + GroupConvCreator group_conv_creator(inputs, outputs, op_parameter, ctx, false, kNumberTypeFloat16); + group_conv_creator.SetShapeOfTensors(); - std::vector in_shape; - std::vector out_shape; - if (infer_flag) { - conv_param->input_channel_ = new_in_channel; - conv_param->output_channel_ = new_out_channel; - in_shape = {inputs.front()->Batch(), inputs.front()->Height(), inputs.front()->Width(), new_in_channel}; - out_shape = {inputs.front()->Batch(), outputs.front()->Height(), outputs.front()->Width(), new_out_channel}; - } - std::vector filter_shape = {new_out_channel, conv_param->kernel_h_, conv_param->kernel_w_, new_in_channel}; - std::vector bias_shape = {new_out_channel}; - - // new group conv op - std::vector group_convs; - // create tensors for every sub conv kernel for (int i = 0; i < conv_param->group_; ++i) { + ConvParameter *new_conv_param = CreateNewConvParameter(conv_param); std::vector new_inputs; std::vector new_outputs; - auto new_conv_parameter = CreateNewConvParameterFp16(conv_param); - if (new_conv_parameter == nullptr) { - FreeMemoryFp16(group_convs, new_inputs, new_outputs); - MS_LOG(ERROR) << "Get new conv parameter failed."; - return nullptr; - } - // create new input for each group - auto in_tensor = CreateInputTensorFp16(mindspore::kNumberTypeFloat16, in_shape, infer_flag); - if (in_tensor == nullptr) { - delete new_conv_parameter; - FreeMemoryFp16(group_convs, new_inputs, new_outputs); - MS_LOG(ERROR) << "create input tensor failed."; - return nullptr; - } - new_inputs.emplace_back(in_tensor); - - // create new weight - auto filter_tensor = CreateConstTensorFp16(inputs.at(kWeightIndex), filter_shape, i); - if (filter_tensor == nullptr) { - delete new_conv_parameter; - FreeMemoryFp16(group_convs, new_inputs, new_outputs); - MS_LOG(ERROR) << "create filter tensor failed."; + auto ret = group_conv_creator.GetSingleConvParam(new_conv_param, &new_inputs, &new_outputs, i); + if (ret != RET_OK) { + MS_LOG(ERROR) << "GetSingleConv for fp16 group conv failed."; return nullptr; } - new_inputs.emplace_back(filter_tensor); - - // if has bias, create new bias - if (inputs.size() == 3) { - auto bias_tensor = CreateConstTensorFp16(inputs.at(kBiasIndex), bias_shape, i); - if (bias_tensor == nullptr) { - delete new_conv_parameter; - FreeMemoryFp16(group_convs, new_inputs, new_outputs); - MS_LOG(ERROR) << "create bias_tensor failed."; - return nullptr; - } - new_inputs.emplace_back(bias_tensor); - } - - // create new output tensors - for (size_t j = 0; j < outputs.size(); ++j) { - auto out_tensor = CreateOutputTensorFp16(out_shape, outputs, infer_flag, j); - if (out_tensor == nullptr) { - delete new_conv_parameter; - FreeMemoryFp16(group_convs, new_inputs, new_outputs); - MS_LOG(ERROR) << "new out_tensor failed."; - return nullptr; - } - new_outputs.emplace_back(out_tensor); - } - group_convs.emplace_back( - CreateDelegateConvFp16(new_inputs, new_outputs, reinterpret_cast(new_conv_parameter), ctx)); + group_conv_creator.get_group_conv()->emplace_back(new (std::nothrow) ConvolutionDelegateFP16CPUKernel( + reinterpret_cast(new_conv_param), new_inputs, new_outputs, ctx)); } return new (std::nothrow) - GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, group_convs, conv_param->group_); + GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, *(group_conv_creator.get_group_conv()), + reinterpret_cast(op_parameter)->group_); } +/* creator func */ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const InnerContext *ctx, const kernel::KernelKey &desc) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion); + auto weight_data_type = inputs.at(1)->data_type(); + TypeId bias_data_type = weight_data_type; + if (inputs.size() == 3) { + bias_data_type = inputs.at(2)->data_type(); + } + if (weight_data_type != kNumberTypeFloat16 || bias_data_type != kNumberTypeFloat16) { + MS_LOG(ERROR) << "Convfp16 only support fp16 weight and fp16 bias."; + return nullptr; + } auto conv_param = reinterpret_cast(opParameter); kernel::LiteKernel *kernel = nullptr; - bool is_depthwise = - (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_); - - if (conv_param->group_ > 1 && !is_depthwise) { - kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx); + if (conv_param->group_ == 1) { + kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs, ctx); + } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { + kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, ctx); } else { - kernel = CreateDelegateConvFp16(inputs, outputs, opParameter, ctx); + kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx); } if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h index bc686cc76e..eb1e722e84 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h @@ -29,11 +29,8 @@ namespace mindspore::kernel { class ConvolutionDelegateFP16CPUKernel : public LiteKernel { public: ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::InnerContext *ctx, - TypeId origin_weight_data_type, TypeId origin_bias_data_type) - : LiteKernel(parameter, inputs, outputs, ctx), - origin_weight_data_type_(origin_weight_data_type), - origin_bias_data_type_(origin_bias_data_type) {} + const std::vector &outputs, const lite::InnerContext *ctx) + : LiteKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDelegateFP16CPUKernel() override { FreeCopiedData(); if (fp16_conv_kernel_ != nullptr) { @@ -56,14 +53,11 @@ class ConvolutionDelegateFP16CPUKernel : public LiteKernel { void *origin_weight_ = nullptr; void *origin_bias_ = nullptr; kernel::LiteKernel *fp16_conv_kernel_ = nullptr; - TypeId origin_weight_data_type_; - TypeId origin_bias_data_type_; }; kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, - const lite::InnerContext *ctx, void *origin_weight, void *origin_bias, - TypeId origin_weight_data_type, TypeId origin_bias_data_type); + const lite::InnerContext *ctx, void *origin_weight, void *origin_bias); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DELEGATE_FP16_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index 521f5b1501..9446cfd10e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -36,23 +36,15 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { auto weight_tensor = in_tensors_.at(kWeightIndex); int channel = weight_tensor->Batch(); int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width(); + auto origin_weight = reinterpret_cast(weight_tensor->data_c()); packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, origin_weight_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "get execute filter data failed."; - return ret; - } - PackNCHWToNHWCFp16(execute_weight_, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), + PackNCHWToNHWCFp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); - if (fp16_weight_ != nullptr) { - free(fp16_weight_); - fp16_weight_ = nullptr; - } bias_data_ = reinterpret_cast(malloc(channel * sizeof(float16_t))); if (bias_data_ == nullptr) { @@ -60,14 +52,10 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { return RET_ERROR; } memset(bias_data_, 0, channel * sizeof(float16_t)); - auto bias_fp16 = reinterpret_cast(bias_data_); if (in_tensors_.size() == kInputSize2) { auto bias_tensor = in_tensors_.at(kBiasIndex); - MS_ASSERT(origin_bias_); - auto ori_bias = reinterpret_cast(origin_bias_); - for (int i = 0; i < bias_tensor->ElementsNum(); i++) { - bias_fp16[i] = (float16_t)ori_bias[i]; - } + auto ori_bias = reinterpret_cast(bias_tensor->data_c()); + memcpy(bias_data_, ori_bias, bias_tensor->Size()); } return RET_OK; } @@ -95,8 +83,13 @@ int ConvolutionDepthwiseFp16CPUKernel::ReSize() { } int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { - ConvDwFp16(execute_output_, execute_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, - task_id); + auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_ptr == nullptr || output_ptr == nullptr) { + MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!"; + return RET_ERROR; + } + ConvDwFp16(output_ptr, input_ptr, packed_weight_, reinterpret_cast(bias_data_), conv_param_, task_id); return RET_OK; } @@ -111,8 +104,6 @@ static int ConvDwFp16Run(void *cdata, int task_id) { } int ConvolutionDepthwiseFp16CPUKernel::Run() { - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h index 0f08247b83..674466cab0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h @@ -19,7 +19,7 @@ #include #include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" #include "nnacl/fp16/conv_depthwise_fp16.h" #ifdef __cplusplus @@ -32,15 +32,11 @@ void ConvDwFp16(float16_t *output_data, const float16_t *input_data, const float #endif namespace mindspore::kernel { -class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx, - void *origin_weight, void *origin_bias, TypeId origin_weight_data_type, - TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} + const std::vector &outputs, const InnerContext *ctx) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDepthwiseFp16CPUKernel() override; int Init() override; @@ -51,8 +47,6 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { int Execute(int task_id); private: - void *origin_weight_; // do not free - void *origin_bias_; // do not free float16_t *packed_weight_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index 02cdf2721c..a479709231 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -62,14 +62,15 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() { auto weight_tensor = in_tensors_.at(kWeightIndex); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); + auto origin_weight = reinterpret_cast(weight_tensor->data_c()); packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast(origin_weight_), packed_weight_, 1, - weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); + PackNCHWFp16ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), + weight_tensor->Batch()); bias_data_ = reinterpret_cast(malloc(C8NUM * OC8 * sizeof(float16_t))); if (bias_data_ == nullptr) { @@ -77,14 +78,10 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() { return RET_ERROR; } memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t)); - auto bias_fp16 = reinterpret_cast(bias_data_); if (in_tensors_.size() == kInputSize2) { auto bias_tensor = in_tensors_.at(kBiasIndex); - MS_ASSERT(origin_bias_); - auto ori_bias = reinterpret_cast(origin_bias_); - for (int i = 0; i < bias_tensor->ElementsNum(); i++) { - bias_fp16[i] = (float16_t)ori_bias[i]; - } + auto ori_bias = reinterpret_cast(bias_tensor->data_c()); + memcpy(bias_data_, ori_bias, bias_tensor->Size()); } conv_param_->thread_num_ = MSMIN(thread_count_, OC8); @@ -143,14 +140,19 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { return ret; } - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); + auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_ptr == nullptr || output_ptr == nullptr) { + MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!"; + return RET_ERROR; + } if (need_align_) { - PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_, + PackNHWCToNHWC8Fp16(input_ptr, packed_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); } else { - packed_input_ = execute_input_; - packed_output_ = execute_output_; + packed_input_ = input_ptr; + packed_output_ = output_ptr; } ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_); @@ -158,7 +160,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; } if (need_align_) { - PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, + PackNHWC8ToNHWCFp16(packed_output_, output_ptr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h index 4dadf8ff28..51d4ab94f6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h @@ -19,7 +19,7 @@ #include #include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" #include "nnacl/fp16/conv_depthwise_fp16.h" #ifdef __cplusplus @@ -33,15 +33,11 @@ void ConvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const flo #endif namespace mindspore::kernel { -class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx, - void *origin_weight, void *origin_bias, TypeId origin_weight_data_type, - TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} + const std::vector &outputs, const InnerContext *ctx) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDepthwiseSWFp16CPUKernel() override; int Init() override; @@ -54,8 +50,6 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel private: void FreePackedInputOutput(); - void *origin_weight_; // do not free - void *origin_bias_; // do not free SlidingWindowParam *sliding_ = nullptr; float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc index faca0d38aa..3e15eec4ff 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc @@ -45,8 +45,7 @@ int ConvolutionFP16CPUKernel::InitWeightBias() { return RET_ERROR; } memset(packed_weight_, 0, pack_weight_size * sizeof(float16_t)); - RowMajor2Col8MajorFp16(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane, - origin_weight_data_type_ == kNumberTypeFloat32); + RowMajor2Col8MajorFp16(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane, false); // init bias bias_data_ = malloc(oc8 * sizeof(float16_t)); @@ -56,14 +55,7 @@ int ConvolutionFP16CPUKernel::InitWeightBias() { } memset(bias_data_, 0, oc8 * sizeof(float16_t)); if (in_tensors_.size() == kInputSize2) { - if (origin_bias_data_type_ == kNumberTypeFloat16) { - memcpy(bias_data_, origin_bias_, out_channel * sizeof(float16_t)); - } else { - MS_LOG(ERROR) << "Conv fp16 only support fp16 bias"; - return RET_ERROR; - } - } else { - MS_ASSERT(in_tensors_.size() == kInputSize1); + memcpy(bias_data_, origin_bias_, out_channel * sizeof(float16_t)); } return RET_OK; } @@ -123,8 +115,14 @@ int ConvolutionFP16CPUKernel::ReSize() { } int ConvolutionFP16CPUKernel::RunImpl(int task_id) { - ConvFp16(execute_input_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), col_major_input_, - execute_output_, task_id, conv_param_); + auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_ptr == nullptr || output_ptr == nullptr) { + MS_LOG(ERROR) << "Convolution Fp16 get null tensor data!"; + return RET_ERROR; + } + ConvFp16(input_ptr, packed_input_, packed_weight_, reinterpret_cast(bias_data_), col_major_input_, + output_ptr, task_id, conv_param_); return RET_OK; } @@ -139,8 +137,6 @@ static int ConvolutionFp16Impl(void *cdata, int task_id) { } int ConvolutionFP16CPUKernel::Run() { - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - auto ret = InitTmpBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init tmp buffer failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h index f9c0859139..2750c8da60 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h @@ -20,15 +20,15 @@ #include #include #include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" namespace mindspore::kernel { -class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx, void *origin_weight, - void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), + void *origin_bias) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), origin_weight_(origin_weight), origin_bias_(origin_bias) {} ~ConvolutionFP16CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc index 0bded8410c..c7d0d89267 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc @@ -33,9 +33,9 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_ } int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - int in_channel = filter_tensor->Channel(); - int out_channel = filter_tensor->Batch(); + auto weight_tensor = in_tensors_.at(kWeightIndex); + int in_channel = weight_tensor->Channel(); + int out_channel = weight_tensor->Batch(); conv_param_->input_channel_ = in_channel; conv_param_->output_channel_ = out_channel; int oc_block_num = UP_DIV(out_channel, col_tile_); @@ -65,21 +65,11 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; return ret; } - ret = GetExecuteFilter(filter_tensor, origin_weight_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "get execute filter failed."; - return ret; - } - ret = WinogradFilterTransformFp16(execute_weight_, matrix_g, matrix_gt, col_tile_); + ret = WinogradFilterTransformFp16(reinterpret_cast(origin_weight_), matrix_g, matrix_gt, col_tile_); if (ret != RET_OK) { MS_LOG(ERROR) << "winograd filter transform failed."; return ret; } - // if fp16_weight is malloced, free it. It will not be used in runtime anymore. - if (fp16_weight_ != nullptr) { - free(fp16_weight_); - fp16_weight_ = nullptr; - } // init bias bias_data_ = malloc(oc_block_num * col_tile_ * sizeof(float16_t)); @@ -88,16 +78,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { return RET_ERROR; } memset(bias_data_, 0, oc_block_num * col_tile_ * sizeof(float16_t)); - if (in_tensors_.size() == kInputSize2) { - if (origin_bias_data_type_ == kNumberTypeFloat16) { - memcpy(bias_data_, origin_bias_, out_channel * sizeof(float16_t)); - } else { - MS_LOG(ERROR) << "Conv winograd fp16 only support fp16 bias"; - return RET_ERROR; - } - } else { - MS_ASSERT(in_tensors_.size() == kInputSize1); + memcpy(bias_data_, origin_bias_, out_channel * sizeof(float16_t)); } return RET_OK; } @@ -202,7 +184,13 @@ int ConvolutionWinogradFP16CPUKernel::ReSize() { } int ConvolutionWinogradFP16CPUKernel::RunImpl(int task_id) { - ConvWinogardFp16(execute_input_, trans_weight_, reinterpret_cast(bias_data_), execute_output_, + auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_ptr == nullptr || output_ptr == nullptr) { + MS_LOG(ERROR) << "Convolution Winograd Fp16 get null tensor data!"; + return RET_ERROR; + } + ConvWinogardFp16(input_ptr, trans_weight_, reinterpret_cast(bias_data_), output_ptr, tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_); return RET_OK; } @@ -218,8 +206,6 @@ static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) { } int ConvolutionWinogradFP16CPUKernel::Run() { - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - auto ret = InitTmpBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init tmp buffer failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h index e9793875df..6cf454e085 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h @@ -20,20 +20,19 @@ #include #include #include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" #include "nnacl/fp16/conv_fp16.h" #include "nnacl/fp16/winograd_utils_fp16.h" #include "src/common/utils.h" #include "nnacl/base/minimal_filtering_generator.h" namespace mindspore::kernel { -class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx, int out_unit, - void *origin_weight, void *origin_bias, TypeId origin_weight_data_type, - TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), + void *origin_weight, void *origin_bias) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), output_unit_(out_unit), origin_weight_(origin_weight), origin_bias_(origin_bias) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 3c1200fb97..99f4f21892 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -73,7 +73,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() { // init weight: o, h, w, i; o == group, i == 1 auto weight_tensor = in_tensors_.at(kWeightIndex); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); - auto origin_weight = reinterpret_cast(weight_tensor->MutableData()); + auto origin_weight = reinterpret_cast(weight_tensor->data_c()); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); @@ -92,10 +92,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() { memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t)); if (in_tensors_.size() == kInputSize2) { auto bias_tensor = in_tensors_.at(kBiasIndex); - auto ori_bias = reinterpret_cast(bias_tensor->MutableData()); - for (int i = 0; i < bias_tensor->ElementsNum(); i++) { - reinterpret_cast(bias_data_)[i] = ori_bias[i]; - } + auto ori_bias = reinterpret_cast(bias_tensor->data_c()); + memcpy(bias_data_, ori_bias, bias_tensor->Size()); } conv_param_->thread_num_ = MSMIN(thread_count_, OC8); @@ -157,18 +155,23 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { return RET_ERROR; } - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); + auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_ptr == nullptr || output_ptr == nullptr) { + MS_LOG(ERROR) << "Deconvolution depthwise Fp16 get null tensor data!"; + return RET_ERROR; + } if (need_align_) { - PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_, + PackNHWCToNHWC8Fp16(input_ptr, packed_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); } else { - packed_input_ = execute_input_; + packed_input_ = input_ptr; } if (!need_align_) { - memset(execute_output_, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t)); - packed_output_ = execute_output_; + memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t)); + packed_output_ = output_ptr; } ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { @@ -176,7 +179,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { } if (need_align_) { - PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, + PackNHWC8ToNHWCFp16(packed_output_, output_ptr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h index 0d6dfdd87b..f9b1c20fc8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h @@ -19,7 +19,7 @@ #include #include "src/lite_kernel.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" #include "nnacl/fp16/conv_depthwise_fp16.h" #ifdef __cplusplus @@ -34,12 +34,11 @@ void ComputeStrides(int *shape, int *strides, int ndim); #endif namespace mindspore::kernel { -class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx, - TypeId origin_weight_data_type, TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {} + const std::vector &outputs, const InnerContext *ctx) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeconvolutionDepthwiseFp16CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 353f172bed..b57bf7a37e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -32,9 +32,9 @@ DeConvolutionFp16CPUKernel::~DeConvolutionFp16CPUKernel() { delete matmul_param_; matmul_param_ = nullptr; } - if (execute_weight_ != nullptr) { - free(execute_weight_); - execute_weight_ = nullptr; + if (pack_weight_ != nullptr) { + free(pack_weight_); + pack_weight_ = nullptr; } return; } @@ -78,17 +78,17 @@ int DeConvolutionFp16CPUKernel::InitWeightBias() { } size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); - execute_weight_ = reinterpret_cast(malloc(weight_pack_size)); - if (execute_weight_ == nullptr) { - MS_LOG(ERROR) << "deconv malloc execute_weight_ error!"; + pack_weight_ = reinterpret_cast(malloc(weight_pack_size)); + if (pack_weight_ == nullptr) { + MS_LOG(ERROR) << "deconv malloc pack_weight_ error!"; return RET_ERROR; } - memset(execute_weight_, 0, weight_pack_size); + memset(pack_weight_, 0, weight_pack_size); if (in_tensors_.at(1)->data_type() != kNumberTypeFloat16) { MS_LOG(ERROR) << "deconv fp16 kernel require fp16 weight"; return RET_ERROR; } - PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast(in_tensors_.at(1)->data_c()), execute_weight_, input_channel, + PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast(in_tensors_.at(1)->data_c()), pack_weight_, input_channel, kernel_w * kernel_h, output_channel); return RET_OK; } @@ -169,7 +169,7 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) { } auto tmp_buf = tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_16_; - MatMulFp16(pack_input_, execute_weight_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + MatMulFp16(pack_input_, pack_weight_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, tmp_buf, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_, oc * C8NUM * kernel_plane_, 0, OutType_C8); @@ -197,7 +197,12 @@ int DeConvolutionFp16CPUKernel::Init() { } int DeConvolutionFp16CPUKernel::Run() { - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); + auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_ptr == nullptr || output_ptr == nullptr) { + MS_LOG(ERROR) << "DeConvolution Fp16 get null tensor data!"; + return RET_ERROR; + } int error_code = InitRunBuf(); if (error_code != RET_OK) { @@ -207,8 +212,8 @@ int DeConvolutionFp16CPUKernel::Run() { } for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { - batch_input_ = execute_input_ + batch_index * conv_param_->input_channel_ * input_plane_; - batch_output_ = execute_output_ + batch_index * conv_param_->output_channel_ * output_plane_; + batch_input_ = input_ptr + batch_index * conv_param_->input_channel_ * input_plane_; + batch_output_ = output_ptr + batch_index * conv_param_->output_channel_ * output_plane_; RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_); @@ -228,25 +233,17 @@ kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector MS_ASSERT(op_parameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); - auto weight_data_type = inputs.at(1)->data_type(); - TypeId bias_data_type = kTypeUnknown; - if (inputs.size() == 3) { - bias_data_type = inputs.at(2)->data_type(); - } kernel::LiteKernel *kernel = nullptr; auto conv_param = reinterpret_cast(op_parameter); if (conv_param->group_ == 1) { if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) { - kernel = new (std::nothrow) - kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); + kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx); } else { - kernel = new (std::nothrow) - kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); + kernel = new (std::nothrow) kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx); } } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { - kernel = new (std::nothrow) - DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); + kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx); } if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h index d1ecc46057..3256bb75ad 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h @@ -21,15 +21,14 @@ #include "nnacl/fp16/deconv_fp16.h" #include "nnacl/fp16/matmul_fp16.h" #include "src/kernel_registry.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" namespace mindspore::kernel { -class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::InnerContext *ctx, - TypeId origin_weight_data_type, TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {} + const std::vector &outputs, const lite::InnerContext *ctx) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeConvolutionFp16CPUKernel() override; int Init() override; int Run() override; @@ -52,6 +51,7 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { int thread_count_; int thread_stride_; float16_t *pack_input_; + float16_t *pack_weight_; float16_t *pack_output_; float16_t *tmp_buffer_; float16_t *batch_input_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc index 9cbf54369b..b198df7218 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc @@ -317,15 +317,11 @@ int DeConvWinogradFp16CPUKernel::InitComputeParam() { int DeConvWinogradFp16CPUKernel::InitDataParam() { /* unit data : weight & winograd data*/ auto weight_tensor = in_tensors_.at(kWeightIndex); - auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, weight_tensor->data_c()); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get Execute filter failed."; - return ret; - } + auto origin_weight = reinterpret_cast(weight_tensor->data_c()); for (int i = 0; i < deconv_param_->compute_size_; i++) { DeConvComputeUnit *unit = &deconv_param_->compute_units_[i]; - ret = PackDeConvWgDataFp16(execute_weight_, unit, conv_param_, deconv_param_); + auto ret = PackDeConvWgDataFp16(origin_weight, unit, conv_param_, deconv_param_); if (ret != RET_OK) { return ret; } @@ -338,18 +334,11 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() { return RET_ERROR; } memset(bias_data_, 0, deconv_param_->oc_up4_ * sizeof(float16_t)); - auto fp16_bias_data = reinterpret_cast(bias_data_); if (in_tensors_.size() == 3 && in_tensors_.at(kBiasIndex)->shape().size() == 1 && in_tensors_.at(kBiasIndex)->DimensionSize(0) == conv_param_->output_channel_) { - auto src_bias = reinterpret_cast(in_tensors_.at(kBiasIndex)->MutableData()); - MS_ASSERT(src_bias); - for (int i = 0; i < conv_param_->output_channel_; ++i) { - fp16_bias_data[i] = (float16_t)src_bias[i]; - } - } else { - MS_ASSERT(in_tensors_.size() == kInputSize1); + auto src_bias = reinterpret_cast(in_tensors_.at(kBiasIndex)->data_c()); + memcpy(bias_data_, src_bias, in_tensors_.at(kBiasIndex)->Size()); } - return RET_OK; } @@ -391,11 +380,16 @@ int DeConvWinogradFp16CPUKernel::Init() { } int DeConvWinogradFp16CPUKernel::Run() { - ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); + auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); + if (input_ptr == nullptr || output_ptr == nullptr) { + MS_LOG(ERROR) << "Deconvolution Winograd Fp16 get null tensor data!"; + return RET_ERROR; + } for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { - nhwc_input_ = execute_input_ + batch_index * deconv_param_->input_plane_ * conv_param_->input_channel_; - nhwc_output_ = execute_output_ + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; + nhwc_input_ = input_ptr + batch_index * deconv_param_->input_plane_ * conv_param_->input_channel_; + nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t)); ParallelLaunch(this->context_->thread_pool_, DeConvWgFp16Run, this, deconv_param_->thread_num_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h index e099e91fc0..19f0d9c7df 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h @@ -22,15 +22,14 @@ #include "nnacl/fp16/common_func_fp16.h" #include "nnacl/fp16/deconv_winograd_fp16.h" #include "nnacl/fp16/pack_fp16.h" -#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h" +#include "src/runtime/kernel/arm/base/convolution_base.h" namespace mindspore::kernel { -class DeConvWinogradFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { +class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::InnerContext *ctx, - TypeId origin_weight_data_type, TypeId origin_bias_data_type) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {} + const std::vector &outputs, const lite::InnerContext *ctx) + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeConvWinogradFp16CPUKernel() override; int Init() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc index 4b39ff7c47..0564bd29ea 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc @@ -24,6 +24,7 @@ #include "src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h" #include "src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h" #include "src/runtime/kernel/arm/base/group_convolution_creator.h" +#include "src/runtime/kernel/arm/base/group_convolution.h" #include "schema/model_generated.h" #include "include/errorcode.h" @@ -161,9 +162,9 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() { return kernel; } -kernel::LiteKernel *DispatchConvDw(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const InnerContext *ctx) { +kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, OpParameter *opParameter, + const InnerContext *ctx) { auto conv_param = reinterpret_cast(opParameter); kernel::LiteKernel *kernel = nullptr; if (opParameter != nullptr && opParameter->infer_flag_) { @@ -187,6 +188,29 @@ kernel::LiteKernel *DispatchConvDw(const std::vector &inputs, return kernel; } +kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, OpParameter *op_parameter, + const lite::InnerContext *ctx) { + auto conv_param = reinterpret_cast(op_parameter); + GroupConvCreator group_conv_creator(inputs, outputs, op_parameter, ctx, false, kNumberTypeFloat32); + group_conv_creator.SetShapeOfTensors(); + for (int i = 0; i < conv_param->group_; ++i) { + ConvParameter *new_conv_param = CreateNewConvParameter(conv_param); + std::vector new_inputs; + std::vector new_outputs; + auto ret = group_conv_creator.GetSingleConvParam(new_conv_param, &new_inputs, &new_outputs, i); + if (ret != RET_OK) { + MS_LOG(ERROR) << "GetSingleConv for fp32 group conv failed."; + return nullptr; + } + group_conv_creator.get_group_conv()->emplace_back(new (std::nothrow) ConvolutionDelegateCPUKernel( + reinterpret_cast(new_conv_param), new_inputs, new_outputs, ctx)); + } + return new (std::nothrow) + GroupConvolutionCPUKernel(op_parameter, inputs, outputs, ctx, *(group_conv_creator.get_group_conv()), + reinterpret_cast(op_parameter)->group_); +} + /* creator func */ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, @@ -200,7 +224,7 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector & if (conv_param->group_ == 1) { kernel = new (std::nothrow) kernel::ConvolutionDelegateCPUKernel(op_parameter, inputs, outputs, ctx); } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { - kernel = DispatchConvDw(inputs, outputs, op_parameter, ctx); + kernel = CpuConvDwFp32KernelCreator(inputs, outputs, op_parameter, ctx); } else { kernel = CpuGroupConvFp32KernelCreator(inputs, outputs, op_parameter, ctx); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8_creator.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8_creator.cc index de91a2910d..e1b9e22584 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8_creator.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8_creator.cc @@ -21,6 +21,7 @@ #include "src/runtime/kernel/arm/int8/convolution_depthwise_int8.h" #include "src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h" #include "src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h" +#include "src/runtime/kernel/arm/int8/group_convolution_int8.h" #include "src/runtime/kernel/arm/base/group_convolution_creator.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" @@ -83,6 +84,29 @@ kernel::LiteKernel *CpuConvInt8KernelSelect(const std::vector &i return kernel; } +kernel::LiteKernel *CpuGroupConvInt8KernelCreator(const std::vector &inputs, + const std::vector &outputs, OpParameter *op_parameter, + const lite::InnerContext *ctx, int group) { + auto conv_param = reinterpret_cast(op_parameter); + GroupConvCreator group_conv_creator(inputs, outputs, op_parameter, ctx, true, kNumberTypeInt8); + group_conv_creator.SetShapeOfTensors(); + for (int i = 0; i < conv_param->group_; ++i) { + ConvParameter *new_conv_param = CreateNewConvParameter(conv_param); + std::vector new_inputs; + std::vector new_outputs; + auto ret = group_conv_creator.GetSingleConvParam(new_conv_param, &new_inputs, &new_outputs, i); + if (ret != RET_OK) { + MS_LOG(ERROR) << "GetSingleConv for int8 group conv failed."; + return nullptr; + } + group_conv_creator.CopyQuantParam(&new_inputs); + group_conv_creator.get_group_conv()->emplace_back( + CpuConvInt8KernelSelect(new_inputs, new_outputs, reinterpret_cast(new_conv_param), ctx)); + } + return new (std::nothrow) + GroupConvolutionInt8CPUKernel(op_parameter, inputs, outputs, ctx, *(group_conv_creator.get_group_conv()), group); +} + kernel::LiteKernel *CpuConvInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, const InnerContext *ctx, const kernel::KernelKey &desc) {