From: @zoloft Reviewed-by: @wangchengyuan Signed-off-by: pull/13620/MERGE
| @@ -76,6 +76,7 @@ set(CODER_OPCODERS_SRC | |||||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc | ${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc | ||||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc | ${MICRO_DIR}/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc | ||||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc | ${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc | ||||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc | |||||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc | ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc | ||||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc | ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc | ||||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc | ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc | ||||
| @@ -281,8 +282,8 @@ endif() | |||||
| #### avx | #### avx | ||||
| if("${X86_64_SIMD}" STREQUAL "avx") | if("${X86_64_SIMD}" STREQUAL "avx") | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2") | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2") | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2") | |||||
| set(AVX_SRC | set(AVX_SRC | ||||
| ${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c | ${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c | ||||
| ${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c | ${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c | ||||
| @@ -20,23 +20,80 @@ | |||||
| #include "nnacl/fp32/winograd_utils.h" | #include "nnacl/fp32/winograd_utils.h" | ||||
| #include "nnacl/int8/quantize.h" | #include "nnacl/int8/quantize.h" | ||||
| #include "coder/log.h" | #include "coder/log.h" | ||||
| namespace mindspore::lite::micro { | |||||
| Conv2DBaseCoder::~Conv2DBaseCoder() { | |||||
| FreeConvQuantParams(); | |||||
| conv_param_ = nullptr; | |||||
| conv_quant_arg_ = nullptr; | |||||
| filter_tensor_ = nullptr; | |||||
| bias_tensor_ = nullptr; | |||||
| } | |||||
| void Conv2DBaseCoder::FreeConvQuantParams() { | |||||
| if (conv_quant_arg_ == nullptr) { | |||||
| return; | |||||
| } | |||||
| if (conv_quant_arg_->real_multiplier_ != nullptr) { | |||||
| free(conv_quant_arg_->real_multiplier_); | |||||
| conv_quant_arg_->real_multiplier_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->left_shift_ != nullptr) { | |||||
| free(conv_quant_arg_->left_shift_); | |||||
| conv_quant_arg_->left_shift_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->right_shift_ != nullptr) { | |||||
| free(conv_quant_arg_->right_shift_); | |||||
| conv_quant_arg_->right_shift_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->quant_multiplier_ != nullptr) { | |||||
| free(conv_quant_arg_->quant_multiplier_); | |||||
| conv_quant_arg_->quant_multiplier_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->out_act_min_ != nullptr) { | |||||
| free(conv_quant_arg_->out_act_min_); | |||||
| conv_quant_arg_->out_act_min_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->out_act_max_ != nullptr) { | |||||
| free(conv_quant_arg_->out_act_max_); | |||||
| conv_quant_arg_->out_act_max_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->input_quant_args_ != nullptr) { | |||||
| free(conv_quant_arg_->input_quant_args_); | |||||
| conv_quant_arg_->input_quant_args_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->filter_quant_args_ != nullptr) { | |||||
| free(conv_quant_arg_->filter_quant_args_); | |||||
| conv_quant_arg_->filter_quant_args_ = nullptr; | |||||
| } | |||||
| if (conv_quant_arg_->output_quant_args_ != nullptr) { | |||||
| free(conv_quant_arg_->output_quant_args_); | |||||
| conv_quant_arg_->output_quant_args_ = nullptr; | |||||
| } | |||||
| } | |||||
| namespace { | |||||
| int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) { | |||||
| int Conv2DBaseCoder::MallocConvQuantParams(size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) { | |||||
| MS_CHECK_TRUE(input_arg_num > 0, "invalid value of input_arg_num"); | MS_CHECK_TRUE(input_arg_num > 0, "invalid value of input_arg_num"); | ||||
| MS_CHECK_TRUE(filter_arg_num > 0, "invalid value of filter_arg_num"); | MS_CHECK_TRUE(filter_arg_num > 0, "invalid value of filter_arg_num"); | ||||
| MS_CHECK_TRUE(output_arg_num > 0, "invalid value of output_arg_num"); | MS_CHECK_TRUE(output_arg_num > 0, "invalid value of output_arg_num"); | ||||
| quant_arg->input_quant_args_ = static_cast<QuantArg *>(malloc(input_arg_num * sizeof(struct QuantArg))); | |||||
| MS_CHECK_PTR(quant_arg->input_quant_args_); | |||||
| quant_arg->filter_quant_args_ = static_cast<QuantArg *>(malloc(filter_arg_num * sizeof(QuantArg))); | |||||
| MS_CHECK_PTR(quant_arg->filter_quant_args_); | |||||
| quant_arg->output_quant_args_ = static_cast<QuantArg *>(malloc(output_arg_num * sizeof(QuantArg))); | |||||
| MS_CHECK_PTR(quant_arg->output_quant_args_); | |||||
| return mindspore::lite::RET_OK; | |||||
| conv_quant_arg_->input_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(input_arg_num * sizeof(::QuantArg))); | |||||
| if (conv_quant_arg_->input_quant_args_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| conv_quant_arg_->filter_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(filter_arg_num * sizeof(::QuantArg))); | |||||
| if (conv_quant_arg_->filter_quant_args_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| conv_quant_arg_->output_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(output_arg_num * sizeof(::QuantArg))); | |||||
| if (conv_quant_arg_->output_quant_args_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | } | ||||
| } // namespace | |||||
| namespace mindspore::lite::micro { | |||||
| std::string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) { | std::string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) { | ||||
| std::string ret; | std::string ret; | ||||
| if (src_format == schema::Format_NHWC && dst_format == schema::Format_NC4HW4) { | if (src_format == schema::Format_NHWC && dst_format == schema::Format_NC4HW4) { | ||||
| @@ -116,7 +173,7 @@ int Conv2DBaseCoder::MallocQuantParam() { | |||||
| conv_quant_arg_->input_arg_num_ = input_arg_num; | conv_quant_arg_->input_arg_num_ = input_arg_num; | ||||
| conv_quant_arg_->filter_arg_num_ = filter_arg_num; | conv_quant_arg_->filter_arg_num_ = filter_arg_num; | ||||
| conv_quant_arg_->output_arg_num_ = output_arg_num; | conv_quant_arg_->output_arg_num_ = output_arg_num; | ||||
| MallocConvQuantParams(conv_quant_arg_, input_arg_num, filter_arg_num, output_arg_num); | |||||
| MallocConvQuantParams(input_arg_num, filter_arg_num, output_arg_num); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -125,7 +182,7 @@ int Conv2DBaseCoder::SetInputTensorQuantParam() { | |||||
| if (in_arg_num == kPerTensor) { | if (in_arg_num == kPerTensor) { | ||||
| QuantArg input_quant_arg = input_tensor_->quant_params().at(0); | QuantArg input_quant_arg = input_tensor_->quant_params().at(0); | ||||
| conv_quant_arg_->input_quant_args_[0].zp_ = input_quant_arg.zeroPoint; | conv_quant_arg_->input_quant_args_[0].zp_ = input_quant_arg.zeroPoint; | ||||
| conv_quant_arg_->input_quant_args_[0].scale_ = input_quant_arg.scale; | |||||
| conv_quant_arg_->input_quant_args_[0].scale_ = static_cast<float>(input_quant_arg.scale); | |||||
| return RET_OK; | return RET_OK; | ||||
| } else { | } else { | ||||
| // per channel | // per channel | ||||
| @@ -139,12 +196,12 @@ int Conv2DBaseCoder::SetFilterTensorQuantParam() { | |||||
| if (weight_arg_num == kPerTensor) { | if (weight_arg_num == kPerTensor) { | ||||
| QuantArg weight_quant_arg = filter_tensor_->quant_params().at(0); | QuantArg weight_quant_arg = filter_tensor_->quant_params().at(0); | ||||
| conv_quant_arg_->filter_quant_args_[0].zp_ = weight_quant_arg.zeroPoint; | conv_quant_arg_->filter_quant_args_[0].zp_ = weight_quant_arg.zeroPoint; | ||||
| conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale; | |||||
| conv_quant_arg_->filter_quant_args_[0].scale_ = static_cast<float>(weight_quant_arg.scale); | |||||
| } else { | } else { | ||||
| std::vector<QuantArg> weight_quant_arg = filter_tensor_->quant_params(); | std::vector<QuantArg> weight_quant_arg = filter_tensor_->quant_params(); | ||||
| for (int i = 0; i < static_cast<int>(weight_arg_num); ++i) { | for (int i = 0; i < static_cast<int>(weight_arg_num); ++i) { | ||||
| conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint; | conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint; | ||||
| conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale; | |||||
| conv_quant_arg_->filter_quant_args_[i].scale_ = static_cast<float>(weight_quant_arg[i].scale); | |||||
| } | } | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -155,7 +212,7 @@ int Conv2DBaseCoder::SetOutputTensorQuantParam() { | |||||
| if (out_arg_num == kPerTensor) { | if (out_arg_num == kPerTensor) { | ||||
| QuantArg output_quant_arg = output_tensor_->quant_params().at(0); | QuantArg output_quant_arg = output_tensor_->quant_params().at(0); | ||||
| conv_quant_arg_->output_quant_args_[0].zp_ = output_quant_arg.zeroPoint; | conv_quant_arg_->output_quant_args_[0].zp_ = output_quant_arg.zeroPoint; | ||||
| conv_quant_arg_->output_quant_args_[0].scale_ = output_quant_arg.scale; | |||||
| conv_quant_arg_->output_quant_args_[0].scale_ = static_cast<float>(output_quant_arg.scale); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Not Support Per Channel for input now."; | MS_LOG(ERROR) << "Not Support Per Channel for input now."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -170,17 +227,35 @@ int Conv2DBaseCoder::SetQuantMultiplier() { | |||||
| weight_arg_num = conv_quant_arg_->filter_arg_num_; | weight_arg_num = conv_quant_arg_->filter_arg_num_; | ||||
| } | } | ||||
| conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(weight_arg_num * sizeof(double))); | conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(weight_arg_num * sizeof(double))); | ||||
| MS_CHECK_PTR(conv_quant_arg_->real_multiplier_); | |||||
| if (conv_quant_arg_->real_multiplier_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | ||||
| MS_CHECK_PTR(conv_quant_arg_->left_shift_); | |||||
| if (conv_quant_arg_->left_shift_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | ||||
| MS_CHECK_PTR(conv_quant_arg_->right_shift_); | |||||
| if (conv_quant_arg_->right_shift_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | ||||
| MS_CHECK_PTR(conv_quant_arg_->quant_multiplier_); | |||||
| if (conv_quant_arg_->quant_multiplier_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t))); | conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t))); | ||||
| MS_CHECK_PTR(conv_quant_arg_->out_act_min_); | |||||
| if (conv_quant_arg_->out_act_min_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t))); | conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t))); | ||||
| MS_CHECK_PTR(conv_quant_arg_->out_act_max_); | |||||
| if (conv_quant_arg_->out_act_max_ == nullptr) { | |||||
| FreeConvQuantParams(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| for (int i = 0; i < weight_arg_num; ++i) { | for (int i = 0; i < weight_arg_num; ++i) { | ||||
| const auto in_scale = | const auto in_scale = | ||||
| static_cast<double>(conv_quant_arg_->input_quant_args_[0].scale_ * conv_quant_arg_->filter_quant_args_[i].scale_); | static_cast<double>(conv_quant_arg_->input_quant_args_[0].scale_ * conv_quant_arg_->filter_quant_args_[i].scale_); | ||||
| @@ -197,7 +272,7 @@ int Conv2DBaseCoder::SetQuantMultiplier() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int Conv2DBaseCoder::CheckResizeValid() const { | |||||
| int Conv2DBaseCoder::CheckResizeValid() { | |||||
| // ===============check in channel================= // | // ===============check in channel================= // | ||||
| int32_t filter_in_channel = filter_tensor_->Channel(); | int32_t filter_in_channel = filter_tensor_->Channel(); | ||||
| int32_t resize_in_channel = input_tensor_->Channel(); | int32_t resize_in_channel = input_tensor_->Channel(); | ||||
| @@ -240,10 +315,6 @@ int Conv2DBaseCoder::SetQuantParam() { | |||||
| MS_CHECK_RET_CODE(SetIfPerChannel(), "Set if per tensor channel failed."); | MS_CHECK_RET_CODE(SetIfPerChannel(), "Set if per tensor channel failed."); | ||||
| SetRoundingAndMultipilerMode(); | SetRoundingAndMultipilerMode(); | ||||
| MS_CHECK_RET_CODE(SetQuantMultiplier(), "Set Quant Multiplier Failed."); | MS_CHECK_RET_CODE(SetQuantMultiplier(), "Set Quant Multiplier Failed."); | ||||
| // now only consider per tensor for output | |||||
| MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_min_); | |||||
| MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_max_); | |||||
| MS_CHECK_PTR(conv_param_->conv_quant_arg_.output_quant_args_); | |||||
| bool relu = conv_param_->act_type_ == ActType_Relu; | bool relu = conv_param_->act_type_ == ActType_Relu; | ||||
| bool relu6 = conv_param_->act_type_ == ActType_Relu6; | bool relu6 = conv_param_->act_type_ == ActType_Relu6; | ||||
| CalculateActivationRangeQuantized(relu, relu6, conv_param_->conv_quant_arg_.output_quant_args_[0].zp_, | CalculateActivationRangeQuantized(relu, relu6, conv_param_->conv_quant_arg_.output_quant_args_[0].zp_, | ||||
| @@ -32,24 +32,7 @@ class Conv2DBaseCoder : public OperatorCoder { | |||||
| const Model::Node *node, size_t node_index, Target target) | const Model::Node *node, size_t node_index, Target target) | ||||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | ||||
| ~Conv2DBaseCoder() override { | |||||
| if (conv_quant_arg_ == nullptr) { | |||||
| return; | |||||
| } | |||||
| free(conv_quant_arg_->real_multiplier_); | |||||
| free(conv_quant_arg_->left_shift_); | |||||
| free(conv_quant_arg_->right_shift_); | |||||
| free(conv_quant_arg_->quant_multiplier_); | |||||
| free(conv_quant_arg_->out_act_min_); | |||||
| free(conv_quant_arg_->out_act_max_); | |||||
| free(conv_quant_arg_->input_quant_args_); | |||||
| free(conv_quant_arg_->filter_quant_args_); | |||||
| free(conv_quant_arg_->output_quant_args_); | |||||
| conv_param_ = nullptr; | |||||
| conv_quant_arg_ = nullptr; | |||||
| filter_tensor_ = nullptr; | |||||
| bias_tensor_ = nullptr; | |||||
| } | |||||
| ~Conv2DBaseCoder() override; | |||||
| protected: | protected: | ||||
| virtual int Init(); | virtual int Init(); | ||||
| @@ -68,7 +51,7 @@ class Conv2DBaseCoder : public OperatorCoder { | |||||
| int SetQuantMultiplier(); | int SetQuantMultiplier(); | ||||
| int CheckResizeValid() const; | |||||
| int CheckResizeValid(); | |||||
| int SetIfPerChannel(); | int SetIfPerChannel(); | ||||
| @@ -80,6 +63,11 @@ class Conv2DBaseCoder : public OperatorCoder { | |||||
| std::string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format); | std::string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format); | ||||
| private: | |||||
| int MallocConvQuantParams(size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num); | |||||
| void FreeConvQuantParams(); | |||||
| protected: | |||||
| ConvParameter *conv_param_{nullptr}; | ConvParameter *conv_param_{nullptr}; | ||||
| ConvQuantArg *conv_quant_arg_{nullptr}; | ConvQuantArg *conv_quant_arg_{nullptr}; | ||||
| @@ -0,0 +1,147 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h" | |||||
| #include "src/common/version_manager.h" | |||||
| #include "src/ops/populate/populate_register.h" | |||||
| #include "nnacl/fp32/winograd_utils.h" | |||||
| #include "coder/opcoders/nnacl/fp32/convolution_fp32_coder.h" | |||||
| #include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h" | |||||
| #include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" | |||||
| using mindspore::schema::PrimitiveType_Conv2DFusion; | |||||
| namespace mindspore::lite::micro::nnacl { | |||||
| int ConvDelegateCoder::Prepare(CoderContext *const context) { | |||||
| // Update shape info of input and output | |||||
| SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(parameter_), input_tensor_, output_tensor_); | |||||
| if (conv_coder_ == nullptr) { | |||||
| // need to select actual execute coder here | |||||
| conv_coder_ = CPUConvolutionFP32CoderSelect(input_tensors_, output_tensors_, node_, node_index(), target_); | |||||
| MS_CHECK_PTR(conv_coder_); | |||||
| const void *primitive = node_->primitive_; | |||||
| MS_CHECK_PTR(primitive); | |||||
| int primitive_type = GetPrimitiveType(node_->primitive_); | |||||
| int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||||
| ParameterGen parameter_gen = | |||||
| PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node_->primitive_), schema_version); | |||||
| MS_CHECK_PTR(parameter_gen); | |||||
| OpParameter *op_parameter = parameter_gen(node_->primitive_); | |||||
| op_parameter->thread_num_ = thread_num_; | |||||
| conv_coder_->set_type(primitive_type); | |||||
| conv_coder_->set_thread_num(thread_num_); | |||||
| conv_coder_->set_parameter(op_parameter); | |||||
| } | |||||
| return conv_coder_->Prepare(context); | |||||
| } | |||||
| int ConvDelegateCoder::DoCode(CoderContext *const context) { return conv_coder_->DoCode(context); } | |||||
| void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output) { | |||||
| conv_param->input_batch_ = input->Batch(); | |||||
| conv_param->input_h_ = input->Height(); | |||||
| conv_param->input_w_ = input->Width(); | |||||
| conv_param->input_channel_ = input->Channel(); | |||||
| conv_param->output_batch_ = output->Batch(); | |||||
| conv_param->output_h_ = output->Height(); | |||||
| conv_param->output_w_ = output->Width(); | |||||
| conv_param->output_channel_ = output->Channel(); | |||||
| } | |||||
| std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderSelect(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, | |||||
| Target target) { | |||||
| const void *primitive = node->primitive_; | |||||
| if (primitive == nullptr) { | |||||
| return nullptr; | |||||
| } | |||||
| int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||||
| ParameterGen paramGen = | |||||
| PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); | |||||
| if (paramGen == nullptr) { | |||||
| MS_LOG(ERROR) << "parameter generator is null"; | |||||
| return nullptr; | |||||
| } | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_)); | |||||
| bool use_winograd = false; | |||||
| int out_unit = 0; | |||||
| int kernel_h = conv_param->kernel_h_; | |||||
| int kernel_w = conv_param->kernel_w_; | |||||
| conv_param->input_h_ = in_tensors.at(kInputIndex)->Height(); | |||||
| conv_param->input_w_ = in_tensors.at(kInputIndex)->Width(); | |||||
| conv_param->input_channel_ = in_tensors.at(kInputIndex)->Channel(); | |||||
| conv_param->output_h_ = out_tensors.at(kOutputIndex)->Height(); | |||||
| conv_param->output_w_ = out_tensors.at(kOutputIndex)->Width(); | |||||
| conv_param->output_channel_ = out_tensors.at(kOutputIndex)->Channel(); | |||||
| conv_param->op_parameter_.thread_num_ = 1; | |||||
| use_winograd = CheckIfUseWinograd(&out_unit, conv_param); | |||||
| free(conv_param); | |||||
| std::unique_ptr<OperatorCoder> coder; | |||||
| if (kernel_h == 1 && kernel_w == 1) { | |||||
| MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; | |||||
| coder = CPUOpCoderCreator<ConvolutionFP32Coder>(in_tensors, out_tensors, node, node_index, target); | |||||
| } else if (use_winograd) { | |||||
| MS_LOG(DEBUG) << "create Conv2DWinogradFP32Coder"; | |||||
| coder = std::make_unique<ConvolutionWinogradFP32Coder>(in_tensors, out_tensors, node, node_index, target, out_unit); | |||||
| } else { | |||||
| MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; | |||||
| coder = CPUOpCoderCreator<ConvolutionFP32Coder>(in_tensors, out_tensors, node, node_index, target); | |||||
| } | |||||
| return coder; | |||||
| } | |||||
| std::unique_ptr<OperatorCoder> CreateDelegateConv(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, const Model::Node *node, | |||||
| size_t node_index, Target target) { | |||||
| return CPUOpCoderCreator<ConvDelegateCoder>(in_tensors, out_tensors, node, node_index, target); | |||||
| } | |||||
| std::unique_ptr<OperatorCoder> CPUConvDwFp32CoderCreator(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target) { | |||||
| return CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>(in_tensors, out_tensors, node, node_index, target); | |||||
| } | |||||
| std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, | |||||
| Target target) { | |||||
| const void *primitive = node->primitive_; | |||||
| if (primitive == nullptr) { | |||||
| return nullptr; | |||||
| } | |||||
| int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||||
| ParameterGen paramGen = | |||||
| PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); | |||||
| if (paramGen == nullptr) { | |||||
| MS_LOG(ERROR) << "parameter generator is null"; | |||||
| return nullptr; | |||||
| } | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_)); | |||||
| std::unique_ptr<OperatorCoder> coder; | |||||
| if (conv_param->group_ == 1) { | |||||
| coder = CreateDelegateConv(in_tensors, out_tensors, node, node_index, target); | |||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | |||||
| coder = CPUConvDwFp32CoderCreator(in_tensors, out_tensors, node, node_index, target); | |||||
| } else { | |||||
| // GroupConv | |||||
| return nullptr; | |||||
| } | |||||
| return coder; | |||||
| } | |||||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator) | |||||
| } // namespace mindspore::lite::micro::nnacl | |||||
| @@ -0,0 +1,56 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H | |||||
| #define MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "coder/opcoders/op_coder.h" | |||||
| #include "nnacl/conv_parameter.h" | |||||
| namespace mindspore::lite::micro::nnacl { | |||||
| class ConvDelegateCoder : public OperatorCoder { | |||||
| public: | |||||
| ConvDelegateCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target) | |||||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||||
| ~ConvDelegateCoder() override = default; | |||||
| int Prepare(CoderContext *const context) override; | |||||
| int DoCode(CoderContext *const context) override; | |||||
| protected: | |||||
| std::unique_ptr<OperatorCoder> conv_coder_ = nullptr; | |||||
| }; | |||||
| void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output); | |||||
| std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderSelect(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target); | |||||
| std::unique_ptr<OperatorCoder> CreateDelegateConv(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, const Model::Node *node, | |||||
| size_t node_index, Target target); | |||||
| std::unique_ptr<OperatorCoder> CPUConvDwFp32CoderCreator(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target); | |||||
| std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, | |||||
| Target target); | |||||
| } // namespace mindspore::lite::micro::nnacl | |||||
| #endif // MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H | |||||
| @@ -32,17 +32,14 @@ using mindspore::schema::PrimitiveType_Conv2DFusion; | |||||
| namespace mindspore::lite::micro::nnacl { | namespace mindspore::lite::micro::nnacl { | ||||
| int ConvolutionFP32Coder::InitTmpBuffer() { | int ConvolutionFP32Coder::InitTmpBuffer() { | ||||
| int in_channel = conv_param_->input_channel_; | int in_channel = conv_param_->input_channel_; | ||||
| int uint_size; | |||||
| if (target_ == kARM32A) { | |||||
| uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C4NUM * thread_num_; | |||||
| } else { | |||||
| uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C12NUM * thread_num_; | |||||
| } | |||||
| int uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C12NUM * thread_num_; | |||||
| packed_input_size_ = uint_size * sizeof(float); | packed_input_size_ = uint_size * sizeof(float); | ||||
| packed_input_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, packed_input_size_, kWorkspace)); | packed_input_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, packed_input_size_, kWorkspace)); | ||||
| MS_CHECK_PTR(packed_input_); | |||||
| col_major_input_size_ = uint_size * sizeof(float); | col_major_input_size_ = uint_size * sizeof(float); | ||||
| col_major_input_ = | col_major_input_ = | ||||
| reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, col_major_input_size_, kWorkspace)); | reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, col_major_input_size_, kWorkspace)); | ||||
| MS_CHECK_PTR(col_major_input_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -68,12 +65,13 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) { | |||||
| conv_param_->input_channel_ = in_channel; | conv_param_->input_channel_ = in_channel; | ||||
| conv_param_->output_channel_ = out_channel; | conv_param_->output_channel_ = out_channel; | ||||
| int kernel_plane = kernel_h * kernel_w; | int kernel_plane = kernel_h * kernel_w; | ||||
| const int oc_block = C8NUM; | |||||
| int oc_block_num = UP_DIV(out_channel, C8NUM); | |||||
| int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane; | |||||
| int oc_block = C8NUM; | |||||
| if (target_ == kARM32A) { | |||||
| oc_block = C4NUM; | |||||
| } | |||||
| int oc_block_num = UP_ROUND(out_channel, oc_block); | |||||
| int pack_weight_size = oc_block_num * in_channel * kernel_plane; | |||||
| pack_weight_size_ = pack_weight_size * sizeof(float); | pack_weight_size_ = pack_weight_size * sizeof(float); | ||||
| auto origin_weight = reinterpret_cast<float *>(filter_tensor_->MutableData()); | |||||
| MS_CHECK_PTR(origin_weight); | |||||
| packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | ||||
| MS_CHECK_PTR(packed_weight_); | MS_CHECK_PTR(packed_weight_); | ||||
| auto out_channel_size = static_cast<size_t>(out_channel); | auto out_channel_size = static_cast<size_t>(out_channel); | ||||
| @@ -88,10 +86,15 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) { | |||||
| } | } | ||||
| init_code.CodeMallocExpression(packed_weight_, pack_weight_size_); | init_code.CodeMallocExpression(packed_weight_, pack_weight_size_); | ||||
| init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_); | init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_); | ||||
| init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size, | |||||
| in_channel * kernel_plane); | |||||
| if (target_ == kARM32A) { | |||||
| init_code.CodeFunction("RowMajor2Col4Major", init_weight_str, packed_weight_, out_channel_size, | |||||
| in_channel * kernel_plane); | |||||
| } else { | |||||
| init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size, | |||||
| in_channel * kernel_plane); | |||||
| } | |||||
| auto bias_data_size = static_cast<size_t>(oc_block_num * oc_block * sizeof(float)); | |||||
| auto bias_data_size = static_cast<size_t>(oc_block_num * sizeof(float)); | |||||
| bias_data_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | bias_data_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | ||||
| MS_CHECK_PTR(bias_data_); | MS_CHECK_PTR(bias_data_); | ||||
| if (input_tensors_.size() == kInputSize2) { | if (input_tensors_.size() == kInputSize2) { | ||||
| @@ -140,78 +143,4 @@ int ConvolutionFP32Coder::DoCode(CoderContext *const context) { | |||||
| context->AppendCode(code.str()); | context->AppendCode(code.str()); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, | |||||
| Target target) { | |||||
| std::vector<Tensor *> inputs = in_tensors; | |||||
| std::vector<Tensor *> outputs = out_tensors; | |||||
| const void *primitive = node->primitive_; | |||||
| if (primitive == nullptr) { | |||||
| return nullptr; | |||||
| } | |||||
| int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||||
| ParameterGen paramGen = | |||||
| PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); | |||||
| if (paramGen == nullptr) { | |||||
| MS_LOG(ERROR) << "parameter generator is null"; | |||||
| return nullptr; | |||||
| } | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_)); | |||||
| int kernel_h = conv_param->kernel_h_; | |||||
| int kernel_w = conv_param->kernel_w_; | |||||
| conv_param->input_h_ = inputs.at(kInputIndex)->Height(); | |||||
| conv_param->input_w_ = inputs.at(kInputIndex)->Width(); | |||||
| conv_param->input_channel_ = inputs.at(kInputIndex)->Channel(); | |||||
| conv_param->output_h_ = outputs.at(kOutputIndex)->Height(); | |||||
| conv_param->output_w_ = outputs.at(kOutputIndex)->Width(); | |||||
| conv_param->output_channel_ = outputs.at(kOutputIndex)->Channel(); | |||||
| conv_param->op_parameter_.thread_num_ = 1; | |||||
| int out_unit = 0; | |||||
| bool use_winograd = CheckIfUseWinograd(&out_unit, conv_param); | |||||
| free(conv_param); | |||||
| // weight de quant | |||||
| std::unique_ptr<OperatorCoder> coder; | |||||
| if (kernel_h == 1 && kernel_w == 1) { | |||||
| MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; | |||||
| coder = CPUOpCoderCreator<ConvolutionFP32Coder>(in_tensors, out_tensors, node, node_index, target); | |||||
| } else if (use_winograd) { | |||||
| MS_LOG(DEBUG) << "create Conv2DWinogradFP32Coder"; | |||||
| coder = std::make_unique<ConvolutionWinogradFP32Coder>(in_tensors, out_tensors, node, node_index, target, out_unit); | |||||
| } else { | |||||
| MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; | |||||
| coder = CPUOpCoderCreator<ConvolutionFP32Coder>(in_tensors, out_tensors, node, node_index, target); | |||||
| } | |||||
| return coder; | |||||
| } | |||||
| std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, | |||||
| Target target) { | |||||
| const void *primitive = node->primitive_; | |||||
| if (primitive == nullptr) { | |||||
| return nullptr; | |||||
| } | |||||
| int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||||
| ParameterGen paramGen = | |||||
| PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); | |||||
| if (paramGen == nullptr) { | |||||
| MS_LOG(ERROR) << "parameter generator is null"; | |||||
| return nullptr; | |||||
| } | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_)); | |||||
| std::unique_ptr<OperatorCoder> coder; | |||||
| if (conv_param->group_ == 1) { | |||||
| coder = CPUConvolutionFP32CoderCreator(in_tensors, out_tensors, node, node_index, target); | |||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | |||||
| coder = CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>(in_tensors, out_tensors, node, node_index, target); | |||||
| } else { | |||||
| // GroupConv | |||||
| } | |||||
| return coder; | |||||
| } | |||||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator) | |||||
| } // namespace mindspore::lite::micro::nnacl | } // namespace mindspore::lite::micro::nnacl | ||||
| @@ -25,6 +25,7 @@ OperatorCoder::~OperatorCoder() { | |||||
| if (parameter_ != nullptr) { | if (parameter_ != nullptr) { | ||||
| free(parameter_); | free(parameter_); | ||||
| } | } | ||||
| parameter_ = nullptr; | |||||
| } | } | ||||
| const std::vector<Tensor *> OperatorCoder::input_tensors() const { return input_tensors_; } | const std::vector<Tensor *> OperatorCoder::input_tensors() const { return input_tensors_; } | ||||