| @@ -99,6 +99,7 @@ set(CODER_OPCODERS_SRC | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/splice_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc | |||
| #### nnacl int8 coder | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/activation_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/add_int8_coder.cc | |||
| @@ -188,11 +189,13 @@ set(LITE_KERNEL_SRC | |||
| ${NNACL_DIR}/fp32/winograd_utils.c | |||
| ${NNACL_DIR}/fp32/pack_fp32.c | |||
| ${NNACL_DIR}/fp32/arithmetic_fp32.c | |||
| ${NNACL_DIR}/fp32/deconv_fp32.c | |||
| ${NNACL_DIR}/fp32/matmul_fp32.c | |||
| ${NNACL_DIR}/fp32/common_func_fp32.c | |||
| ${NNACL_DIR}/int8/quantize.c | |||
| ${NNACL_DIR}/int8/pack_int8.c | |||
| ${NNACL_DIR}/int8/matmul_int8.c | |||
| ${NNACL_DIR}/int8/fixed_point.c | |||
| ${NNACL_DIR}/fp32/matmul_fp32.c | |||
| ${NNACL_DIR}/int8/arithmetic_int8.c | |||
| ${NNACL_DIR}/int8/add_int8.c | |||
| ${NNACL_DIR}/int8/concat_int8.c | |||
| @@ -288,6 +291,8 @@ set(LITE_KERNEL_SRC | |||
| if("${X86_64_SIMD}" STREQUAL "sse") #[[ SSE kernel sources are only collected when X86_64_SIMD is exactly "sse" ]] | |||
| set(SSE_SRC | |||
| ${NNACL_DIR}/intrinsics/sse/MatMul_Sse.c | |||
| ${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC8.c | |||
| ${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC4.c | |||
| ) | |||
| set_property(SOURCE ${SSE_SRC} PROPERTY LANGUAGE C) #[[ sets the LANGUAGE property of every file in SSE_SRC to C ]] | |||
| endif() | |||
| @@ -299,6 +304,8 @@ if("${X86_64_SIMD}" STREQUAL "avx") | |||
| set(AVX_SRC | |||
| ${NNACL_DIR}/intrinsics/avx/common_utils.c | |||
| ${NNACL_DIR}/intrinsics/sse/MatMul_Sse.c | |||
| ${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC8.c | |||
| ${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC4.c | |||
| ${NNACL_DIR}/assembly/avx/MatmulAvx.S | |||
| ) | |||
| set_property(SOURCE ${AVX_SRC} PROPERTY LANGUAGE C) | |||
| @@ -7,6 +7,7 @@ set(WRAPPER_SRC | |||
| ${WRAPPER_DIR}/base/optimize_handler_wrapper.c | |||
| ${WRAPPER_DIR}/fp32/matmul_fp32_wrapper.c | |||
| ${WRAPPER_DIR}/fp32/arithmetic_fp32_wrapper.c | |||
| ${WRAPPER_DIR}/fp32/deconvolution_fp32_wrapper.c | |||
| ${WRAPPER_DIR}/int8/matmul_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/add_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/concat_int8_wrapper.c | |||
| @@ -0,0 +1,196 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.h" | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" | |||
| #include "nnacl/fp32/winograd_utils.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/parallel.h" | |||
| #include "src/common/version_manager.h" | |||
| #include "coder/opcoders/nnacl/dequant/de_quant.h" | |||
| using mindspore::schema::PrimitiveType_Conv2dTransposeFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| int DeConvolutionFP32Coder::InitRunBuf() { | |||
| // Sizes and allocates the three workspace buffers used by the generated run code. | |||
| // Returns RET_OK once packed_output_, tmp_buffer_ and packed_input_ have all passed MS_CHECK_PTR. | |||
| // Row extent shared by tmp_buffer_ and packed_input_: the 4-aligned row count on ARM32, the 12-aligned one otherwise. | |||
| const auto aligned_rows = (target_ == kARM32A) ? matmul_param_.row_4_ : matmul_param_.row_12_; | |||
| // Output staging area: output channels rounded up to C8NUM, times the output plane. | |||
| pack_output_size_ = UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float); | |||
| packed_output_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, pack_output_size_, kWorkspace)); | |||
| MS_CHECK_PTR(packed_output_); | |||
| // Scratch buffer: aligned rows by col_8_ floats. | |||
| tmp_buffer_size_ = aligned_rows * matmul_param_.col_8_ * sizeof(float); | |||
| tmp_buffer_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, tmp_buffer_size_, kWorkspace)); | |||
| MS_CHECK_PTR(tmp_buffer_); | |||
| // Packed input: aligned rows by deep_ floats. | |||
| pack_input_size_ = aligned_rows * matmul_param_.deep_ * sizeof(float); | |||
| packed_input_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, pack_input_size_, kWorkspace)); | |||
| MS_CHECK_PTR(packed_input_); | |||
| return RET_OK; | |||
| } | |||
| int DeConvolutionFP32Coder::InitParam() { | |||
| // Derives the plane sizes and the matmul dimensions from conv_param_. Always returns RET_OK. | |||
| const auto out_channel = conv_param_->output_channel_; | |||
| // height * width of the input map, the kernel window and the output map. | |||
| input_plane_ = conv_param_->input_w_ * conv_param_->input_h_; | |||
| kernel_plane_ = conv_param_->kernel_h_ * conv_param_->kernel_w_; | |||
| output_plane_ = conv_param_->output_w_ * conv_param_->output_h_; | |||
| // Unpadded matmul shape: one row per input position, input channels as depth, | |||
| // and one column per (output channel, kernel position) pair. | |||
| matmul_param_.row_ = input_plane_; | |||
| matmul_param_.deep_ = conv_param_->input_channel_; | |||
| matmul_param_.col_ = out_channel * kernel_plane_; | |||
| // Padded extents: rows rounded up to C4NUM and C12NUM, output channels rounded up to C8NUM. | |||
| matmul_param_.row_4_ = UP_ROUND(matmul_param_.row_, C4NUM); | |||
| matmul_param_.row_12_ = UP_ROUND(matmul_param_.row_, C12NUM); | |||
| matmul_param_.col_8_ = UP_ROUND(out_channel, C8NUM) * kernel_plane_; | |||
| return RET_OK; | |||
| } | |||
| int DeConvolutionFP32Coder::Prepare(CoderContext *const context) { /* Setup entry point: base conv init, then weight/bias init-code emission, then Resize(); returns whatever Resize() returns once the two checks pass. */ | |||
| MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2DBaseCoder::Init() failed."); | |||
| MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed."); /* runs before Resize(): it writes conv_param_ channel counts that InitParam() reads */ | |||
| return Resize(); /* NOTE(review): Resize() calls Conv2DBaseCoder::Init() a second time - confirm the repeat is intended */ | |||
| } | |||
| int DeConvolutionFP32Coder::Resize() { /* Runs base init, then InitParam(), then InitRunBuf(); returns RET_OK when all three checks pass. */ | |||
| MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "init failed."); | |||
| MS_CHECK_RET_CODE(InitParam(), "init param failed."); /* sets the plane sizes and matmul_param_ fields that InitRunBuf() uses for buffer sizes */ | |||
| MS_CHECK_RET_CODE(InitRunBuf(), "init run buffer failed."); | |||
| return RET_OK; | |||
| } | |||
| int DeConvolutionFP32Coder::InitWeightBias(CoderContext *const context) { | |||
| // Reserves the packed weight (and, when a bias input is present, packed bias) buffers and appends | |||
| // the init code that allocates, zeroes and fills them to `context`. | |||
| // Also writes input_channel_ / output_channel_ of conv_param_ from the filter tensor. | |||
| // Returns RET_OK on success; MS_CHECK_PTR handles a failed allocation. | |||
| int kernel_h = filter_tensor_->Height(); | |||
| int kernel_w = filter_tensor_->Width(); | |||
| // NOTE(review): the input channel count is read from Channel() and the output channel count from Batch(), | |||
| // yet PackNHWCToC8HWN8Fp32 below receives in_channel in the position before the plane and out_channel after it. | |||
| // Confirm this matches the filter layout. | |||
| int in_channel = filter_tensor_->Channel(); | |||
| int out_channel = filter_tensor_->Batch(); | |||
| conv_param_->input_channel_ = in_channel; | |||
| conv_param_->output_channel_ = out_channel; | |||
| const bool has_bias = input_tensors_.size() == kInputSize2; | |||
| if (has_bias) { | |||
| bias_data_size_ = UP_ROUND(out_channel, C4NUM) * sizeof(float); | |||
| packed_bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | |||
| MS_CHECK_PTR(packed_bias_); | |||
| } | |||
| int kernel_plane = kernel_h * kernel_w; | |||
| int pack_weight_size = in_channel * kernel_plane; | |||
| pack_weight_size_ = pack_weight_size * UP_ROUND(out_channel, C8NUM) * sizeof(float); | |||
| packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | |||
| MS_CHECK_PTR(packed_weight_); | |||
| NNaclFp32Serializer init_code; | |||
| if (has_bias) { | |||
| init_code.CodeMallocExpression(packed_bias_, bias_data_size_); | |||
| // Zero exactly the bytes that were allocated for the bias. The previous code cleared pack_weight_size_ bytes here, | |||
| // which overruns the bias_data_size_-byte buffer in the generated init code. | |||
| init_code.CodeFunction("memset", packed_bias_, 0, bias_data_size_); | |||
| init_code.CodeFunction("memcpy", packed_bias_, bias_tensor_, out_channel * sizeof(float)); | |||
| } | |||
| init_code.CodeMallocExpression(packed_weight_, pack_weight_size_); | |||
| init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_); | |||
| init_code.CodeFunction("PackNHWCToC8HWN8Fp32", filter_tensor_, packed_weight_, in_channel, kernel_plane, out_channel); | |||
| context->AppendInitCode(init_code.str()); | |||
| return RET_OK; | |||
| } | |||
| int DeConvolutionFP32Coder::DoCode(CoderContext *const context) { | |||
| // Emits the per-inference code for this operator into `context`: | |||
| // 1. registers the headers, C sources and (on ARM targets) assembly files the generated code needs, | |||
| // 2. clears the workspace buffers and serializes conv_param_ / matmul_param_, | |||
| // 3. for every input batch, packs the input and calls DeConvFp32Run directly or through the parallel launcher. | |||
| // Returns RET_OK. | |||
| Collect(context, | |||
| { | |||
| "wrapper/fp32/deconvolution_fp32_wrapper.h", | |||
| "nnacl/fp32/conv_common_fp32.h", | |||
| "nnacl/pack.h", | |||
| "nnacl/fp32/common_func_fp32.h", | |||
| "nnacl/base/minimal_filtering_generator.h", | |||
| "nnacl/fp32/matmul_fp32.h", | |||
| "nnacl/conv_parameter.h", | |||
| "nnacl/matmul_parameter.h", | |||
| "nnacl/op_base.h", | |||
| }, | |||
| { | |||
| "deconvolution_fp32_wrapper.c", | |||
| "common_func.c", | |||
| "conv_common_fp32.c", | |||
| "matmul_fp32.c", | |||
| "pack_fp32.c", | |||
| "deconv_fp32.c", | |||
| "minimal_filter_generator.c", | |||
| }); | |||
| if (target_ == kARM32A) { | |||
| Collect(context, {}, {}, | |||
| { | |||
| "MatmulFp32.S", | |||
| "MatmulFp32Opt.S", | |||
| "PreSum4x16Int8Peroc.S", | |||
| "PreSum4x16Int8Pert.S", | |||
| "IndirectGemmInt16to32_8x4.S", | |||
| "MatmulInt8.S", | |||
| "MatmulFp32Opt12x4.S", | |||
| }); | |||
| } else if (target_ == kARM64) { | |||
| // "PreSum4x16Int8Peroc.S" used to be listed twice here; one entry is enough. | |||
| Collect(context, {}, {}, | |||
| { | |||
| "MatmulFp32.S", | |||
| "MatmulFp32Opt.S", | |||
| "PreSum4x16Int8Peroc.S", | |||
| "MatVecMulFp32.S", | |||
| "PreSum4x16Int8Pert.S", | |||
| "IndirectGemmInt16to32_8x4.S", | |||
| "MatmulInt8.S", | |||
| }); | |||
| } | |||
| NNaclFp32Serializer code; | |||
| // call the op function | |||
| code.CodeFunction("memset", packed_input_, "0", pack_input_size_); | |||
| code.CodeFunction("memset", packed_output_, "0", pack_output_size_); | |||
| code.CodeFunction("memset", tmp_buffer_, "0", tmp_buffer_size_); | |||
| code.CodeStruct("conv_parameter", *conv_param_); | |||
| code.CodeStruct("matmul_parameter", matmul_param_); | |||
| std::string src_in_ptr_str = allocator_->GetRuntimeAddr(input_tensor_); | |||
| std::string src_out_ptr_str = allocator_->GetRuntimeAddr(output_tensor_); | |||
| for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { | |||
| // The batch offset has to be emitted as an explicit "base + offset" expression. Concatenating the digits | |||
| // straight onto the address text (as before) glued them to the base expression instead of adding them. | |||
| // Assumes GetRuntimeAddr yields a float * expression so the offset counts floats - TODO confirm. | |||
| const int in_offset = batch_index * input_plane_ * conv_param_->input_channel_; | |||
| const int out_offset = batch_index * output_plane_ * conv_param_->output_channel_; | |||
| input_ptr_ = "(" + src_in_ptr_str + " + " + std::to_string(in_offset) + ")"; | |||
| output_ptr_ = "(" + src_out_ptr_str + " + " + std::to_string(out_offset) + ")"; | |||
| // The packing routine matches the row alignment chosen in InitRunBuf: 4 on ARM32, 12 elsewhere. | |||
| if (target_ == kARM32A) { | |||
| code.CodeFunction("RowMajor2Col4Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_); | |||
| } else { | |||
| code.CodeFunction("RowMajor2Col12Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_); | |||
| } | |||
| // Values are listed in the field order of DeConvFp32Args. | |||
| code.CodeBaseStruct("DeConvFp32Args", kRunArgs, packed_input_, packed_weight_, packed_bias_, packed_output_, | |||
| output_ptr_, tmp_buffer_, "&matmul_parameter", "&conv_parameter"); | |||
| if (!support_parallel_) { | |||
| code.CodeFunction("DeConvFp32Run", kRunArgsAddr, kDefaultTaskId); | |||
| } else { | |||
| code.CodeFunction(kParallelLaunch, gThreadPool, "DeConvFp32Run", kRunArgsAddr, "conv_parameter.thread_num_"); | |||
| } | |||
| } | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2dTransposeFusion, /* registers DeConvolutionFP32Coder under kAllTargets / kNumberTypeFloat32 / Conv2dTransposeFusion */ | |||
| CPUOpCoderCreator<DeConvolutionFP32Coder>); | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -0,0 +1,65 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include "nnacl/conv_parameter.h" | |||
| #include "coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "nnacl/fp32/deconv_fp32.h" | |||
| #include "nnacl/fp32/matmul_fp32.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Operator coder for fp32 deconvolution, built on Conv2DBaseCoder. | |||
| // Prepare() sets up weights, parameters and buffers; DoCode() emits the run code. | |||
| class DeConvolutionFP32Coder final : public Conv2DBaseCoder { | |||
| public: | |||
| DeConvolutionFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const Model::Node *node, size_t node_index, Target target) | |||
| : Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
| ~DeConvolutionFP32Coder() override = default; | |||
| // Runs the base init, emits the weight/bias init code, then calls Resize(). | |||
| int Prepare(CoderContext *const context) override; | |||
| // Appends the per-inference code for this operator to the context. | |||
| int DoCode(CoderContext *const context) override; | |||
| private: | |||
| // Setup helpers used by Prepare(). | |||
| int InitWeightBias(CoderContext *const context); | |||
| int Resize(); | |||
| int InitParam(); | |||
| int InitRunBuf(); | |||
| // Matmul dimensions filled in by InitParam(). | |||
| MatMulParameter matmul_param_{}; | |||
| // height * width of the input map, kernel window and output map. | |||
| int input_plane_{0}; | |||
| int kernel_plane_{0}; | |||
| int output_plane_{0}; | |||
| // Byte sizes of the buffers below. | |||
| size_t bias_data_size_{0}; | |||
| size_t pack_weight_size_{0}; | |||
| size_t pack_input_size_{0}; | |||
| size_t pack_output_size_{0}; | |||
| size_t tmp_buffer_size_{0}; | |||
| // Buffers obtained from allocator_; bias and weight in InitWeightBias(), the rest in InitRunBuf(). | |||
| float *packed_bias_{nullptr}; | |||
| float *packed_weight_{nullptr}; | |||
| float *packed_input_{nullptr}; | |||
| float *packed_output_{nullptr}; | |||
| float *tmp_buffer_{nullptr}; | |||
| // Per-batch input/output address expressions written by DoCode(). | |||
| std::string input_ptr_; | |||
| std::string output_ptr_; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_ | |||
| @@ -0,0 +1,69 @@ | |||
| /* | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "wrapper/fp32/deconvolution_fp32_wrapper.h" | |||
| #include "nnacl/fp32/deconv_fp32.h" | |||
| #include "nnacl/fp32/matmul_fp32.h" | |||
| int DoDeconvFp32(const float *packed_input, const float *packed_weight, const float *packed_bias, float *packed_output, | |||
| float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param, | |||
| const ConvParameter *conv_param, int task_id) { | |||
| /* Computes the share of the deconvolution that belongs to task `task_id`. | |||
| * Output channels are split across tasks in groups of C8NUM; each task runs MatMulOpt on its | |||
| * slice of the packed weights and then DeConvPostFp32C8 on the result. | |||
| * packed_bias may be NULL. | |||
| * Returns NNACL_OK on success or when the task has no channels to process, and | |||
| * NNACL_ERR when a parameter pointer is NULL or no positive thread count can be derived. */ | |||
| if (conv_param == NULL || matmul_param == NULL) { | |||
| return NNACL_ERR; | |||
| } | |||
| const int oc8_blocks = UP_DIV(conv_param->output_channel_, C8NUM); | |||
| const int thread_count = MSMIN(conv_param->thread_num_, oc8_blocks); | |||
| if (thread_count <= 0) { | |||
| /* thread_num_ or output_channel_ is not positive; the division below would be a division by zero. */ | |||
| return NNACL_ERR; | |||
| } | |||
| const int thread_stride = UP_DIV(oc8_blocks, thread_count); | |||
| /* First output channel handled by this task. */ | |||
| const int oc_offset = task_id * thread_stride * C8NUM; | |||
| /* Number of C8NUM blocks and of real channels left for this task. */ | |||
| const int oc = MSMIN(thread_stride, oc8_blocks - task_id * thread_stride); | |||
| const int oc_res = MSMIN(thread_stride * C8NUM, conv_param->output_channel_ - oc_offset); | |||
| if (oc <= 0 || oc_res <= 0) { | |||
| return NNACL_OK; | |||
| } | |||
| const int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; | |||
| const int output_plane = conv_param->output_h_ * conv_param->output_w_; | |||
| /* The row alignment follows the build target: row_4_ with ENABLE_ARM32, row_12_ otherwise. */ | |||
| #if defined(ENABLE_ARM32) | |||
| const int row_tile = matmul_param->row_4_; | |||
| #else | |||
| const int row_tile = matmul_param->row_12_; | |||
| #endif | |||
| float *tmp_buffer = tmp_ori_buffer + oc_offset * kernel_plane * row_tile; | |||
| MatMulOpt(packed_input, packed_weight + oc_offset * kernel_plane * matmul_param->deep_, tmp_buffer, NULL, ActType_No, | |||
| matmul_param->deep_, row_tile, oc * C8NUM * kernel_plane, matmul_param->col_, OutType_C8); | |||
| /* Keep a NULL bias NULL: offsetting it would turn it into a non-NULL invalid pointer for task_id > 0. | |||
| * NOTE(review): assumes DeConvPostFp32C8 accepts a NULL bias - confirm. */ | |||
| const float *bias = (packed_bias == NULL) ? NULL : packed_bias + oc_offset; | |||
| DeConvPostFp32C8(tmp_buffer, packed_output + oc_offset * output_plane, bias, output + oc_offset, oc_res, conv_param); | |||
| return NNACL_OK; | |||
| } | |||
| int DeConvFp32Run(void *cdata, int task_id) { | |||
| /* Task entry point: `cdata` must point to a DeConvFp32Args; its fields are forwarded to | |||
| * DoDeconvFp32 together with `task_id`. | |||
| * Returns NNACL_ERR when `cdata` is NULL, otherwise the status reported by DoDeconvFp32 | |||
| * (previously that status was discarded and NNACL_OK was always returned). */ | |||
| if (cdata == NULL) { | |||
| return NNACL_ERR; | |||
| } | |||
| const DeConvFp32Args *args = (const DeConvFp32Args *)cdata; | |||
| return DoDeconvFp32(args->packed_input_, args->packed_weight_, args->packed_bias_, args->packed_output_, | |||
| args->output_, args->tmp_buffer_, args->matmul_param_, args->conv_param_, task_id); | |||
| } | |||
| @@ -0,0 +1,48 @@ | |||
| /* | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_ | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/conv_parameter.h" | |||
| #include "nnacl/matmul_parameter.h" | |||
| typedef struct { /* Argument bundle that DeConvFp32Run receives through its void *cdata parameter. NOTE(review): the coder's CodeBaseStruct call lists values in this same order, presumably as a positional initializer - keep the field order in sync. */ | |||
| const float *packed_input_; /* forwarded as packed_input to DoDeconvFp32 */ | |||
| const float *packed_weight_; /* forwarded as packed_weight */ | |||
| const float *packed_bias_; /* forwarded as packed_bias */ | |||
| float *packed_output_; /* forwarded as packed_output */ | |||
| float *output_; /* forwarded as output */ | |||
| float *tmp_buffer_; /* forwarded as tmp_ori_buffer */ | |||
| const MatMulParameter *matmul_param_; /* forwarded as matmul_param */ | |||
| const ConvParameter *conv_param_; /* forwarded as conv_param */ | |||
| } DeConvFp32Args; | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int DoDeconvFp32(const float *packed_input, const float *packed_weight, const float *packed_bias, float *packed_output, /* runs the deconvolution share of one task */ | |||
| float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param, | |||
| const ConvParameter *conv_param, int task_id); | |||
| int DeConvFp32Run(void *cdata, int task_id); /* cdata is cast to DeConvFp32Args * and its fields are passed to DoDeconvFp32 */ | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_ | |||