diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake index 20f4c891c0..7d3c51b3a5 100644 --- a/mindspore/lite/micro/cmake/file_list.cmake +++ b/mindspore/lite/micro/cmake/file_list.cmake @@ -93,6 +93,7 @@ set(CODER_OPCODERS_SRC ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/power_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc + ${MICRO_DIR}/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc @@ -150,6 +151,7 @@ set(LITE_SRC ${LITE_DIR}/src/ops/populate/conv2d_populate.cc ${LITE_DIR}/src/ops/populate/detection_post_process_populate.cc ${LITE_DIR}/src/ops/populate/depthwise_conv2d_populate.cc + ${LITE_DIR}/src/ops/populate/deconv2d_populate.cc ${LITE_DIR}/src/ops/populate/full_connection_populate.cc ${LITE_DIR}/src/ops/populate/pooling_populate.cc ${LITE_DIR}/src/ops/populate/quant_dtype_cast_populate.cc @@ -173,6 +175,7 @@ set(LITE_SRC ${LITE_DIR}/src/ops/populate/erf_populate.cc ${LITE_DIR}/src/ops/populate/exp_populate.cc ${LITE_DIR}/src/ops/populate/strided_slice_populate.cc + ${LITE_DIR}/src/ops/populate/scale_populate.cc ${LITE_DIR}/src/ops/populate/lstm_populate.cc ${LITE_DIR}/src/ops/populate/squeeze_populate.cc ${LITE_DIR}/src/ops/populate/transpose_populate.cc @@ -192,6 +195,7 @@ set(LITE_KERNEL_SRC ${NNACL_DIR}/fp32/deconv_fp32.c ${NNACL_DIR}/fp32/matmul_fp32.c ${NNACL_DIR}/fp32/common_func_fp32.c + ${NNACL_DIR}/fp32/resize_fp32.c ${NNACL_DIR}/int8/quantize.c ${NNACL_DIR}/int8/pack_int8.c ${NNACL_DIR}/int8/matmul_int8.c diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc index 0b9d2e8f40..902a36bdfa 100644 --- 
a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc @@ -133,7 +133,7 @@ int DeConvolutionFP32Coder::DoCode(CoderContext *const context) { "matmul_fp32.c", "pack_fp32.c", "deconv_fp32.c", - "minimal_filter_generator.c", + "minimal_filtering_generator.c", }); if (target_ == kARM32A) { Collect(context, {}, {}, @@ -172,8 +172,8 @@ int DeConvolutionFP32Coder::DoCode(CoderContext *const context) { std::string src_out_ptr_str = allocator_->GetRuntimeAddr(output_tensor_); for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { - input_ptr_ = src_in_ptr_str + std::to_string(batch_index * input_plane_ * conv_param_->input_channel_); - output_ptr_ = src_out_ptr_str + std::to_string(batch_index * output_plane_ * conv_param_->output_channel_); + input_ptr_ = src_in_ptr_str + "+" + std::to_string(batch_index * input_plane_ * conv_param_->input_channel_); + output_ptr_ = src_out_ptr_str + "+" + std::to_string(batch_index * output_plane_ * conv_param_->output_channel_); if (target_ == kARM32A) { code.CodeFunction("RowMajor2Col4Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc new file mode 100644 index 0000000000..3173d7e539 --- /dev/null +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc @@ -0,0 +1,213 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "coder/opcoders/nnacl/fp32/resize_fp32_coder.h"
+#include <string>
+#include <map>
+#include <utility>
+#include "coder/opcoders/serializers/serializer.h"
+#include "coder/opcoders/file_collector.h"
+#include "coder/opcoders/parallel.h"
+
+using mindspore::schema::CoordinateTransformMode_ALIGN_CORNERS;
+using mindspore::schema::CoordinateTransformMode_ASYMMETRIC;
+using mindspore::schema::CoordinateTransformMode_HALF_PIXEL;
+using mindspore::schema::PrimitiveType_Resize;
+
+namespace mindspore::lite::micro::nnacl {
+
+// Runs the base-class initialisation, picks the coordinate transform function and then pre-computes the
+// interpolation tables. Returns RET_OK on success; any failing step is reported through MS_CHECK_RET_CODE.
+int ResizeFP32Coder::Prepare(CoderContext *const context) {
+  MS_CHECK_RET_CODE(ResizeBaseCoder::Init(), "ResizeBaseCoder::Init failed");
+  MS_CHECK_RET_CODE(SelectCalculatorFunc(), "SelectCalculatorFunc failed");
+  MS_CHECK_RET_CODE(ReSize(), "ReSize failed");
+  return RET_OK;
+}
+
+// Maps coordinate_transform_mode_ to the matching coordinate function and to the function's name.
+// The pointer (calculate_) is used at code-generation time to fill the tables; the name (calculate_str_) is what
+// gets written into the generated source. Returns RET_ERROR when the mode is not one of the three listed here.
+int ResizeFP32Coder::SelectCalculatorFunc() {
+  const std::map<int, std::pair<CalculateOriginalCoordinate, std::string>> cal_fuc_list = {
+    std::make_pair(CoordinateTransformMode_ASYMMETRIC, std::make_pair(CalculateAsymmetric, "CalculateAsymmetric")),
+    std::make_pair(CoordinateTransformMode_ALIGN_CORNERS,
+                   std::make_pair(CalculateAlignCorners, "CalculateAlignCorners")),
+    std::make_pair(CoordinateTransformMode_HALF_PIXEL, std::make_pair(CalculateHalfPixel, "CalculateHalfPixel")),
+  };
+
+  auto fun_pair = cal_fuc_list.find(coordinate_transform_mode_);
+  if (fun_pair != cal_fuc_list.end()) {
+    calculate_ = fun_pair->second.first;
+    calculate_str_ = fun_pair->second.second;
+  } else {
+    // Trailing space keeps the enum name from running into the word "is" in the log output.
+    MS_LOG(ERROR) << "Do not support coordinate transform mode. Mode is "
+                  << schema::EnumNameCoordinateTransformMode(
+                       static_cast<schema::CoordinateTransformMode>(coordinate_transform_mode_));
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+// Allocates and fills the interpolation tables for the linear and cubic methods.
+// Nearest-neighbour needs no tables, so it returns RET_OK immediately. When the shape is not constant the target
+// height/width are read from dimensions 1 and 2 of the output tensor. Buffers are released again if either step
+// fails.
+int ResizeFP32Coder::ReSize() {
+  if (method_ == static_cast<int>(schema::ResizeMethod_NEAREST)) {
+    return RET_OK;
+  }
+
+  if (!const_shape_) {
+    new_height_ = output_tensor_->shape()[1];
+    new_width_ = output_tensor_->shape()[2];
+  }
+
+  MS_CHECK_RET_CODE_WITH_EXE(MallocTmpBuffer(), "MallocTmpBuffer failed", FreeTmpBuffer());
+  MS_CHECK_RET_CODE_WITH_EXE(ResizePrepare(), "ResizePrepare failed", FreeTmpBuffer());
+
+  return RET_OK;
+}
+
+// Bilinear interpolation :
+// Bilinear interpolation considers the closest 2x2 neighborhood of known pixel values surrounding the unknown pixel.
+// It takes a weighted average of these 4 pixels to arrive at its final interpolated value. The lengths set below
+// give the weight arrays the same number of entries as the coordinate arrays: one per output column (x) and one per
+// output row (y).
+//
+// Bicubic interpolation:
+// Bicubic goes one step beyond bilinear by considering the closest 4x4 neighborhood of known pixels --- for a total of
+// 16 pixels. Since these are at various distances from the unknown pixel, closer pixels are given a higher weighting in
+// the calculation. All four lengths are set to four times the output extent.
+//
+// For any other method the lengths are left untouched.
+void ResizeFP32Coder::CalTmpBufferLen() {
+  if (method_ == static_cast<int>(schema::ResizeMethod_LINEAR)) {
+    x_len_ = new_width_;
+    y_len_ = new_height_;
+    x_weight_len_ = new_width_;
+    y_weight_len_ = new_height_;
+  }
+  if (method_ == static_cast<int>(schema::ResizeMethod_CUBIC)) {
+    x_len_ = new_width_ * 4;
+    y_len_ = new_height_ * 4;
+    x_weight_len_ = new_width_ * 4;
+    y_weight_len_ = new_height_ * 4;
+  }
+}
+
+// Allocates the coordinate tables, the weight tables and the line buffer for the linear and cubic methods.
+// Returns RET_OK without allocating for every other method, and RET_NULL_PTR (via CHECK_MALLOC_RES) when an
+// allocation fails. Buffers allocated before a failure are left for the caller to release with FreeTmpBuffer().
+int ResizeFP32Coder::MallocTmpBuffer() {
+  if (method_ != static_cast<int>(schema::ResizeMethod_LINEAR) &&
+      method_ != static_cast<int>(schema::ResizeMethod_CUBIC)) {
+    return RET_OK;
+  }
+  // make sure y_bottoms_, y_tops_, the weight tables, etc. are released and null before malloc
+  FreeTmpBuffer();
+
+  CalTmpBufferLen();
+
+  // malloc memory for x, y coordinates
+  {
+    coordinate_.x_lefts_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len_));
+    CHECK_MALLOC_RES(coordinate_.x_lefts_, RET_NULL_PTR)
+    coordinate_.y_tops_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len_));
+    CHECK_MALLOC_RES(coordinate_.y_tops_, RET_NULL_PTR)
+    // The right/bottom neighbours are only allocated for the linear method.
+    if (method_ == static_cast<int>(schema::ResizeMethod_LINEAR)) {
+      coordinate_.x_rights_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len_));
+      CHECK_MALLOC_RES(coordinate_.x_rights_, RET_NULL_PTR)
+      coordinate_.y_bottoms_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len_));
+      CHECK_MALLOC_RES(coordinate_.y_bottoms_, RET_NULL_PTR)
+    }
+  }
+
+  // malloc memory for weights of x, y axes
+  {
+    x_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * x_weight_len_));
+    CHECK_MALLOC_RES(x_weights_, RET_NULL_PTR)
+    y_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * y_weight_len_));
+    CHECK_MALLOC_RES(y_weights_, RET_NULL_PTR)
+  }
+
+  // The line buffer comes from the coder's allocator as workspace memory rather than from malloc.
+  {
+    size_t line_buffer_size = sizeof(float) * x_len_ * input_tensor_->Channel() * 2 * kMaxThreadNumSupported;
+    line_buffer_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, line_buffer_size, kWorkspace));
+    CHECK_MALLOC_RES(line_buffer_, RET_NULL_PTR)
+  }
+  return RET_OK;
+}
+
+// Releases every buffer this coder obtained with malloc() and resets the pointers, so the function can be called
+// repeatedly (destructor, failure path, re-allocation). line_buffer_ is not released here because it was obtained
+// from allocator_, not from malloc().
+void ResizeFP32Coder::FreeTmpBuffer() {
+  coordinate_.FreeData();
+  // free(nullptr) is a no-op, so no null checks are needed.
+  free(x_weights_);
+  x_weights_ = nullptr;
+  free(y_weights_);
+  y_weights_ = nullptr;
+}
+
+// Fills the coordinate and weight tables with the nnacl helper that matches method_.
+// Returns the helper's status for the linear and cubic methods and RET_OK for every other method.
+int ResizeFP32Coder::ResizePrepare() {
+  auto input_shape = input_tensor_->shape();
+  if (method_ == static_cast<int>(schema::ResizeMethod_LINEAR)) {
+    return PrepareResizeBilinear(input_shape.data(), output_tensor_->shape().data(), calculate_, coordinate_.y_bottoms_,
+                                 coordinate_.y_tops_, coordinate_.x_lefts_, coordinate_.x_rights_, y_weights_,
+                                 x_weights_);
+  }
+  if (method_ == static_cast<int>(schema::ResizeMethod_CUBIC)) {
+    auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter_);
+    MS_CHECK_PTR(resize_parameter);
+    auto cubic_coeff = resize_parameter->cubic_coeff_;
+    return PrepareResizeBicubic(input_shape.data(), output_tensor_->shape().data(), calculate_, coordinate_.y_tops_,
+                                coordinate_.x_lefts_, y_weights_, x_weights_, cubic_coeff);
+  }
+  return RET_OK;
+}
+
+// Emits the generated code for this operator: the shape arrays, the pre-computed tables for the selected method and
+// the call to the matching nnacl resize function. Returns RET_ERROR for an unknown method.
+int ResizeFP32Coder::DoCode(CoderContext *const context) {
+  Collect(context,
+          {
+            "nnacl/fp32/resize_fp32.h",
+          },
+          {
+            "resize_fp32.c",
+          });
+  Serializer code;
+  code.CodeArray("input_shape", input_tensor_->shape().data(), input_tensor_->shape().size(), true);
+  code.CodeArray("output_shape", output_tensor_->shape().data(), output_tensor_->shape().size(), true);
+
+  // The table lengths below are element counts, matching the shape arrays above. Passing byte counts would make the
+  // serializer read past the end of the malloc'd tables.
+  switch (method_) {
+    case static_cast<int>(schema::ResizeMethod_LINEAR): {
+      code.CodeArray("y_bottoms", coordinate_.y_bottoms_, y_len_, true);
+      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
+      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
+      code.CodeArray("x_rights", coordinate_.x_rights_, x_len_, true);
+      code.CodeArray("y_weights", y_weights_, y_weight_len_, true);
+      code.CodeArray("x_weights", x_weights_, x_weight_len_, true);
+
+      int c = input_tensor_->shape().at(3);
+      // line1 starts new_width_ * c floats after line0 inside the same workspace buffer.
+      code << "float *line0 = " << MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_) << ";\n";
+      code << "float *line1 = line0 + " << new_width_ << " * " << c << ";\n";
+      code.CodeFunction("ResizeBilinear", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_bottoms",
+                        "y_tops", "x_lefts", "x_rights", "y_weights", "x_weights", "line0", "line1", 0, new_height_);
+      break;
+    }
+    case static_cast<int>(schema::ResizeMethod_NEAREST): {
+      code.CodeFunction("ResizeNearestNeighbor", input_tensor_, output_tensor_, "input_shape", "output_shape",
+                        calculate_str_, coordinate_transform_mode_, kDefaultTaskId, kDefaultThreadNum);
+      break;
+    }
+    case static_cast<int>(schema::ResizeMethod_CUBIC): {
+      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
+      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
+      code.CodeArray("y_weights", y_weights_, y_weight_len_, true);
+      code.CodeArray("x_weights", x_weights_, x_weight_len_, true);
+      code.CodeFunction("ResizeBicubic", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_tops",
+                        "x_lefts", "y_weights", "x_weights", line_buffer_, 0, new_height_);
+      break;
+    }
+    default: {
+      MS_LOG(ERROR) << "Resize unknown method " << method_;
+      return RET_ERROR;
+    }
+  }
+
+  context->AppendCode(code.str());
+  return RET_OK;
+}
+
+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Resize, CPUOpCoderCreator<ResizeFP32Coder>)
+}  // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h
new file mode 100644
index 0000000000..ac1da325d3
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_RESIZE_FP32_CODER_H_
+#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_RESIZE_FP32_CODER_H_
+
+#include "coder/opcoders/base/resize_base_coder.h"
+#include  // NOTE(review): the header name of this and the next two includes was lost in extraction - restore from the original file
+#include
+#include
+#include "include/errorcode.h"
+#include "nnacl/fp32/resize_fp32.h"
+#include "src/lite_kernel.h"
+#include "src/runtime/kernel/arm/fp32/resize_fp32.h"
+
+namespace mindspore::lite::micro::nnacl {
+// FP32 Resize operator coder: pre-computes interpolation tables in Prepare() and emits the resize call in DoCode().
+class ResizeFP32Coder final : public ResizeBaseCoder {
+ public:
+  ResizeFP32Coder(const std::vector &in_tensors, const std::vector &out_tensors,  // NOTE(review): vector element type lost in extraction
+                  const Model::Node *node, size_t node_index, Target target)
+      : ResizeBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
+  ~ResizeFP32Coder() override { FreeTmpBuffer(); };  // releases the temporary tables; the ';' after the body is redundant
+  int Prepare(CoderContext *const context) override;  // base Init, SelectCalculatorFunc, then ReSize
+  int ReSize();  // allocates and fills the tables; returns early for the nearest-neighbour method
+  int DoCode(CoderContext *const context) override;  // serializes the tables and the nnacl resize call
+
+ private:
+  int SelectCalculatorFunc();  // sets calculate_ / calculate_str_ from coordinate_transform_mode_
+  void CalTmpBufferLen();  // sets the four *_len_ members from new_width_ / new_height_
+  int MallocTmpBuffer();  // allocates coordinate tables, weight tables and the line buffer
+  void FreeTmpBuffer();  // calls coordinate_.FreeData()
+  int ResizePrepare();  // calls PrepareResizeBilinear or PrepareResizeBicubic depending on method_
+
+  ResizeCoordinate coordinate_;  // holds the x_lefts_ / x_rights_ / y_tops_ / y_bottoms_ tables
+  size_t x_len_{0};  // allocation length, in elements, of the x coordinate tables
+  size_t y_len_{0};  // allocation length, in elements, of the y coordinate tables
+  size_t x_weight_len_{0};  // allocation length, in elements, of x_weights_
+  size_t y_weight_len_{0};  // allocation length, in elements, of y_weights_
+  // y_weights_ and x_weights_ are allocated with malloc() in MallocTmpBuffer().
+  float *y_weights_{nullptr};
+  float *x_weights_{nullptr};
+  float *line_buffer_{nullptr};  // workspace obtained from allocator_ in MallocTmpBuffer()
+  CalculateOriginalCoordinate calculate_{nullptr};  // coordinate function chosen by SelectCalculatorFunc()
+  std::string calculate_str_;  // name of that function, passed to the generated ResizeNearestNeighbor call
+};
+
+}  // namespace mindspore::lite::micro::nnacl
+
+#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_RESIZE_FP32_CODER_H_
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
index 7df9984cf3..3325dc86c5 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
@@ -130,11 +130,10 @@ int ScaleFP32Coder::DoCode(CoderContext *const context) {
   Collect(context,
           {
             "nnacl/scale.h",
-            "nnacl/fp32/scale.h",
-            "nnacl/quantization/quantize.h",
+            "nnacl/fp32/scale_fp32.h",
           },
           {
- "scale.c", + "scale_fp32.c", }); NNaclFp32Serializer code;