!15850 add resize fp32 coder

From: @zhujingxuan Reviewed-by: @wangchengyuan,@hangangqiang Signed-off-by: @wangchengyuan
4 years ago · a273da8a5f
--- a/mindspore/lite/micro/cmake/file_list.cmake
+++ b/mindspore/lite/micro/cmake/file_list.cmake
@@ -93,6 +93,7 @@ set(CODER_OPCODERS_SRC
        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc
        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/power_fp32_coder.cc
        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc
        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc
@@ -150,6 +151,7 @@ set(LITE_SRC
        ${LITE_DIR}/src/ops/populate/conv2d_populate.cc
        ${LITE_DIR}/src/ops/populate/detection_post_process_populate.cc
        ${LITE_DIR}/src/ops/populate/depthwise_conv2d_populate.cc
        ${LITE_DIR}/src/ops/populate/deconv2d_populate.cc
        ${LITE_DIR}/src/ops/populate/full_connection_populate.cc
        ${LITE_DIR}/src/ops/populate/pooling_populate.cc
        ${LITE_DIR}/src/ops/populate/quant_dtype_cast_populate.cc
@@ -173,6 +175,7 @@ set(LITE_SRC
        ${LITE_DIR}/src/ops/populate/erf_populate.cc
        ${LITE_DIR}/src/ops/populate/exp_populate.cc
        ${LITE_DIR}/src/ops/populate/strided_slice_populate.cc
        ${LITE_DIR}/src/ops/populate/scale_populate.cc
        ${LITE_DIR}/src/ops/populate/lstm_populate.cc
        ${LITE_DIR}/src/ops/populate/squeeze_populate.cc
        ${LITE_DIR}/src/ops/populate/transpose_populate.cc
@@ -192,6 +195,7 @@ set(LITE_KERNEL_SRC
        ${NNACL_DIR}/fp32/deconv_fp32.c
        ${NNACL_DIR}/fp32/matmul_fp32.c
        ${NNACL_DIR}/fp32/common_func_fp32.c
        ${NNACL_DIR}/fp32/resize_fp32.c
        ${NNACL_DIR}/int8/quantize.c
        ${NNACL_DIR}/int8/pack_int8.c
        ${NNACL_DIR}/int8/matmul_int8.c
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc
@@ -133,7 +133,7 @@ int DeConvolutionFP32Coder::DoCode(CoderContext *const context) {
            "matmul_fp32.c",
            "pack_fp32.c",
            "deconv_fp32.c",
            "minimal_filter_generator.c",
            "minimal_filtering_generator.c",
          });
  if (target_ == kARM32A) {
    Collect(context, {}, {},
@@ -172,8 +172,8 @@ int DeConvolutionFP32Coder::DoCode(CoderContext *const context) {
  std::string src_out_ptr_str = allocator_->GetRuntimeAddr(output_tensor_);

  for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
    input_ptr_ = src_in_ptr_str + std::to_string(batch_index * input_plane_ * conv_param_->input_channel_);
    output_ptr_ = src_out_ptr_str + std::to_string(batch_index * output_plane_ * conv_param_->output_channel_);
    input_ptr_ = src_in_ptr_str + "+" + std::to_string(batch_index * input_plane_ * conv_param_->input_channel_);
    output_ptr_ = src_out_ptr_str + "+" + std::to_string(batch_index * output_plane_ * conv_param_->output_channel_);

    if (target_ == kARM32A) {
      code.CodeFunction("RowMajor2Col4Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_);
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc
@@ -0,0 +1,213 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "coder/opcoders/nnacl/fp32/resize_fp32_coder.h"
 #include <string>
 #include <map>
 #include <utility>
 #include "coder/opcoders/serializers/serializer.h"
 #include "coder/opcoders/file_collector.h"
 #include "coder/opcoders/parallel.h"

 using mindspore::schema::CoordinateTransformMode_ALIGN_CORNERS;
 using mindspore::schema::CoordinateTransformMode_ASYMMETRIC;
 using mindspore::schema::CoordinateTransformMode_HALF_PIXEL;
 using mindspore::schema::PrimitiveType_Resize;

 namespace mindspore::lite::micro::nnacl {

 // Prepares the coder in three checked steps: base-class initialisation (ResizeBaseCoder::Init), selection of the
 // coordinate transform function (SelectCalculatorFunc) and ReSize(). Every step is wrapped in MS_CHECK_RET_CODE with
 // its own failure message; RET_OK is returned once all three have passed. `context` is not used here.
 int ResizeFP32Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(ResizeBaseCoder::Init(), "ResizeBaseCoder::Init failed");
  MS_CHECK_RET_CODE(SelectCalculatorFunc(), "SelectCalculatorFunc failed");
  MS_CHECK_RET_CODE(ReSize(), "ReSize failed");
  return RET_OK;
 }

 int ResizeFP32Coder::SelectCalculatorFunc() {
  const std::map<int, std::pair<CalculateOriginalCoordinate, std::string>> cal_fuc_list = {
    std::make_pair(CoordinateTransformMode_ASYMMETRIC, std::make_pair(CalculateAsymmetric, "CalculateAsymmetric")),
    std::make_pair(CoordinateTransformMode_ALIGN_CORNERS,
                   std::make_pair(CalculateAlignCorners, "CalculateAlignCorners")),
    std::make_pair(CoordinateTransformMode_HALF_PIXEL, std::make_pair(CalculateHalfPixel, "CalculateHalfPixel")),
  };

  auto fun_pair = cal_fuc_list.find(coordinate_transform_mode_);
  if (fun_pair != cal_fuc_list.end()) {
    calculate_ = fun_pair->second.first;
    calculate_str_ = fun_pair->second.second;
  } else {
    MS_LOG(ERROR) << "Do not support coordinate transform mode. Mode is"
                  << schema::EnumNameCoordinateTransformMode(
                       static_cast<schema::CoordinateTransformMode>(coordinate_transform_mode_));
    return RET_ERROR;
  }
  return RET_OK;
 }

 // Allocates and fills the lookup tables required by the selected resize method.
 //
 // ResizeMethod_NEAREST needs no tables, so it returns RET_OK straight away. Otherwise, when const_shape_ is false, the
 // target height and width are read from dimensions 1 and 2 of the output tensor shape (presumably an NHWC layout --
 // confirm against the base coder). The buffers are then allocated and populated; if either step fails,
 // FreeTmpBuffer() runs before the error is reported.
 int ResizeFP32Coder::ReSize() {
  const bool is_nearest = (method_ == static_cast<int>(schema::ResizeMethod_NEAREST));
  if (is_nearest) {
    return RET_OK;
  }

  if (!const_shape_) {
    const auto &out_shape = output_tensor_->shape();
    new_height_ = out_shape[1];
    new_width_ = out_shape[2];
  }

  MS_CHECK_RET_CODE_WITH_EXE(MallocTmpBuffer(), "MallocTmpBuffer failed", FreeTmpBuffer());
  MS_CHECK_RET_CODE_WITH_EXE(ResizePrepare(), "ResizePrepare failed", FreeTmpBuffer());
  return RET_OK;
 }

 // Derives the element counts of the temporary coordinate and weight tables from the target size.
 //
 // Bilinear interpolation (ResizeMethod_LINEAR):
 // Considers the closest 2x2 neighborhood of known pixel values surrounding the unknown pixel and takes a weighted
 // average of these 4 pixels. The tables hold one entry per output column (x) and one entry per output row (y); the
 // weight tables have the same length as the coordinate tables.
 //
 // Bicubic interpolation (ResizeMethod_CUBIC):
 // Goes one step beyond bilinear by considering the closest 4x4 neighborhood of known pixels, 16 in total, with closer
 // pixels given a higher weighting. The tables hold four entries per output column and four per output row.
 //
 // For any other method the lengths are left unchanged.
 void ResizeFP32Coder::CalTmpBufferLen() {
  const bool is_linear = (method_ == static_cast<int>(schema::ResizeMethod_LINEAR));
  const bool is_cubic = (method_ == static_cast<int>(schema::ResizeMethod_CUBIC));
  if (!is_linear && !is_cubic) {
    return;
  }

  // Number of table entries reserved per output column / row.
  const int entries_per_pixel = is_cubic ? 4 : 1;
  x_len_ = new_width_ * entries_per_pixel;
  y_len_ = new_height_ * entries_per_pixel;
  x_weight_len_ = new_width_ * entries_per_pixel;
  y_weight_len_ = new_height_ * entries_per_pixel;
 }

 int ResizeFP32Coder::MallocTmpBuffer() {
  if (method_ != static_cast<int>(schema::ResizeMethod_LINEAR) &&
      method_ != static_cast<int>(schema::ResizeMethod_CUBIC)) {
    return RET_OK;
  }
  // make sure y_bottoms_, y_tops_, etc. are null before malloc
  FreeTmpBuffer();

  CalTmpBufferLen();

  // malloc memory for x, y coordinates
  {
    coordinate_.x_lefts_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len_));
    CHECK_MALLOC_RES(coordinate_.x_lefts_, RET_NULL_PTR)
    coordinate_.y_tops_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len_));
    CHECK_MALLOC_RES(coordinate_.y_tops_, RET_NULL_PTR)
    if (method_ == static_cast<int>(schema::ResizeMethod_LINEAR)) {
      coordinate_.x_rights_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len_));
      CHECK_MALLOC_RES(coordinate_.x_rights_, RET_NULL_PTR)
      coordinate_.y_bottoms_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len_));
      CHECK_MALLOC_RES(coordinate_.y_bottoms_, RET_NULL_PTR)
    }
  }

  // malloc memory for weights of x, y axes
  {
    x_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * x_weight_len_));
    CHECK_MALLOC_RES(x_weights_, RET_NULL_PTR)
    y_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * y_weight_len_));
    CHECK_MALLOC_RES(y_weights_, RET_NULL_PTR)
  }

  {
    size_t line_buffer_size = sizeof(float) * x_len_ * input_tensor_->Channel() * 2 * kMaxThreadNumSupported;
    line_buffer_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, line_buffer_size, kWorkspace));
    CHECK_MALLOC_RES(line_buffer_, RET_NULL_PTR)
  }
  return RET_OK;
 }

 void ResizeFP32Coder::FreeTmpBuffer() { coordinate_.FreeData(); }

 // Fills the previously allocated coordinate and weight tables for the selected method.
 //
 // LINEAR forwards to PrepareResizeBilinear, CUBIC forwards to PrepareResizeBicubic (passing cubic_coeff_ taken from
 // the ResizeParameter); the helper's status is returned unchanged. Any other method has nothing to prepare and
 // yields RET_OK.
 int ResizeFP32Coder::ResizePrepare() {
  auto in_shape = input_tensor_->shape();
  switch (method_) {
    case static_cast<int>(schema::ResizeMethod_LINEAR):
      return PrepareResizeBilinear(in_shape.data(), output_tensor_->shape().data(), calculate_,
                                   coordinate_.y_bottoms_, coordinate_.y_tops_, coordinate_.x_lefts_,
                                   coordinate_.x_rights_, y_weights_, x_weights_);
    case static_cast<int>(schema::ResizeMethod_CUBIC): {
      auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter_);
      MS_CHECK_PTR(resize_parameter);
      return PrepareResizeBicubic(in_shape.data(), output_tensor_->shape().data(), calculate_, coordinate_.y_tops_,
                                  coordinate_.x_lefts_, y_weights_, x_weights_, resize_parameter->cubic_coeff_);
    }
    default:
      return RET_OK;
  }
 }

 // Emits the C code that performs the resize and appends it to `context`.
 //
 // The nnacl resize header and source are registered through Collect(), the input and output shapes are written as
 // constant arrays, and then, depending on method_:
 //   - LINEAR:  the precomputed coordinate/weight tables are emitted, two line pointers are set up inside the
 //              workspace line buffer, and a call to ResizeBilinear is generated;
 //   - NEAREST: a call to ResizeNearestNeighbor is generated with the selected coordinate function name;
 //   - CUBIC:   the precomputed tables are emitted and a call to ResizeBicubic is generated.
 // Returns RET_OK on success and RET_ERROR for an unknown method (nothing is appended in that case).
 int ResizeFP32Coder::DoCode(CoderContext *const context) {
  Collect(context,
          {
            "nnacl/fp32/resize_fp32.h",
          },
          {
            "resize_fp32.c",
          });
  Serializer code;
  code.CodeArray("input_shape", input_tensor_->shape().data(), input_tensor_->shape().size(), true);
  code.CodeArray("output_shape", output_tensor_->shape().data(), output_tensor_->shape().size(), true);

  // CodeArray() is given an element count (see the shape arrays above), not a byte count. The tables were allocated
  // with exactly *_len_ elements, so passing sizeof(T) * len would read past the end of each heap buffer and emit
  // oversized arrays.
  switch (method_) {
    case static_cast<int>(schema::ResizeMethod_LINEAR): {
      code.CodeArray("y_bottoms", coordinate_.y_bottoms_, y_len_, true);
      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
      code.CodeArray("x_rights", coordinate_.x_rights_, x_len_, true);
      code.CodeArray("y_weights", y_weights_, y_weight_len_, true);
      code.CodeArray("x_weights", x_weights_, x_weight_len_, true);

      // line1 starts one output row (new_width_ * channels floats) after line0 inside the workspace buffer.
      int c = input_tensor_->shape().at(3);
      code << "float *line0 = " << MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_) << ";\n";
      code << "float *line1 = line0 + " << new_width_ << " * " << c << ";\n";
      code.CodeFunction("ResizeBilinear", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_bottoms",
                        "y_tops", "x_lefts", "x_rights", "y_weights", "x_weights", "line0", "line1", 0, new_height_);
      break;
    }
    case static_cast<int>(schema::ResizeMethod_NEAREST): {
      code.CodeFunction("ResizeNearestNeighbor", input_tensor_, output_tensor_, "input_shape", "output_shape",
                        calculate_str_, coordinate_transform_mode_, kDefaultTaskId, kDefaultThreadNum);
      break;
    }
    case static_cast<int>(schema::ResizeMethod_CUBIC): {
      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
      code.CodeArray("y_weights", y_weights_, y_weight_len_, true);
      code.CodeArray("x_weights", x_weights_, x_weight_len_, true);
      // NOTE(review): line_buffer_ is handed over as a raw pointer here, whereas the LINEAR branch resolves it via
      // GetRuntimeAddr(); confirm that CodeFunction maps workspace pointers to their runtime address.
      code.CodeFunction("ResizeBicubic", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_tops",
                        "x_lefts", "y_weights", "x_weights", line_buffer_, 0, new_height_);
      break;
    }
    default: {
      MS_LOG(ERROR) << "Resize unknown method " << method_;
      return RET_ERROR;
    }
  }

  context->AppendCode(code.str());
  return RET_OK;
 }

 // Registers ResizeFP32Coder (via CPUOpCoderCreator) for PrimitiveType_Resize with kNumberTypeFloat32 on kAllTargets.
 REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Resize, CPUOpCoderCreator<ResizeFP32Coder>)
 }  // namespace mindspore::lite::micro::nnacl
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h
@@ -0,0 +1,63 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_RESIZE_FP32_CODER_H_
 #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_RESIZE_FP32_CODER_H_

 #include "coder/opcoders/base/resize_base_coder.h"
 #include <vector>
 #include <algorithm>
 #include <string>
 #include "include/errorcode.h"
 #include "nnacl/fp32/resize_fp32.h"
 #include "src/lite_kernel.h"
 #include "src/runtime/kernel/arm/fp32/resize_fp32.h"

 namespace mindspore::lite::micro::nnacl {

 // Coder for the fp32 Resize operator.
 //
 // Prepare() selects the coordinate transform function and, for the LINEAR and CUBIC methods, precomputes the
 // coordinate and weight tables on the host. DoCode() then writes those tables and the matching nnacl resize call
 // into the generated source. The host-side tables are held through raw pointers and released by FreeTmpBuffer().
 class ResizeFP32Coder final : public ResizeBaseCoder {
 public:
  ResizeFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const Model::Node *node, size_t node_index, Target target)
      : ResizeBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
  // Releases the temporary host buffers.
  ~ResizeFP32Coder() override { FreeTmpBuffer(); }

  // Initialises the base coder, picks the coordinate function and runs ReSize().
  int Prepare(CoderContext *const context) override;
  // Allocates and fills the lookup tables (no-op for the NEAREST method).
  int ReSize();
  // Emits the resize code into `context`.
  int DoCode(CoderContext *const context) override;

 private:
  // Maps coordinate_transform_mode_ to calculate_ / calculate_str_.
  int SelectCalculatorFunc();
  // Computes x_len_, y_len_, x_weight_len_ and y_weight_len_ for the current method.
  void CalTmpBufferLen();
  // Allocates the coordinate tables, the weight tables and the line buffer.
  int MallocTmpBuffer();
  // Releases the temporary host buffers.
  void FreeTmpBuffer();
  // Populates the tables through the nnacl PrepareResize* helpers.
  int ResizePrepare();

  // Coordinate tables (x_lefts_, x_rights_, y_tops_, y_bottoms_).
  ResizeCoordinate coordinate_;
  // Element counts of the coordinate tables.
  size_t x_len_{0};
  size_t y_len_{0};
  // Element counts of the weight tables.
  size_t x_weight_len_{0};
  size_t y_weight_len_{0};

  // Interpolation weights, allocated with malloc().
  float *y_weights_{nullptr};
  float *x_weights_{nullptr};
  // Scratch buffer requested from the coder's allocator as workspace memory.
  float *line_buffer_{nullptr};
  // Selected coordinate transform function and the name used for it in the generated code.
  CalculateOriginalCoordinate calculate_{nullptr};
  std::string calculate_str_;
 };

 }  // namespace mindspore::lite::micro::nnacl

 #endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_RESIZE_FP32_CODER_H_
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
@@ -130,11 +130,10 @@ int ScaleFP32Coder::DoCode(CoderContext *const context) {
  Collect(context,
          {
            "nnacl/scale.h",
            "nnacl/fp32/scale.h",
            "nnacl/quantization/quantize.h",
            "nnacl/fp32/scale_fp32.h",
          },
          {
            "scale.c",
            "scale_fp32.c",
          });

  NNaclFp32Serializer code;