From 47ba025995cab5361d2819b1ce3de9c419cad75f Mon Sep 17 00:00:00 2001
From: fuzhiye
Date: Fri, 7 Aug 2020 14:23:33 +0800
Subject: [PATCH] 1. softmax op: support input dims greater than 2. 2. add
 fp32 data type cast op registration.

---
 .../kernel/arm/base/quant_dtype_cast.cc       |  1 +
 .../src/runtime/kernel/arm/fp32/softmax.cc    | 20 ++++++++++++++++---
 .../src/runtime/kernel/arm/fp32/softmax.h     |  8 ++++++--
 .../runtime/kernel/arm/nnacl/fp32/softmax.cc  | 15 +++++++-------
 mindspore/lite/test/st/benchmark_test.cc      | 18 ++++++++---------
 .../parser/tflite/tflite_softmax_parser.cc    |  3 +--
 6 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
index ef5516be2d..80fac8f757 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
@@ -145,4 +145,5 @@ kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
+  auto n_dim = softmax_param_->n_dim_;
   auto axis = softmax_param_->axis_;
-  sum_data = reinterpret_cast<float *>(malloc(softmax_param_->input_shape_[axis] * sizeof(float)));
-  memset(sum_data, 0, softmax_param_->input_shape_[axis] * sizeof(float));
+  if (axis == -1) {
+    softmax_param_->axis_ += n_dim;
+    axis = softmax_param_->axis_;
+  }
+  auto in_shape = inputs_.front()->shape();
+  int out_plane_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    out_plane_size *= in_shape[i];
+  }
+  int in_plane_size = 1;
+  for (int i = axis + 1; i < n_dim; i++) {
+    in_plane_size *= in_shape[i];
+  }
+  sum_data_ = reinterpret_cast<float *>(malloc(out_plane_size * in_plane_size * sizeof(float)));
+  memset(sum_data_, 0, out_plane_size * in_plane_size * sizeof(float));
   return RET_OK;
 }
 
@@ -44,7 +58,7 @@ int SoftmaxCPUKernel::ReSize() { return RET_OK; }
 int SoftmaxCPUKernel::Run() {
   auto input_ptr = reinterpret_cast<float *>(inputs_.at(kInputIndex)->Data());
   auto output_ptr = reinterpret_cast<float *>(outputs_.at(kOutputIndex)->Data());
-  Softmax(input_ptr, output_ptr, sum_data, softmax_param_);
+  Softmax(input_ptr, output_ptr, sum_data_, softmax_param_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
index 0e9c0a7ebf..6c46045794 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
@@ -27,14 +27,18 @@ class SoftmaxCPUKernel : public SoftmaxBaseCPUKernel {
   SoftmaxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                    const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
       : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx) {}
-  ~SoftmaxCPUKernel() override = default;
+  ~SoftmaxCPUKernel() override {
+    if (sum_data_ != nullptr) {
+      free(sum_data_);
+    }
+  };
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
  private:
-  float *sum_data;
+  float *sum_data_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc
index 2e046f2651..d07229b871 100644
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc
@@ -37,24 +37,25 @@ void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, Softmax
   for (int i = 0; i < outter_size; i++) {
     int outter_offset = i * input_shape[axis] * inner_size;
-    for (int j = 0; j < input_shape[axis]; j++) {
-      int axis_offset = outter_offset + j * inner_size;
-      for (int k = 0; k < inner_size; k++) {
-        int inner_offset = axis_offset + k;
-        sum_data[j] += output_ptr[inner_offset];
+    int sum_outter_offset = i * inner_size;
+    for (int k = 0; k < inner_size; k++) {
+      int inner_offset = outter_offset + k;
+      for (int j = 0; j < input_shape[axis]; j++) {
+        int axis_offset = inner_offset + j * inner_size;
+        sum_data[k + sum_outter_offset] += output_ptr[axis_offset];
       }
     }
   }
   for (int i = 0; i < outter_size; i++) {
     int outter_offset = i * input_shape[axis] * inner_size;
+    int sum_outter_offset = i * inner_size;
     for (int j = 0; j < input_shape[axis]; j++) {
       int axis_offset = outter_offset + j * inner_size;
       for (int k = 0; k < inner_size; k++) {
         int inner_offset = axis_offset + k;
-        output_ptr[inner_offset] = output_ptr[inner_offset] / sum_data[j];
+        output_ptr[inner_offset] = output_ptr[inner_offset] / sum_data[k + sum_outter_offset];
       }
     }
   }
 }
-
diff --git a/mindspore/lite/test/st/benchmark_test.cc b/mindspore/lite/test/st/benchmark_test.cc
index 86b468677b..48dd6dd7db 100644
--- a/mindspore/lite/test/st/benchmark_test.cc
+++ b/mindspore/lite/test/st/benchmark_test.cc
@@ -26,18 +26,18 @@ class BenchmarkTest : public mindspore::Common {
 };
 
 TEST_F(BenchmarkTest, TestVideo) {
-  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_label_and_video.ms"
-                        "--inDataPath=./hiai/hiai_label_and_video.bin"
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_label_and_video.ms",
+                        "--inDataPath=./hiai/hiai_label_and_video.bin",
                         "--calibDataPath=./hiai/hiai_label_and_video.txt"};
-  auto status = RunBenchmark(2, argv);
+  auto status = RunBenchmark(4, argv);
   ASSERT_EQ(status, RET_OK);
 }
 
 TEST_F(BenchmarkTest, TestOCR_02) {
-  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_cv_focusShootOCRMOdel_02.ms"
-                        "--inDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.bin"
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_cv_focusShootOCRMOdel_02.ms",
+                        "--inDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.bin",
                         "--calibDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.txt"};
-  auto status = RunBenchmark(2, argv);
+  auto status = RunBenchmark(4, argv);
   ASSERT_EQ(status, RET_OK);
 }
 
@@ -51,10 +51,10 @@ ASSERT_EQ(status, RET_OK);
 }
 
 TEST_F(BenchmarkTest, TestHebing) {
-  const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms"
-                        "--inDataPath=./hiai/model_hebing_3branch.bin"
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms",
+                        "--inDataPath=./hiai/model_hebing_3branch.bin",
                         "--calibDataPath=./hiai/model_hebing_3branch.txt"};
-  auto status = RunBenchmark(2, argv);
+  auto status = RunBenchmark(4, argv);
   ASSERT_EQ(status, RET_OK);
 }
 }  // namespace lite
diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc b/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc
index 9680de53a0..c85b5515cf 100644
--- a/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc
+++ b/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc
@@ -24,8 +24,7 @@ STATUS TfliteSoftmaxParser::Parse(const std::unique_ptr<tflite::OperatorT> &tfli
                                   const std::vector<std::unique_ptr<tflite::TensorT>> &tfliteTensors,
                                   const std::vector<std::unique_ptr<tflite::BufferT>> &tfliteModelBuffer,
                                   const std::vector<std::unique_ptr<tflite::OperatorCodeT>> &tfliteOpSet,
-                                  schema::CNodeT *op,
-                                  TensorCache *tensor_cache, bool quantizedModel) {
+                                  schema::CNodeT *op, TensorCache *tensor_cache, bool quantizedModel) {
   MS_LOG(DEBUG) << "parse TfliteSoftmaxParser";
   std::unique_ptr<schema::SoftMaxT> attr(new schema::SoftMaxT());
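
For reference, the loop structure introduced in nnacl/fp32/softmax.cc can be illustrated standalone: the reworked kernel keeps one running sum per (outer, inner) position instead of one per axis element, which is what lets it handle an arbitrary softmax axis on inputs with more than two dimensions. The sketch below is not the MindSpore Lite code; it uses a hypothetical helper name (SoftmaxAlongAxis), plain C++ containers instead of the lite tensor types, and folds in the exp() step that the hunk above does not show.

#include <cmath>
#include <cstdio>
#include <vector>

// Softmax over an arbitrary axis of a row-major tensor: one running sum per
// (outer, inner) position, mirroring the out_plane_size / in_plane_size split
// used by the patch. No max-subtraction is done here, matching the hunk shown.
void SoftmaxAlongAxis(const float *input, float *output, float *sum_data,
                      const std::vector<int> &shape, int axis) {
  int outer_size = 1;
  int inner_size = 1;
  const int axis_size = shape[axis];
  for (int i = 0; i < axis; ++i) outer_size *= shape[i];
  for (size_t i = axis + 1; i < shape.size(); ++i) inner_size *= shape[i];

  for (int i = 0; i < outer_size; ++i) {
    const int outer_offset = i * axis_size * inner_size;
    const int sum_offset = i * inner_size;
    // Exponentiate and accumulate along the softmax axis.
    for (int j = 0; j < axis_size; ++j) {
      for (int k = 0; k < inner_size; ++k) {
        const int idx = outer_offset + j * inner_size + k;
        output[idx] = std::exp(input[idx]);
        sum_data[sum_offset + k] += output[idx];
      }
    }
    // Normalize each element by the sum of its own (outer, inner) column.
    for (int j = 0; j < axis_size; ++j) {
      for (int k = 0; k < inner_size; ++k) {
        const int idx = outer_offset + j * inner_size + k;
        output[idx] /= sum_data[sum_offset + k];
      }
    }
  }
}

int main() {
  const std::vector<int> shape = {2, 3, 4};  // 3-D input, softmax along axis 1
  const int axis = 1;
  std::vector<float> in(2 * 3 * 4, 1.0f);
  std::vector<float> out(in.size(), 0.0f);
  std::vector<float> sum(2 * 4, 0.0f);       // out_plane_size * in_plane_size slots
  SoftmaxAlongAxis(in.data(), out.data(), sum.data(), shape, axis);
  std::printf("%f\n", out[0]);               // uniform input -> 1/3 everywhere
  return 0;
}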