From 47ba025995cab5361d2819b1ce3de9c419cad75f Mon Sep 17 00:00:00 2001
From: fuzhiye
Date: Fri, 7 Aug 2020 14:23:33 +0800
Subject: [PATCH] 1. softmax op: support input dims greater than 2. 2. add
 fp32 data type cast op registration.

---
 .../kernel/arm/base/quant_dtype_cast.cc       |  1 +
 .../src/runtime/kernel/arm/fp32/softmax.cc    | 20 ++++++++++++++++---
 .../src/runtime/kernel/arm/fp32/softmax.h     |  8 ++++++--
 .../runtime/kernel/arm/nnacl/fp32/softmax.cc  | 15 +++++++-------
 mindspore/lite/test/st/benchmark_test.cc      | 18 ++++++++---------
 .../parser/tflite/tflite_softmax_parser.cc    |  3 +--
 6 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
index ef5516be2d..80fac8f757 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
@@ -145,4 +145,5 @@ kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
+  auto n_dim = softmax_param_->n_dim_;
   auto axis = softmax_param_->axis_;
-  sum_data = reinterpret_cast<float *>(malloc(softmax_param_->input_shape_[axis] * sizeof(float)));
-  memset(sum_data, 0, softmax_param_->input_shape_[axis] * sizeof(float));
+  if (axis == -1) {
+    softmax_param_->axis_ += n_dim;
+    axis = softmax_param_->axis_;
+  }
+  auto in_shape = inputs_.front()->shape();
+  int out_plane_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    out_plane_size *= in_shape[i];
+  }
+  int in_plane_size = 1;
+  for (int i = axis + 1; i < n_dim; i++) {
+    in_plane_size *= in_shape[i];
+  }
+  sum_data_ = reinterpret_cast<float *>(malloc(out_plane_size * in_plane_size * sizeof(float)));
+  memset(sum_data_, 0, out_plane_size * in_plane_size * sizeof(float));
   return RET_OK;
 }
 
@@ -44,7 +58,7 @@ int SoftmaxCPUKernel::ReSize() { return RET_OK; }
 int SoftmaxCPUKernel::Run() {
   auto input_ptr = reinterpret_cast<float *>(inputs_.at(kInputIndex)->Data());
   auto output_ptr = reinterpret_cast<float *>(outputs_.at(kOutputIndex)->Data());
-  Softmax(input_ptr, output_ptr, sum_data, softmax_param_);
+  Softmax(input_ptr, output_ptr, sum_data_, softmax_param_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
index 0e9c0a7ebf..6c46045794 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
@@ -27,14 +27,18 @@ class SoftmaxCPUKernel : public SoftmaxBaseCPUKernel {
   SoftmaxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                    const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
       : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx) {}
-  ~SoftmaxCPUKernel() override = default;
+  ~SoftmaxCPUKernel() override {
+    if (sum_data_ != nullptr) {
+      free(sum_data_);
+    }
+  };
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
  private:
-  float *sum_data;
+  float *sum_data_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc
index 2e046f2651..d07229b871 100644
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/softmax.cc
@@ -37,24 +37,25 @@ void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, Softmax
   for (int i = 0; i < outter_size; i++) {
     int outter_offset = i * input_shape[axis] * inner_size;
-    for (int j = 0; j < input_shape[axis]; j++) {
-      int axis_offset = outter_offset + j * inner_size;
-      for (int k = 0; k < inner_size; k++) {
-        int inner_offset = axis_offset + k;
-        sum_data[j] += output_ptr[inner_offset];
+    int sum_outter_offset = i * inner_size;
+    for (int k = 0; k < inner_size; k++) {
+      int inner_offset = outter_offset + k;
+      for (int j = 0; j < input_shape[axis]; j++) {
+        int axis_offset = inner_offset + j * inner_size;
+        sum_data[k + sum_outter_offset] += output_ptr[axis_offset];
       }
     }
   }
   for (int i = 0; i < outter_size; i++) {
     int outter_offset = i * input_shape[axis] * inner_size;
+    int sum_outter_offset = i * inner_size;
     for (int j = 0; j < input_shape[axis]; j++) {
       int axis_offset = outter_offset + j * inner_size;
       for (int k = 0; k < inner_size; k++) {
         int inner_offset = axis_offset + k;
-        output_ptr[inner_offset] = output_ptr[inner_offset] / sum_data[j];
+        output_ptr[inner_offset] = output_ptr[inner_offset] / sum_data[k + sum_outter_offset];
       }
     }
   }
 }
-
diff --git a/mindspore/lite/test/st/benchmark_test.cc b/mindspore/lite/test/st/benchmark_test.cc
index 86b468677b..48dd6dd7db 100644
--- a/mindspore/lite/test/st/benchmark_test.cc
+++ b/mindspore/lite/test/st/benchmark_test.cc
@@ -26,18 +26,18 @@ class BenchmarkTest : public mindspore::Common {
 };
 
 TEST_F(BenchmarkTest, TestVideo) {
-  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_label_and_video.ms"
-                        "--inDataPath=./hiai/hiai_label_and_video.bin"
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_label_and_video.ms",
+                        "--inDataPath=./hiai/hiai_label_and_video.bin",
                         "--calibDataPath=./hiai/hiai_label_and_video.txt"};
-  auto status = RunBenchmark(2, argv);
+  auto status = RunBenchmark(4, argv);
   ASSERT_EQ(status, RET_OK);
 }
 
 TEST_F(BenchmarkTest, TestOCR_02) {
-  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_cv_focusShootOCRMOdel_02.ms"
-                        "--inDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.bin"
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_cv_focusShootOCRMOdel_02.ms",
+                        "--inDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.bin",
                         "--calibDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.txt"};
-  auto status = RunBenchmark(2, argv);
+  auto status = RunBenchmark(4, argv);
   ASSERT_EQ(status, RET_OK);
 }
 
@@ -51,10 +51,10 @@ ASSERT_EQ(status, RET_OK);
 }
 
 TEST_F(BenchmarkTest, TestHebing) {
-  const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms"
-                        "--inDataPath=./hiai/model_hebing_3branch.bin"
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms",
+                        "--inDataPath=./hiai/model_hebing_3branch.bin",
                         "--calibDataPath=./hiai/model_hebing_3branch.txt"};
-  auto status = RunBenchmark(2, argv);
+  auto status = RunBenchmark(4, argv);
   ASSERT_EQ(status, RET_OK);
 }
 }  // namespace lite
diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc b/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc
index 9680de53a0..c85b5515cf 100644
--- a/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc
+++ b/mindspore/lite/tools/converter/parser/tflite/tflite_softmax_parser.cc
@@ -24,8 +24,7 @@ STATUS TfliteSoftmaxParser::Parse(const std::unique_ptr<tflite::OperatorT> &tfli
                                   const std::vector<std::unique_ptr<tflite::TensorT>> &tfliteTensors,
                                   const std::vector<std::unique_ptr<tflite::BufferT>> &tfliteModelBuffer,
                                   const std::vector<std::unique_ptr<tflite::OperatorCodeT>> &tfliteOpSet,
-                                  schema::CNodeT *op,
-                                  TensorCache *tensor_cache, bool quantizedModel) {
+                                  schema::CNodeT *op, TensorCache *tensor_cache, bool quantizedModel) {
   MS_LOG(DEBUG) << "parse TfliteSoftmaxParser";
   std::unique_ptr<schema::SoftMaxT> attr(new schema::SoftMaxT());
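
For reference, the loop structure introduced in nnacl/fp32/softmax.cc can be illustrated standalone: the reworked kernel keeps one running sum per (outer, inner) position instead of one per axis element, which is what lets it handle an arbitrary softmax axis on inputs with more than two dimensions. The sketch below is not the MindSpore Lite code; it uses a hypothetical helper name (SoftmaxAlongAxis), plain C++ containers instead of the lite tensor types, and folds in the exp() step that the hunk above does not show.

#include <cmath>
#include <cstdio>
#include <vector>

// Softmax over an arbitrary axis of a row-major tensor: one running sum per
// (outer, inner) position, mirroring the out_plane_size / in_plane_size split
// used by the patch. No max-subtraction is done here, matching the hunk shown.
void SoftmaxAlongAxis(const float *input, float *output, float *sum_data,
                      const std::vector<int> &shape, int axis) {
  int outer_size = 1;
  int inner_size = 1;
  const int axis_size = shape[axis];
  for (int i = 0; i < axis; ++i) outer_size *= shape[i];
  for (size_t i = axis + 1; i < shape.size(); ++i) inner_size *= shape[i];

  for (int i = 0; i < outer_size; ++i) {
    const int outer_offset = i * axis_size * inner_size;
    const int sum_offset = i * inner_size;
    // Exponentiate and accumulate along the softmax axis.
    for (int j = 0; j < axis_size; ++j) {
      for (int k = 0; k < inner_size; ++k) {
        const int idx = outer_offset + j * inner_size + k;
        output[idx] = std::exp(input[idx]);
        sum_data[sum_offset + k] += output[idx];
      }
    }
    // Normalize each element by the sum of its own (outer, inner) column.
    for (int j = 0; j < axis_size; ++j) {
      for (int k = 0; k < inner_size; ++k) {
        const int idx = outer_offset + j * inner_size + k;
        output[idx] /= sum_data[sum_offset + k];
      }
    }
  }
}

int main() {
  const std::vector<int> shape = {2, 3, 4};  // 3-D input, softmax along axis 1
  const int axis = 1;
  std::vector<float> in(2 * 3 * 4, 1.0f);
  std::vector<float> out(in.size(), 0.0f);
  std::vector<float> sum(2 * 4, 0.0f);       // out_plane_size * in_plane_size slots
  SoftmaxAlongAxis(in.data(), out.data(), sum.data(), shape, axis);
  std::printf("%f\n", out[0]);               // uniform input -> 1/3 everywhere
  return 0;
}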