From 8946acc35b53606f4d2a8fab0b94a094860078da Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Wed, 16 Sep 2020 19:44:59 +0800 Subject: [PATCH] add coeff for reduce --- mindspore/lite/nnacl/reduce_parameter.h | 1 + mindspore/lite/schema/ops.fbs | 1 + mindspore/lite/src/ops/reduce.cc | 3 + mindspore/lite/src/ops/reduce.h | 2 + mindspore/lite/src/populate_parameter.cc | 1 + .../src/runtime/kernel/arm/fp32/reduce.cc | 54 ++- .../lite/src/runtime/kernel/arm/fp32/reduce.h | 7 +- .../kernel/arm/fp32/reduce_fp32_tests.cc | 434 ++++++++++++------ 8 files changed, 359 insertions(+), 144 deletions(-) diff --git a/mindspore/lite/nnacl/reduce_parameter.h b/mindspore/lite/nnacl/reduce_parameter.h index 2a801f6c84..e28f6f625f 100644 --- a/mindspore/lite/nnacl/reduce_parameter.h +++ b/mindspore/lite/nnacl/reduce_parameter.h @@ -23,6 +23,7 @@ struct ReduceParameter { OpParameter op_parameter_; bool keep_dims_; bool reduce_to_end_; + float coeff; int axes_[REDUCE_MAX_AXES_NUM]; int num_axes_; int mode_; diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs index c728751791..1e4c1de009 100644 --- a/mindspore/lite/schema/ops.fbs +++ b/mindspore/lite/schema/ops.fbs @@ -676,6 +676,7 @@ table Reduce { keepDims: int; mode: ReduceMode; reduceToEnd: bool = false; + coeff: float = 1.0; } table Transpose { diff --git a/mindspore/lite/src/ops/reduce.cc b/mindspore/lite/src/ops/reduce.cc index b8ff29f505..ad6635522c 100644 --- a/mindspore/lite/src/ops/reduce.cc +++ b/mindspore/lite/src/ops/reduce.cc @@ -24,11 +24,13 @@ std::vector Reduce::GetAxes() const { return this->primitive_->value.AsRedu int Reduce::GetKeepDims() const { return this->primitive_->value.AsReduce()->keepDims; } int Reduce::GetMode() const { return this->primitive_->value.AsReduce()->mode; } bool Reduce::GetReduceToEnd() const { return this->primitive_->value.AsReduce()->reduceToEnd; } +float Reduce::GetCoeff() const { return this->primitive_->value.AsReduce()->coeff; } void Reduce::SetAxes(const 
std::vector &axes) { this->primitive_->value.AsReduce()->axes = axes; } void Reduce::SetKeepDims(int keep_dims) { this->primitive_->value.AsReduce()->keepDims = keep_dims; } void Reduce::SetMode(int mode) { this->primitive_->value.AsReduce()->mode = (schema::ReduceMode)mode; } void Reduce::SetReduceToEnd(bool reduce_to_end) { this->primitive_->value.AsReduce()->reduceToEnd = reduce_to_end; } +void Reduce::SetCoeff(float coeff) { this->primitive_->value.AsReduce()->coeff = coeff; } int Reduce::UnPackAttr(const Primitive &prim, const std::vector &inputs) { if (this->primitive_ == nullptr) { @@ -101,6 +103,7 @@ std::vector Reduce::GetAxes() const { int Reduce::GetKeepDims() const { return this->primitive_->value_as_Reduce()->keepDims(); } int Reduce::GetMode() const { return this->primitive_->value_as_Reduce()->mode(); } bool Reduce::GetReduceToEnd() const { return this->primitive_->value_as_Reduce()->reduceToEnd(); } +float Reduce::GetCoeff() const { return this->primitive_->value_as_Reduce()->coeff(); } int Reduce::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { MS_ASSERT(nullptr != primitive); MS_ASSERT(nullptr != fbb); diff --git a/mindspore/lite/src/ops/reduce.h b/mindspore/lite/src/ops/reduce.h index e5d50a2641..d859e0a1c1 100644 --- a/mindspore/lite/src/ops/reduce.h +++ b/mindspore/lite/src/ops/reduce.h @@ -38,6 +38,7 @@ class Reduce : public PrimitiveC { void SetKeepDims(int keep_dims); void SetMode(int mode); void SetReduceToEnd(bool reduce_to_end); + void SetCoeff(float coeff); #else Reduce() = default; @@ -48,6 +49,7 @@ class Reduce : public PrimitiveC { int GetKeepDims() const; int GetMode() const; bool GetReduceToEnd() const; + float GetCoeff() const; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/populate_parameter.cc b/mindspore/lite/src/populate_parameter.cc index c838129f1d..53af8819fc 100644 --- a/mindspore/lite/src/populate_parameter.cc +++ 
b/mindspore/lite/src/populate_parameter.cc @@ -551,6 +551,7 @@ OpParameter *PopulateReduceParameter(const mindspore::lite::PrimitiveC *primitiv auto reduce = reinterpret_cast(const_cast(primitive)); reduce_param->keep_dims_ = reduce->GetKeepDims(); reduce_param->reduce_to_end_ = reduce->GetReduceToEnd(); + reduce_param->coeff = reduce->GetCoeff(); auto axisVector = reduce->GetAxes(); if (axisVector.size() > REDUCE_MAX_AXES_NUM) { MS_LOG(ERROR) << "Reduce axes size " << axisVector.size() << " exceed limit " << REDUCE_MAX_AXES_NUM; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc index 81bcc81ae1..426647cdd4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc @@ -30,6 +30,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Mean; using mindspore::schema::PrimitiveType_Reduce; using mindspore::schema::ReduceMode; +using mindspore::schema::ReduceMode_ReduceASum; using mindspore::schema::ReduceMode_ReduceMax; using mindspore::schema::ReduceMode_ReduceMean; using mindspore::schema::ReduceMode_ReduceMin; @@ -68,7 +69,11 @@ int ReduceCPUKernel::Init() { break; } case static_cast(ReduceMode_ReduceSumSquare): { - reducer_ = ReduceSumSquare; + reducer_ = ReduceSum; + break; + } + case static_cast(ReduceMode_ReduceASum): { + reducer_ = ReduceSum; break; } default: @@ -125,6 +130,7 @@ int ReduceCPUKernel::Run() { } src_data_ = in_tensors_.at(0)->MutableData(); + PreProcess(); for (size_t i = 0; i < static_cast(num_axes_); ++i) { if (i != static_cast(num_axes_ - 1)) { dst_data_ = data_buffers_[i]; @@ -142,10 +148,56 @@ int ReduceCPUKernel::Run() { } src_data_ = dst_data_; } + if (reduce_param_->reduce_to_end_ && reduce_param_->coeff - 1.0f > 1e-5) { + ret = CalculateCoeffOutput(); + if (ret != RET_OK) { + return ret; + } + } + FreeTmpBuffer(); return RET_OK; } +void ReduceCPUKernel::PreProcess() { + if (data_type_ 
== kDataTypeInt) { + return; + } + int num = in_tensors_.at(0)->ElementsNum(); + float *data = reinterpret_cast(in_tensors_.at(0)->MutableData()); + if (data == nullptr) { + return; + } + if (reduce_param_->mode_ == static_cast(ReduceMode_ReduceASum)) { + for (int i = 0; i < num; ++i) { + if (data[i] < 0.0f) { + data[i] = 0.0f - data[i]; + } + } + } + if (reduce_param_->mode_ == static_cast(ReduceMode_ReduceSumSquare)) { + for (int i = 0; i < num; ++i) { + data[i] = data[i] * data[i]; + } + } +} + +int ReduceCPUKernel::CalculateCoeffOutput() { + auto out_tensor = out_tensors_.at(0); + int num = out_tensor->ElementsNum(); + if (data_type_ != kDataTypeFloat) { + return RET_ERROR; + } + float *out_data = reinterpret_cast(out_tensor->MutableData()); + if (out_data == nullptr) { + return RET_NULL_PTR; + } + for (int i = 0; i < num; ++i) { + out_data[i] *= reduce_param_->coeff; + } + return RET_OK; +} + int ReduceCPUKernel::MallocTmpBuffer() { data_buffers_.clear(); for (auto size : buffer_sizes_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h index b7b485de88..2cc3a6fc96 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h @@ -36,7 +36,9 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel { ReduceCPUKernel(OpParameter *param, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} + : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) { + reduce_param_ = reinterpret_cast(param); + } ~ReduceCPUKernel() { src_data_ = nullptr; dst_data_ = nullptr; @@ -50,6 +52,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel { int CallReduceUnit(int task_id); private: + ReduceParameter *reduce_param_; Reducer reducer_ = nullptr; IntReducer int_reducer_ = nullptr; std::vector data_buffers_; 
@@ -61,6 +64,8 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel { private: int MallocTmpBuffer(); void FreeTmpBuffer(); + int CalculateCoeffOutput(); + void PreProcess(); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc index 82db7d3418..4da3ef549e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc @@ -17,18 +17,82 @@ #include "utils/log_adapter.h" #include "common/common_test.h" #include "mindspore/lite/nnacl/fp32/reduce.h" +#include "schema/inner/model_generated.h" +#include "src/tensor.h" +#include "mindspore/lite/src/kernel_registry.h" +#include "mindspore/lite/src/runtime/allocator.h" + +using mindspore::lite::Allocator; +using mindspore::lite::Tensor; +using mindspore::schema::ReduceMode; +using mindspore::schema::ReduceMode_ReduceMax; +using mindspore::schema::ReduceMode_ReduceMean; +using mindspore::schema::ReduceMode_ReduceMin; +using mindspore::schema::ReduceMode_ReduceProd; +using mindspore::schema::ReduceMode_ReduceSum; +using mindspore::schema::ReduceMode_ReduceASum; +using mindspore::schema::ReduceMode_ReduceSumSquare; namespace mindspore { class TestReduceFp32 : public mindspore::CommonTest { public: TestReduceFp32() = default; - int tid = 0; - int thread_num = 1; + + void Prepare(const std::vector &in_shape, const std::vector &out_shape, float *input_data, + float *output_data, ReduceMode mode, const int *axes, const int num_axes, bool reduce_to_end, + float coeff); + void TearDown() override; + + public: + int tid_ = 0; + int thread_num_ = 1; float err_tol = 1e-5; + ReduceParameter param_ = {}; + Tensor in_tensor_; + Tensor out_tensor_; + std::vector inputs{&in_tensor_}; + std::vector outputs{&out_tensor_}; + kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, 
schema::PrimitiveType_Reduce}; + kernel::KernelCreator creator_ = nullptr; + lite::InnerContext *ctx_ = nullptr; + kernel::LiteKernel *kernel_ = nullptr; }; -TEST_F(TestReduceFp32, Mean) { +void TestReduceFp32::TearDown() { + delete ctx_; + in_tensor_.SetData(nullptr); + out_tensor_.SetData(nullptr); +} + +void TestReduceFp32::Prepare(const std::vector &in_shape, const std::vector &out_shape, float *input_data, + float *output_data, ReduceMode mode, const int *axes, const int num_axes, + bool reduce_to_end, float coeff) { + in_tensor_.set_data_type(kNumberTypeFloat32); + in_tensor_.set_shape(in_shape); + in_tensor_.SetData(input_data); + + out_tensor_.set_data_type(kNumberTypeFloat32); + out_tensor_.set_shape(out_shape); + out_tensor_.SetData(output_data); + + param_.mode_ = static_cast(mode); + param_.num_axes_ = num_axes; + memcpy(param_.axes_, axes, num_axes * sizeof(int)); + param_.reduce_to_end_ = reduce_to_end; + param_.coeff = coeff; + + ctx_ = new (std::nothrow) lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx_->Init()); + creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_); + if (ctx_->allocator == nullptr) { + ctx_->allocator = Allocator::Create(); + } + ctx_->thread_num_ = thread_num_; + kernel_ = creator_(inputs, outputs, reinterpret_cast(¶m_), ctx_, desc_, nullptr); +} + +TEST_F(TestReduceFp32, Mean1) { /* 2 4 4 3 NHWC */ float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, @@ -39,21 +103,52 @@ TEST_F(TestReduceFp32, Mean) { float correct[24] = {18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0}; - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; - + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 1, 4, 3}; + int axes[1] = {1}; + int axis_num = 1; float out[24] = 
{0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - (void)ReduceMean(outer_size, inner_size, axis_size, in, out, tid, thread_num); + bool reduce_to_end = false; + float coeff = 1.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMean, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); int output_size = 24; CompareOutputData(out, correct, output_size, err_tol); } -TEST_F(TestReduceFp32, Mean2Thread) { - /* 2*4*4*3 NHWC */ +// thread num 2 reduce_to_end +TEST_F(TestReduceFp32, Mean2) { + /* 2 4 4 3 NHWC */ + float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, + 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, + 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, + 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, + 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; + float correct[2] = {47.0, 143.0}; + + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 1, 1, 1}; + int axes[1] = {1}; + int axis_num = 1; + float out[24] = {0}; + bool reduce_to_end = true; + float coeff = 2.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMean, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); + + int output_size = 2; + CompareOutputData(out, correct, output_size, err_tol); +} + +// thread num 1 +TEST_F(TestReduceFp32, Mean3) { + /* 2 4 4 3 NHWC */ float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, @@ -63,20 
+158,19 @@ TEST_F(TestReduceFp32, Mean2Thread) { float correct[24] = {18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0}; - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; - + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 1, 4, 3}; + int axes[1] = {1}; + int axis_num = 1; float out[24] = {0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - thread_num = 2; - tid = 0; - (void)ReduceMean(outer_size, inner_size, axis_size, in, out, tid, thread_num); - tid = 1; - (void)ReduceMean(outer_size, inner_size, axis_size, in, out, tid, thread_num); + bool reduce_to_end = false; + float coeff = 2.0f; + thread_num_ = 1; - int output_size = 24; + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMean, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); + + int output_size = 2; CompareOutputData(out, correct, output_size, err_tol); } @@ -91,37 +185,16 @@ TEST_F(TestReduceFp32, MeanAllAxis) { float correct[1] = {47.5}; float out[1] = {0}; - int input_shape[4] = {2, 4, 4, 3}; - int outer_size = 1; - int inner_size = 48; - int axis_size = 2; - float *src = in; - float dst1[48] = {0}; - MS_ASSERT(dst != nullptr); - (void)ReduceMean(outer_size, inner_size, axis_size, src, dst1, tid, thread_num); - - input_shape[0] = 1; // 1 4 4 3 - outer_size = 1; - inner_size = 12; - axis_size = 4; - src = dst1; - float dst2[12] = {0}; - (void)ReduceMean(outer_size, inner_size, axis_size, src, dst2, tid, thread_num); - - input_shape[1] = 1; // 1 1 4 3 - outer_size = 1; - inner_size = 3; - axis_size = 4; - src = dst2; - float dst3[3] = {0}; - (void)ReduceMean(outer_size, inner_size, axis_size, src, dst3, tid, thread_num); - - input_shape[2] = 1; // 1 1 1 3 - outer_size = 1; - inner_size = 1; - axis_size = 3; - src = dst3; - (void)ReduceMean(outer_size, inner_size, axis_size, src, out, tid, thread_num); + std::vector in_shape{2, 4, 4, 3}; + 
std::vector out_shape{1, 1, 1, 1}; + int axes[4] = {0, 1, 2, 3}; + int axis_num = 4; + bool reduce_to_end = false; + float coeff = 0.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMean, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); int output_size = 1; CompareOutputData(out, correct, output_size, err_tol); @@ -138,20 +211,24 @@ TEST_F(TestReduceFp32, Sum) { float correct[24] = {72.0, 76.0, 80.0, 84.0, 88.0, 92.0, 96.0, 100.0, 104.0, 108.0, 112.0, 116.0, 264.0, 268.0, 272.0, 276.0, 280.0, 284.0, 288.0, 292.0, 296.0, 300.0, 304.0, 308.0}; - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; - + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 1, 4, 3}; + int axes[1] = {1}; + int axis_num = 1; float out[24] = {0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - (void)ReduceSum(outer_size, inner_size, axis_size, in, out, tid, thread_num); + bool reduce_to_end = false; + float coeff = 1.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSum, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); int output_size = 24; CompareOutputData(out, correct, output_size, err_tol); } -TEST_F(TestReduceFp32, Sum2Thread) { +// sum reduce_to_end +TEST_F(TestReduceFp32, Sum2) { /* 2*4*4*3 NHWC */ float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, @@ -159,23 +236,51 @@ TEST_F(TestReduceFp32, Sum2Thread) { 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; - float correct[24] = {72.0, 76.0, 80.0, 84.0, 88.0, 92.0, 96.0, 100.0, 104.0, 108.0, 112.0, 116.0, - 264.0, 268.0, 
272.0, 276.0, 280.0, 284.0, 288.0, 292.0, 296.0, 300.0, 304.0, 308.0}; - - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; - - float out[24] = {0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - thread_num = 2; - tid = 0; - (void)ReduceSum(outer_size, inner_size, axis_size, in, out, tid, thread_num); - tid = 1; - (void)ReduceSum(outer_size, inner_size, axis_size, in, out, tid, thread_num); + float correct[32] = {6.0, 24.0, 42.0, 60.0, 78.0, 96.0, 114.0, 132.0, 150.0, 168.0, 186.0, + 204.0, 222.0, 240.0, 258.0, 276.0, 294.0, 312.0, 330.0, 348.0, 366.0, 384.0, + 402.0, 420.0, 438.0, 456.0, 474.0, 492.0, 510.0, 528.0, 546.0, 564.0}; + + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 4, 4, 1}; + int axes[1] = {-1}; + int axis_num = 1; + float out[32] = {0}; + bool reduce_to_end = true; + float coeff = 2.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSum, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); + + int output_size = 32; + CompareOutputData(out, correct, output_size, err_tol); +} - int output_size = 24; +TEST_F(TestReduceFp32, Sum3) { + /* 2*4*4*3 NHWC */ + float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, + 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, + 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, + 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, + 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; + float correct[32] = {3.0, 12.0, 21.0, 30.0, 39.0, 48.0, 57.0, 66.0, 75.0, 84.0, 93.0, + 102.0, 111.0, 120.0, 129.0, 138.0, 147.0, 156.0, 165.0, 174.0, 183.0, 192.0, + 201.0, 210.0, 219.0, 228.0, 237.0, 246.0, 255.0, 
264.0, 273.0, 282.0}; + + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 4, 4, 1}; + int axes[1] = {-1}; + int axis_num = 1; + float out[32] = {0}; + bool reduce_to_end = false; + float coeff = 0.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSum, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); + + int output_size = 32; CompareOutputData(out, correct, output_size, err_tol); } @@ -189,38 +294,16 @@ TEST_F(TestReduceFp32, SumAllAxis) { 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; float correct[1] = {4560}; float out[1] = {0}; + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{1, 1, 1, 1}; + int axes[4] = {0}; + int axis_num = 4; + bool reduce_to_end = true; + float coeff = 1.0f; + thread_num_ = 2; - int input_shape[4] = {2, 4, 4, 3}; - int outer_size = 1; - int inner_size = 48; - int axis_size = 2; - float *src = in; - float dst1[48] = {0}; - MS_ASSERT(dst != nullptr); - (void)ReduceSum(outer_size, inner_size, axis_size, src, dst1, tid, thread_num); - - input_shape[0] = 1; // 1 4 4 3 - outer_size = 1; - inner_size = 12; - axis_size = 4; - src = dst1; - float dst2[12] = {0}; - (void)ReduceSum(outer_size, inner_size, axis_size, src, dst2, tid, thread_num); - - input_shape[1] = 1; // 1 1 4 3 - outer_size = 1; - inner_size = 3; - axis_size = 4; - src = dst2; - float dst3[3] = {0}; - (void)ReduceSum(outer_size, inner_size, axis_size, src, dst3, tid, thread_num); - - input_shape[2] = 1; // 1 1 1 3 - outer_size = 1; - inner_size = 1; - axis_size = 3; - src = dst3; - (void)ReduceSum(outer_size, inner_size, axis_size, src, out, tid, thread_num); + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSum, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); int output_size = 1; CompareOutputData(out, correct, output_size, err_tol); @@ -237,14 +320,17 @@ TEST_F(TestReduceFp32, Max) { float correct[24] = {36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 
44.0, 45.0, 46.0, 47.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; - + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 1, 4, 3}; + int axes[1] = {1}; + int axis_num = 1; float out[24] = {0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - (void)ReduceMax(outer_size, inner_size, axis_size, in, out, tid, thread_num); + bool reduce_to_end = false; + float coeff = 1.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMax, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); int output_size = 24; CompareOutputData(out, correct, output_size, err_tol); @@ -261,14 +347,17 @@ TEST_F(TestReduceFp32, Min) { float correct[24] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0}; - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; - + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 1, 4, 3}; + int axes[1] = {1}; + int axis_num = 1; float out[24] = {0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - (void)ReduceMin(outer_size, inner_size, axis_size, in, out, tid, thread_num); + bool reduce_to_end = false; + float coeff = 1.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMin, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); int output_size = 24; CompareOutputData(out, correct, output_size, err_tol); @@ -286,14 +375,17 @@ TEST_F(TestReduceFp32, Prod) { 225280.0, 280665.0, 344080.0, 416185.0, 17418240.0, 18546744.0, 19728400.0, 20964824.0, 22257664.0, 23608584.0, 25019280.0, 26491464.0, 28026880.0, 29627288.0, 31294480.0, 33030264.0}; - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; - + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 1, 4, 3}; + int axes[1] = {1}; + int axis_num = 1; 
float out[24] = {0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - (void)ReduceProd(outer_size, inner_size, axis_size, in, out, tid, thread_num); + bool reduce_to_end = false; + float coeff = 1.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceProd, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); int output_size = 24; CompareOutputData(out, correct, output_size, err_tol); @@ -307,20 +399,78 @@ TEST_F(TestReduceFp32, SumSquare) { 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; - float correct[24] = {2016.0, 2164.0, 2320.0, 2484.0, 2656.0, 2836.0, 3024.0, 3220.0, - 3424.0, 3636.0, 3856.0, 4084.0, 18144.0, 18676.0, 19216.0, 19764.0, - 20320.0, 20884.0, 21456.0, 22036.0, 22624.0, 23220.0, 23824.0, 24436.0}; + float correct[8] = {1012.0, 7636.0, 21172.0, 41620.0, 68980.0, 103252.0, 144436.0, 192532.0}; - int input_shape[4] = {2, 4, 4, 3}; - // int output_shape[4] = {2, 1, 4, 3}; + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 4, 1, 1}; + int axes[1] = {2}; + int axis_num = 1; + float out[8] = {0}; + bool reduce_to_end = true; + float coeff = 2.0f; + thread_num_ = 2; - float out[24] = {0}; - int outer_size = 2; - int inner_size = 12; - int axis_size = 4; - (void)ReduceSumSquare(outer_size, inner_size, axis_size, in, out, tid, thread_num); + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSumSquare, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); - int output_size = 24; + int output_size = 8; + CompareOutputData(out, correct, output_size, err_tol); +} + +TEST_F(TestReduceFp32, SumSquare2) { + /* 2*4*4*3 NHWC */ + float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, + 16.0, 17.0, 18.0, 19.0, 
20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, + 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, + 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, + 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; + float correct[32] = {10.0, 100.0, 298.0, 604.0, 1018.0, 1540.0, 2170.0, 2908.0, + 3754.0, 4708.0, 5770.0, 6940.0, 8218.0, 9604.0, 11098.0, 12700.0, + 14410.0, 16228.0, 18154.0, 20188.0, 22330.0, 24580.0, 26938.0, 29404.0, + 31978.0, 34660.0, 37450.0, 40348.0, 43354.0, 46468.0, 49690.0, 53020.0}; + + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 4, 4, 1}; + int axes[1] = {3}; + int axis_num = 1; + float out[32] = {0}; + bool reduce_to_end = true; + float coeff = 2.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSumSquare, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); + + int output_size = 32; + CompareOutputData(out, correct, output_size, err_tol); +} + +TEST_F(TestReduceFp32, ASum) { + /* 2*4*4*3 NHWC */ + float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, + 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, + 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, + 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, + 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; + float correct[32] = {3.0, 12.0, 21.0, 30.0, 39.0, 48.0, 57.0, 66.0, 75.0, 84.0, 93.0, + 102.0, 111.0, 120.0, 129.0, 138.0, 147.0, 156.0, 165.0, 174.0, 183.0, 
192.0, + 201.0, 210.0, 219.0, 228.0, 237.0, 246.0, 255.0, 264.0, 273.0, 282.0}; + + std::vector in_shape{2, 4, 4, 3}; + std::vector out_shape{2, 4, 4, 1}; + int axes[1] = {3}; + int axis_num = 1; + float out[32] = {0}; + bool reduce_to_end = true; + float coeff = 1.0f; + thread_num_ = 2; + + Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceASum, axes, axis_num, reduce_to_end, coeff); + kernel_->Run(); + + int output_size = 32; CompareOutputData(out, correct, output_size, err_tol); } } // namespace mindspore