From 4d6f6181d3776aad9d2f860199df634d83cdf59a Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Sat, 15 Aug 2020 11:02:30 +0800 Subject: [PATCH] Reduce int8 --- mindspore/lite/src/populate_parameter.cc | 2 +- .../runtime/kernel/arm/base/reduce_base.cc | 199 ++++++++ .../src/runtime/kernel/arm/base/reduce_base.h | 54 ++ .../src/runtime/kernel/arm/base/resize_base.h | 2 +- .../src/runtime/kernel/arm/fp32/reduce.cc | 132 +---- .../lite/src/runtime/kernel/arm/fp32/reduce.h | 34 +- .../runtime/kernel/arm/int8/reduce_int8.cc | 323 ++++++++++++ .../src/runtime/kernel/arm/int8/reduce_int8.h | 98 ++++ .../src/runtime/kernel/arm/nnacl/errorcode.h | 2 + .../runtime/kernel/arm/nnacl/fp32/reduce.h | 9 +- .../kernel/arm/nnacl/int8/reduce_int8.c | 467 ++++++++++++++++++ .../kernel/arm/nnacl/int8/reduce_int8.h | 53 ++ .../kernel/arm/nnacl/quantization/quantize.h | 20 + .../kernel/arm/nnacl/reduce_parameter.h | 30 ++ .../kernel/arm/nnacl/resize_parameter.h | 2 +- .../arm/fp32/resize_bilinear_fp32_tests.cc | 271 +++++----- .../resize_nearest_neighbor_fp32_tests.cc | 239 ++++++--- .../kernel/arm/int8/reduce_int8_tests.cc | 355 +++++++++++++ .../arm/int8/resize_bilinear_int8_tests.cc | 6 +- .../resize_nearest_neighbor_int8_tests.cc | 4 +- 20 files changed, 1940 insertions(+), 362 deletions(-) create mode 100644 mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h create mode 100644 mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h create mode 100644 mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c create mode 100644 mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.h create mode 100644 mindspore/lite/src/runtime/kernel/arm/nnacl/reduce_parameter.h create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc diff --git a/mindspore/lite/src/populate_parameter.cc 
b/mindspore/lite/src/populate_parameter.cc index 80a00476cf..b6a20936b0 100644 --- a/mindspore/lite/src/populate_parameter.cc +++ b/mindspore/lite/src/populate_parameter.cc @@ -38,7 +38,7 @@ #include "src/runtime/kernel/arm/nnacl/softmax_parameter.h" #include "src/runtime/kernel/arm/nnacl/tile.h" #include "src/runtime/kernel/arm/nnacl/fp32/topk.h" -#include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" +#include "src/runtime/kernel/arm/nnacl/reduce_parameter.h" #include "src/runtime/kernel/arm/nnacl/fp32/activation.h" #include "src/runtime/kernel/arm/nnacl/fp32/arithmetic.h" #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc new file mode 100644 index 0000000000..901113f12c --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc @@ -0,0 +1,199 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" +#include "src/runtime/runtime_api.h" +#include "src/runtime/kernel/arm/base/reduce_base.h" +#include "src/runtime/kernel/arm/fp32/reduce.h" +#include "src/runtime/kernel/arm/int8/reduce_int8.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Mean; +using mindspore::schema::PrimitiveType_Reduce; + +namespace mindspore::kernel { +namespace { +constexpr size_t kInputNum = 1; +constexpr size_t kOutputNum = 1; +} // namespace + +int ReduceBaseCPUKernel::CheckInputsOutputs() { + if (in_tensors_.size() != kInputNum) { + MS_LOG(ERROR) << "Reduce inputs size should be " << kInputNum << " but got " << in_tensors_.size(); + return RET_ERROR; + } + if (out_tensors_.size() != kOutputNum) { + MS_LOG(ERROR) << "Reduce outputs size should be " << kOutputNum << " but got " << out_tensors_.size(); + return RET_ERROR; + } + auto input = in_tensors_.at(0); + if (input == nullptr) { + MS_LOG(ERROR) << "Reduce input is nullptr"; + return RET_NULL_PTR; + } + auto output = out_tensors_.at(0); + if (output == nullptr) { + MS_LOG(ERROR) << "Reduce output is nullptr"; + return RET_NULL_PTR; + } + return RET_OK; +} + +int ReduceBaseCPUKernel::CheckParameters() { + size_t input_rank = in_tensors_.at(0)->shape().size(); + if (static_cast(num_axes_) > input_rank) { + MS_LOG(ERROR) << "Reduce op invalid num of reduce axes " << num_axes_ << " larger than input rank " << input_rank; + return RET_ERROR; + } + for (auto i = 0; i < num_axes_; i++) { + if (axes_[i] < -static_cast(input_rank) || axes_[i] >= static_cast(input_rank)) { + MS_LOG(ERROR) << "Reduce got invalid axis " << axes_[i] << ", axis should be in [" + << -static_cast(input_rank) << ", " << input_rank - 1 << "]."; + return RET_ERROR; + } + if 
(axes_[i] < 0) { + axes_[i] += static_cast(input_rank); + } + } + + if (num_axes_ == 0) { + for (int i = 0; i < input_rank; i++) { + axes_[i] = i; + } + num_axes_ = static_cast(input_rank); + } + + return RET_OK; +} + +int ReduceBaseCPUKernel::Init() { + auto reduce_param = reinterpret_cast(op_parameter_); + if (reduce_param == nullptr) { + return RET_NULL_PTR; + } + num_axes_ = reduce_param->num_axes_; + mode_ = reduce_param->mode_; + memcpy(axes_, reduce_param->axes_, sizeof(reduce_param->axes_)); + + auto ret = CheckInputsOutputs(); + if (ret != RET_OK) { + return ret; + } + ret = CheckParameters(); + if (ret != RET_OK) { + return ret; + } + + return RET_OK; +} + +kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc, const lite::Primitive *primitive) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); + if (opParameter == nullptr) { + MS_LOG(ERROR) << "Reduce opParameter nullptr"; + return nullptr; + } + if (desc.type != schema::PrimitiveType_Reduce) { + MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type; + return nullptr; + } + auto *kernel = new (std::nothrow) ReduceCPUKernel(opParameter, inputs, outputs, ctx, primitive); + if (kernel == nullptr) { + MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; + return nullptr; + } + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc, const lite::Primitive *primitive) { + MS_ASSERT(opParameter != nullptr); + 
MS_ASSERT(desc.type == schema::PrimitiveType_Mean); + if (opParameter == nullptr) { + MS_LOG(ERROR) << "Reduce opParameter nullptr"; + return nullptr; + } + if (desc.type != schema::PrimitiveType_Mean) { + MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Mean, got " << desc.type; + return nullptr; + } + auto *kernel = new (std::nothrow) ReduceCPUKernel(opParameter, inputs, outputs, ctx, primitive); + if (kernel == nullptr) { + MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; + return nullptr; + } + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +kernel::LiteKernel *CpuReduceInt8KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc, const lite::Primitive *primitive) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); + if (opParameter == nullptr) { + MS_LOG(ERROR) << "Reduce opParameter nullptr"; + return nullptr; + } + if (desc.type != schema::PrimitiveType_Reduce) { + MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type; + return nullptr; + } + auto *kernel = new (std::nothrow) ReduceInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); + if (kernel == nullptr) { + MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; + return nullptr; + } + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reduce, CpuReduceFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mean, 
CpuMeanFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reduce, CpuReduceInt8KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h new file mode 100644 index 0000000000..3410dfff0f --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_ + +#include +#include "src/lite_kernel.h" +#include "ir/anf.h" +#include "nnacl/reduce_parameter.h" + +namespace mindspore::kernel { +class ReduceBaseCPUKernel : public LiteKernel { + public: + ReduceBaseCPUKernel(OpParameter *param, const std::vector &inputs, + const std::vector &outputs, const lite::Context *ctx, + const lite::Primitive *primitive) + : LiteKernel(param, inputs, outputs, ctx, primitive) {} + virtual ~ReduceBaseCPUKernel() = default; + + int Init() override; + int ReSize() override { return 0; }; + + private: + int CheckInputsOutputs(); + int CheckParameters(); + + protected: + int axes_[REDUCE_MAX_AXES_NUM]; + int num_axes_; + int mode_; + + protected: + int outer_size_; + int inner_size_; + int axis_size_; + std::vector tmp_shape_; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h b/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h index 79ca034cb5..85a3537ba4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h @@ -31,7 +31,7 @@ class ResizeBaseCPUKernel : public LiteKernel { const lite::Primitive *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {} - ~ResizeBaseCPUKernel() = default; + virtual ~ResizeBaseCPUKernel() = default; int Init() override; int ReSize() override { return 0; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc index e9929aced4..64c4a07253 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc @@ -20,6 +20,7 @@ #include "include/errorcode.h" #include "src/runtime/runtime_api.h" #include 
"src/runtime/kernel/arm/nnacl/fp32/reduce.h" +#include "src/runtime/kernel/arm/base/reduce_base.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -37,69 +38,9 @@ using mindspore::schema::ReduceMode_ReduceSum; using mindspore::schema::ReduceMode_ReduceSumSquare; namespace mindspore::kernel { -namespace { -constexpr size_t kInputNum = 1; -constexpr size_t kOutputNum = 1; -} // namespace - -int ReduceCPUKernel::CheckInputsOutputs() { - if (in_tensors_.size() != kInputNum) { - MS_LOG(ERROR) << "Reduce inputs size should be " << kInputNum << " but got " << in_tensors_.size(); - return RET_ERROR; - } - if (out_tensors_.size() != kOutputNum) { - MS_LOG(ERROR) << "Reduce outputs size should be " << kOutputNum << " but got " << out_tensors_.size(); - return RET_ERROR; - } - auto input = in_tensors_.at(0); - if (input == nullptr) { - MS_LOG(ERROR) << "Reduce input is nullptr"; - return RET_NULL_PTR; - } - auto output = out_tensors_.at(0); - if (output == nullptr) { - MS_LOG(ERROR) << "Reduce output is nullptr"; - return RET_NULL_PTR; - } - return RET_OK; -} - -int ReduceCPUKernel::CheckParameters() { - size_t input_rank = in_tensors_.at(0)->shape().size(); - if (static_cast(num_axes_) > input_rank) { - MS_LOG(ERROR) << "Reduce num of reduce axes " << num_axes_ << " larger than input rank " << input_rank; - return RET_ERROR; - } - for (auto i = 0; i < num_axes_; i++) { - if (axes_[i] < -static_cast(input_rank) || axes_[i] >= static_cast(input_rank)) { - MS_LOG(ERROR) << "Reduce got invalid axis " << axes_[i] << ", axis should be in [" - << -static_cast(input_rank) << ", " << input_rank - 1 << "]."; - return RET_ERROR; - } - if (axes_[i] < 0) { - axes_[i] += static_cast(input_rank); - } - } - - if (num_axes_ == 0) { - for (int i = 0; i < input_rank; i++) { - axes_[i] = i; - } - } - - return RET_OK; -} int ReduceCPUKernel::Init() { - if (context_->infer_shape_interrupt_ && !context_->running_) { - set_need_reinit(); - return RET_OK; - } - 
auto ret = CheckInputsOutputs(); - if (ret != RET_OK) { - return ret; - } - ret = CheckParameters(); + auto ret = ReduceBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; } @@ -107,7 +48,6 @@ int ReduceCPUKernel::Init() { if (ret != RET_OK) { return ret; } - switch (mode_) { case static_cast(ReduceMode_ReduceSum): { reducer_ = ReduceSum; @@ -137,7 +77,10 @@ int ReduceCPUKernel::Init() { MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_; return RET_ERROR; } - return RET_OK; + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); } int ReduceCPUKernel::CallReduceUnit(int task_id) { @@ -225,67 +168,4 @@ int ReduceCPUKernel::MallocTmpBuffer() { } return RET_OK; } - -kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector &inputs, - const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, - const kernel::KernelKey &desc, const lite::Primitive *primitive) { - MS_ASSERT(opParameter != nullptr); - MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Reduce opParameter nullptr"; - return nullptr; - } - if (desc.type != schema::PrimitiveType_Reduce) { - MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type; - return nullptr; - } - auto *kernel = new (std::nothrow) - ReduceCPUKernel(reinterpret_cast(opParameter), inputs, outputs, ctx, primitive); - if (kernel == nullptr) { - MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); - delete kernel; - return nullptr; - } - return kernel; -} - -kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector &inputs, - const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, - const kernel::KernelKey &desc, const lite::Primitive 
*primitive) { - MS_ASSERT(opParameter != nullptr); - MS_ASSERT(desc.type == schema::PrimitiveType_Mean); - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Reduce opParameter nullptr"; - return nullptr; - } - if (desc.type != schema::PrimitiveType_Mean) { - MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Mean, got " << desc.type; - return nullptr; - } - auto *kernel = new (std::nothrow) - ReduceCPUKernel(reinterpret_cast(opParameter), inputs, outputs, ctx, primitive); - if (kernel == nullptr) { - MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); - delete kernel; - return nullptr; - } - return kernel; -} - -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reduce, CpuReduceFp32KernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mean, CpuMeanFp32KernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h index 2857ee9baf..5b05b76598 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h @@ -21,25 +21,20 @@ #include "src/lite_kernel.h" #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" +#include "src/runtime/kernel/arm/base/reduce_base.h" #include "ir/anf.h" using mindspore::schema::ReduceMode; namespace mindspore::kernel { -class ReduceCPUKernel : public LiteKernel { +class ReduceCPUKernel : public ReduceBaseCPUKernel { typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data, const int *src_shape, float *dst_data, const int tid, const int thread_num); public: - ReduceCPUKernel(ReduceParameter *param, const std::vector &inputs, + ReduceCPUKernel(OpParameter *param, const std::vector 
&inputs, const std::vector &outputs, const lite::Context *ctx, const lite::Primitive *primitive) - : LiteKernel(reinterpret_cast(param), inputs, outputs, ctx, primitive), - context_(ctx), - keep_dims_(param->keep_dims_), - num_axes_(param->num_axes_), - mode_(param->mode_) { - memcpy(axes_, param->axes_, sizeof(param->axes_)); - } + : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} ~ReduceCPUKernel() { for (auto i = 0; i < data_buffers_.size(); i++) { float *buffer = data_buffers_[i]; @@ -58,26 +53,13 @@ class ReduceCPUKernel : public LiteKernel { int CallReduceUnit(int task_id); private: - int CheckInputsOutputs(); - int CheckParameters(); - int MallocTmpBuffer(); - - private: - const lite::Context *context_ = nullptr; - bool keep_dims_; - int axes_[REDUCE_MAX_AXES_NUM]; - int num_axes_; - int mode_; - - private: + Reducer reducer_; std::vector data_buffers_; - int outer_size_; - int inner_size_; - int axis_size_; - std::vector tmp_shape_; const float *src_data_; float *dst_data_; - Reducer reducer_; + + private: + int MallocTmpBuffer(); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc new file mode 100644 index 0000000000..5a0dc32a71 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc @@ -0,0 +1,323 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "schema/model_generated.h" +#include "src/runtime/runtime_api.h" +#include "src/kernel_registry.h" +#include "nnacl/quantization/quantize.h" +#include "include/errorcode.h" +#include "src/runtime/kernel/arm/int8/reduce_int8.h" + +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Reduce; +using mindspore::schema::ReduceMode_ReduceMax; +using mindspore::schema::ReduceMode_ReduceMean; +using mindspore::schema::ReduceMode_ReduceMin; +using mindspore::schema::ReduceMode_ReduceProd; +using mindspore::schema::ReduceMode_ReduceSum; +using mindspore::schema::ReduceMode_ReduceSumSquare; + +namespace mindspore::kernel { +int ReduceInt8CPUKernel::Init() { + auto ret = ReduceBaseCPUKernel::Init(); + if (ret != RET_OK) { + return ret; + } + ret = MallocTmpBuffer(); + if (ret != RET_OK) { + return ret; + } + ret = CalculateQuantArgs(); + if (ret != RET_OK) { + return ret; + } + + switch (mode_) { + case static_cast(ReduceMode_ReduceMean): { + reducer_ = ReduceMeanInt8; + last_reducer_ = ReduceMeanLastAxis; + break; + } + case static_cast(ReduceMode_ReduceSum): { + reducer_ = ReduceSumInt8; + last_reducer_ = ReduceSumLastAxis; + break; + } + + case static_cast(ReduceMode_ReduceMax): { + reducer_ = ReduceMaxInt8; + last_reducer_ = ReduceMaxLastAxis; + break; + } + case static_cast(ReduceMode_ReduceMin): { + reducer_ = ReduceMinInt8; + last_reducer_ = ReduceMinLastAxis; + break; + } + case static_cast(ReduceMode_ReduceProd): { + reducer_ = ReduceProdInt8; + last_reducer_ = ReduceProdLastAxis; + break; + } + case static_cast(ReduceMode_ReduceSumSquare): { + // In multi-axes reduce cases, sum square output different output for different reduce order + // e.g. axes [2, 3] is different from axes [3, 2]. 
+ reducer_ = ReduceSumSquareInt8; + last_reducer_ = ReduceSumSquareLastAxis; + break; + } + default: + MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_; + return RET_ERROR; + } + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int ReduceInt8CPUKernel::CalculateQuantArgs() { + lite::tensor::Tensor *input = in_tensors_.at(0); + lite::tensor::Tensor *output = out_tensors_.at(0); + MS_ASSERT(input); + MS_ASSERT(output); + + quant_arg_.in_scale_ = input->GetQuantParams().front().scale; + quant_arg_.in_zp_ = input->GetQuantParams().front().zeroPoint; + quant_arg_.out_scale_ = output->GetQuantParams().front().scale; + quant_arg_.out_zp_ = output->GetQuantParams().front().zeroPoint; + + // (quant_out - out_zp) * out_scale = (quant_in - in_zp) * in_scale + const double input_output_multiplier = quant_arg_.in_scale_ / quant_arg_.out_scale_; + int shift; + QuantizeMultiplierSmallerThanOne(input_output_multiplier, &quant_arg_.in_out_multiplier_, &shift); + quant_arg_.in_out_left_shift_ = shift < 0 ? -shift : 0; + quant_arg_.in_out_right_shift_ = shift > 0 ? shift : 0; + + // (quant_out - zp_out)*scale_out = sum((quant_in -zp)*scale_in) * (1/num) for each axis in axes + // quant_out = sum(quant_in-zp) * (scale_in/scale_out) * (1/num) + if (mode_ == static_cast(schema::ReduceMode_ReduceMean)) { + for (auto i = 0; i < num_axes_; i++) { + auto axis = axes_[i]; + double reciprocal = 1.0 / in_tensors_.at(0)->shape()[axis]; + QuantMulArg *qm = new (std::nothrow) QuantMulArg; + if (qm == nullptr) { + MS_LOG(ERROR) << "Reduce new QuantMulArg failed."; + return RET_NULL_PTR; + } + QuantizeMultiplierSmallerThanOne(reciprocal, &qm->multiplier_, &shift); + qm->left_shift_ = shift < 0 ? -shift : 0; + qm->right_shift_ = shift > 0 ? 
shift : 0; + mean_multipliers_.push_back(qm); + } + } + + // (quant_out - zp) * scale_out = prod(quant_in - zp) * scale_in^num + // quant_out = prod(quant_in-zp) * (scale_in^num/scale_out) + zp_out + // scale_in^num-1 * scale_in/scale_out + if (mode_ == static_cast(schema::ReduceMode_ReduceProd)) { + for (auto i = 0; i < num_axes_; i++) { + int axis_size = in_tensors_.at(0)->shape()[axes_[i]]; + QuantMulArg *qm = new (std::nothrow) QuantMulArg; + if (qm == nullptr) { + MS_LOG(ERROR) << "ReduceProd new QuantMulArg failed."; + return RET_NULL_PTR; + } + double prod_multiplier = pow(quant_arg_.in_scale_, axis_size - 1); + QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift); + qm->left_shift_ = shift < 0 ? -shift : 0; + qm->right_shift_ = shift > 0 ? shift : 0; + prod_multipliers_.push_back(qm); + } + } + + // (quant_out - zp) * scale_out = sum((quant_in - zp)^2 * scale_in^2) + // quant_out = sum((quant_in - zp)^2) * scale_in^2 / scale_out + zp_out + // scale_in * scale_in/scale_out + if (mode_ == static_cast(schema::ReduceMode_ReduceSumSquare)) { + for (auto i = 0; i < num_axes_ - 1; i++) { + QuantMulArg *qm = new (std::nothrow) QuantMulArg; + if (qm == nullptr) { + MS_LOG(ERROR) << "ReduceProd new QuantMultiplier failed."; + return RET_NULL_PTR; + } + double sumsquare_multiplier = quant_arg_.in_scale_; + QuantizeMultiplierSmallerThanOne(sumsquare_multiplier, &qm->multiplier_, &shift); + qm->left_shift_ = shift < 0 ? -shift : 0; + qm->right_shift_ = shift > 0 ? shift : 0; + sum_square_multipliers_.push_back(qm); + } + + QuantMulArg *qm = new (std::nothrow) QuantMulArg; + if (qm == nullptr) { + MS_LOG(ERROR) << "ReduceProd new QuantMultiplier failed."; + return RET_NULL_PTR; + } + double sumsquare_multiplier = quant_arg_.in_scale_ * quant_arg_.in_scale_ / quant_arg_.out_scale_; + QuantizeMultiplierSmallerThanOne(sumsquare_multiplier, &qm->multiplier_, &shift); + qm->left_shift_ = shift < 0 ? -shift : 0; + qm->right_shift_ = shift > 0 ? 
shift : 0; + sum_square_multipliers_.push_back(qm); + } + return RET_OK; +} + +int ReduceInt8CPUKernel::MallocTmpBuffer() { + auto input_shape = in_tensors_.at(0)->shape(); + for (auto i = 0; i < num_axes_ - 1; i++) { + int axis = axes_[i]; + size_t size = 1; + for (auto j = 0; j < input_shape.size(); j++) { + if (static_cast(axis) != j) { + size *= input_shape[j]; + } + } + int32_t *buffer = reinterpret_cast(malloc(size * sizeof(int32_t))); + if (buffer == nullptr) { + MS_LOG(ERROR) << "Malloc data failed."; + return RET_ERROR; + } + data_buffers_.emplace_back(buffer); + input_shape[axis] = 1; + } + + auto input = in_tensors_.at(0); + begin_src_data_ = reinterpret_cast(malloc(sizeof(int32_t) * input->ElementsNum())); + if (begin_src_data_ == nullptr) { + return RET_NULL_PTR; + } + auto input_data = reinterpret_cast(input->Data()); + for (auto i = 0; i < input->ElementsNum(); i++) { + begin_src_data_[i] = static_cast(input_data[i]); + } + return RET_OK; +} + +int ReduceInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { + auto reduce = reinterpret_cast(cdata); + auto error_code = reduce->CallReduceUnit(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Reduce Run error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int ReduceInt8CPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + + is_last_axis_ = false; + tmp_shape_ = in_tensors_.at(0)->shape(); + src_data_ = begin_src_data_; + + for (int i = 0; i < data_buffers_.size(); ++i) { + if (mode_ == static_cast(schema::ReduceMode_ReduceMean)) { + quant_arg_.mean_multiplier_ = mean_multipliers_[i]->multiplier_; + quant_arg_.mean_left_shift_ = mean_multipliers_[i]->left_shift_; + quant_arg_.mean_right_shift_ = mean_multipliers_[i]->right_shift_; + } + + if (mode_ == static_cast(schema::ReduceMode_ReduceProd)) { + 
quant_arg_.prod_multiplier_ = prod_multipliers_[i]->multiplier_; + quant_arg_.prod_left_shift_ = prod_multipliers_[i]->left_shift_; + quant_arg_.prod_right_shift_ = prod_multipliers_[i]->right_shift_; + } + if (mode_ == static_cast(schema::ReduceMode_ReduceSumSquare)) { + quant_arg_.sum_square_multiplier_ = sum_square_multipliers_[i]->multiplier_; + quant_arg_.sum_square_left_shift_ = sum_square_multipliers_[i]->left_shift_; + quant_arg_.sum_square_right_shift_ = sum_square_multipliers_[i]->right_shift_; + } + dst_data_ = data_buffers_[i]; + int axis = axes_[i]; + outer_size_ = 1; + for (int j = 0; j < axis; j++) { + outer_size_ *= tmp_shape_[j]; + } + inner_size_ = 1; + for (int k = axis + 1; k < static_cast(tmp_shape_.size()); k++) { + inner_size_ *= tmp_shape_[k]; + } + axis_size_ = tmp_shape_[axis]; + auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; + return RET_ERROR; + } + tmp_shape_[axis] = 1; + src_data_ = dst_data_; + } + + if (mode_ == static_cast(schema::ReduceMode_ReduceMean)) { + quant_arg_.mean_multiplier_ = mean_multipliers_.back()->multiplier_; + quant_arg_.mean_left_shift_ = mean_multipliers_.back()->left_shift_; + quant_arg_.mean_right_shift_ = mean_multipliers_.back()->right_shift_; + } + if (mode_ == static_cast(schema::ReduceMode_ReduceProd)) { + quant_arg_.prod_multiplier_ = prod_multipliers_.back()->multiplier_; + quant_arg_.prod_left_shift_ = prod_multipliers_.back()->left_shift_; + quant_arg_.prod_right_shift_ = prod_multipliers_.back()->right_shift_; + } + if (mode_ == static_cast(schema::ReduceMode_ReduceSumSquare)) { + quant_arg_.sum_square_multiplier_ = sum_square_multipliers_.back()->multiplier_; + quant_arg_.sum_square_left_shift_ = sum_square_multipliers_.back()->left_shift_; + quant_arg_.sum_square_right_shift_ = sum_square_multipliers_.back()->right_shift_; + } + int last_reduce_axis = 
axes_[num_axes_ - 1]; + outer_size_ = 1; + for (int i = 0; i < last_reduce_axis; i++) { + outer_size_ *= tmp_shape_[i]; + } + inner_size_ = 1; + for (int i = last_reduce_axis + 1; i < static_cast(tmp_shape_.size()); i++) { + inner_size_ *= tmp_shape_[i]; + } + axis_size_ = tmp_shape_[last_reduce_axis]; + last_dst_data_ = reinterpret_cast(out_tensors_.at(0)->Data()); + is_last_axis_ = true; + auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; + return RET_ERROR; + } + + if (begin_src_data_ != nullptr) { + free(begin_src_data_); + begin_src_data_ = nullptr; + } + + return RET_OK; +} + +int ReduceInt8CPUKernel::CallReduceUnit(int task_id) { + int ret; + if (!is_last_axis_) { + ret = + reducer_(outer_size_, inner_size_, axis_size_, src_data_, dst_data_, &quant_arg_, task_id, context_->thread_num_); + } else { + ret = last_reducer_(outer_size_, inner_size_, axis_size_, src_data_, last_dst_data_, &quant_arg_, task_id, + context_->thread_num_); + } + return ret; +} +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h new file mode 100644 index 0000000000..895e9016d9 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h @@ -0,0 +1,98 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_
+
+#include
+#include "src/lite_kernel.h"
+#include "nnacl/reduce_parameter.h"
+#include "nnacl/int8/reduce_int8.h"
+#include "nnacl/quantization/quantize.h"
+#include "ir/anf.h"
+#include "src/runtime/kernel/arm/base/reduce_base.h"
+
+using mindspore::schema::ReduceMode;
+
+namespace mindspore::kernel {
+class ReduceInt8CPUKernel : public ReduceBaseCPUKernel {  // int8 reduce kernel; reduces one axis per launch
+  typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                         int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+  typedef int (*LastReducer)(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                             int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+
+ public:
+  ReduceInt8CPUKernel(OpParameter *param, const std::vector &inputs,
+                      const std::vector &outputs, const lite::Context *ctx,
+                      const lite::Primitive *primitive)
+      : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {}
+  ~ReduceInt8CPUKernel() {
+    // free(nullptr) and delete nullptr are no-ops, so the pointers need no null checks before release.
+    for (int32_t *buffer : data_buffers_) {
+      free(buffer);
+    }
+    data_buffers_.clear();
+    for (auto *qm : mean_multipliers_) {
+      delete qm;
+    }
+    mean_multipliers_.clear();
+    for (auto *qm : prod_multipliers_) {
+      delete qm;
+    }
+    prod_multipliers_.clear();
+    for (auto *qm : sum_square_multipliers_) {
+      delete qm;
+    }
+    sum_square_multipliers_.clear();
+    // Run() releases begin_src_data_ only when it succeeds; reclaim it here after an error return.
+    free(begin_src_data_);
+    src_data_ = nullptr;
+    dst_data_ = nullptr;
+  }
+
+  int Init() override;
+  int ReSize() override { return 0; }
+  int Run() override;
+  int CallReduceUnit(int task_id);  // per-thread body; dispatches to reducer_ or last_reducer_
+  int ReduceLastAxis(int task_id);
+
+ public:
+  bool is_last_axis_ = true;  // true: CallReduceUnit() uses last_reducer_ (int8 output), else reducer_
+
+ private:
+  int MallocTmpBuffer();
+  int CalculateQuantArgs();
+
+ private:
+  ReduceParameter *param_ = nullptr;
+  ReduceQuantArg quant_arg_;  // handed to the reducers; Run() loads the multiplier/shift fields before each launch
+
+ private:
+  int32_t *begin_src_data_ = nullptr;  // heap buffer released with free() (in Run() on success, else in the dtor)
+  int8_t *last_dst_data_ = nullptr;    // output tensor data, not owned
+  std::vector data_buffers_;           // int32 intermediates, one per non-final axis; freed by the destructor
+  const int32_t *src_data_ = nullptr;  // input of the current launch, not owned
+  int32_t *dst_data_ = nullptr;        // output of the current non-final launch, not owned
+
+  Reducer reducer_ = nullptr;
+  LastReducer last_reducer_ = nullptr;
+  std::vector mean_multipliers_;        // owned; deleted by the destructor
+  std::vector prod_multipliers_;        // owned; deleted by the destructor
+  std::vector sum_square_multipliers_;  // owned; deleted by the destructor
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/errorcode.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/errorcode.h index 2d4553cede..fbe5c6bf47 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/errorcode.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/errorcode.h @@ -49,6 +49,8 @@ typedef enum ErrorCodeUint8OpEnum { typedef enum ErrorCodeInt8OpEnum { NNACL_ERRCODE_OP_INT8_START = 40000, + NNACL_ERRCODE_ADD_OVERFLOW, + NNACL_ERRCODE_MUL_OVERFLOW, NNACL_ERRCODE_OP_INT8_END = 49999 } ErrorCodeInt8OpEnums; diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/reduce.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/reduce.h index e6a9b29938..db0fa5cad7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/reduce.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/reduce.h @@ -17,15 +17,8 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_ #include "nnacl/op_base.h" -#define REDUCE_MAX_AXES_NUM 8 +#include "src/runtime/kernel/arm/nnacl/reduce_parameter.h" -typedef struct ReduceParameter { - OpParameter op_parameter_; - bool keep_dims_; - int axes_[REDUCE_MAX_AXES_NUM]; - int num_axes_; - int mode_; -} ReduceParameter; #ifdef __cplusplus extern "C" { diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c new file mode 100644 index
0000000000..61952ae7f6
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c
@@ -0,0 +1,467 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include "nnacl/int8/reduce_int8.h"
+#include "nnacl/errorcode.h"
+#include "nnacl/quantization/fixed_point.h"
+
+inline bool isAddOverflow(int32_t x, int32_t y) {  // Returns true iff x + y is not representable as int32_t.
+  int64_t sum = (int64_t)x + (int64_t)y;  // Widened first: a 32-bit signed add that overflows is undefined.
+  return sum > INT32_MAX || sum < INT32_MIN;
+}
+
+inline bool isMulOverflow(int32_t x, int32_t y) {  // Returns true iff x * y is not representable as int32_t.
+  int64_t product = (int64_t)x * (int64_t)y;  // Exact in 64 bits; no division, so -1 * INT32_MIN cannot trap.
+  return product > INT32_MAX || product < INT32_MIN;
+}
+
+// Get x such that (x-zp_in) * scale_in = mean
+// Assuming reduce n axes, this works for first n-1 reduce. One call for one reduce.
+int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int outer, inner, pos;
+  // Outer slices are interleaved across threads: this call handles tid, tid + thread_num, ...
+  for (outer = tid; outer < outer_size; outer += thread_num) {
+    for (inner = 0; inner < inner_size; inner++) {
+      // The axis_size inputs that collapse into one output element sit inner_size apart.
+      const int32_t *column = src_data + outer * axis_size * inner_size + inner;
+      int32_t *out = dst_data + outer * inner_size + inner;
+      int32_t acc = 0;
+      for (pos = 0; pos < axis_size; pos++) {
+        const int32_t centered = column[pos * inner_size] - quant->in_zp_;
+        if (isAddOverflow(acc, centered)) {
+          return NNACL_ERRCODE_ADD_OVERFLOW;
+        }
+        acc += centered;
+      }
+      // mean(item - zp_in): the fixed-point mean_* multiplier and shifts take the place of the division.
+      const int32_t shifted = acc * (1 << (unsigned int)quant->mean_left_shift_);
+      const int32_t scaled = SaturatingRoundingDoublingHighMul(shifted, quant->mean_multiplier_);
+      const int32_t mean = RoundingDivideByPOT(scaled, quant->mean_right_shift_);
+      if (isAddOverflow(mean, quant->in_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      // Re-apply zp_in so the stored value x satisfies (x - zp_in) * scale_in = mean.
+      *out = mean + quant->in_zp_;
+    }
+  }
+  return NNACL_OK;
+}
+
+// suppose reduce n axes, this works for last reduce axis.
+// get y such that (y-zp_out) * scale_out = mean(x-zp_in)*scale_in
+int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int outer, inner, pos;
+  // Outer slices are interleaved across threads: this call handles tid, tid + thread_num, ...
+  for (outer = tid; outer < outer_size; outer += thread_num) {
+    for (inner = 0; inner < inner_size; inner++) {
+      // The axis_size inputs that collapse into one output element sit inner_size apart.
+      const int32_t *column = src_data + outer * axis_size * inner_size + inner;
+      int8_t *out = dst_data + outer * inner_size + inner;
+      // Sum the zero-point-free values; stop with an error as soon as isAddOverflow() flags an addition.
+      int32_t acc = 0;
+      for (pos = 0; pos < axis_size; pos++) {
+        const int32_t centered = column[pos * inner_size] - quant->in_zp_;
+        if (isAddOverflow(centered, acc)) {
+          return NNACL_ERRCODE_ADD_OVERFLOW;
+        }
+        acc += centered;
+      }
+      // Step 1, sum / num: the fixed-point mean_* multiplier and shifts take the place of the division.
+      const int32_t mean_shifted = acc * (1 << (unsigned int)quant->mean_left_shift_);
+      const int32_t mean_high = SaturatingRoundingDoublingHighMul(mean_shifted, quant->mean_multiplier_);
+      const int32_t mean = RoundingDivideByPOT(mean_high, quant->mean_right_shift_);
+      // Step 2, translate from the input scale to the output scale with the in_out_* multiplier
+      // and shifts.
+      const int32_t out_shifted = mean * (1 << (unsigned int)quant->in_out_left_shift_);
+      const int32_t out_high = SaturatingRoundingDoublingHighMul(out_shifted, quant->in_out_multiplier_);
+      const int32_t mean_scaled = RoundingDivideByPOT(out_high, quant->in_out_right_shift_);
+      if (isAddOverflow(mean_scaled, quant->out_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+
+      // Step 3, add the output zero point and saturate to the int8 range: y = mean(x-zp_in) * scale + zp_out.
+      int32_t result = mean_scaled + quant->out_zp_;
+      if (result > INT8_MAX) {
+        result = INT8_MAX;
+      } else if (result < INT8_MIN) {
+        result = INT8_MIN;
+      }
+      *out = (int8_t)result;
+    }
+  }
+  return NNACL_OK;
+}
+
+// Get x such that (x-zp_in) * scale_in = sum(item-zp_in)*scale_in
+// Assuming reduce n axes, this works for first n-1 reduce. One call for one reduce.
+int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // i: position on the reduced axis, j: outer slice, k: inner position
+  for (j = tid; j < outer_size; j += thread_num) {  // outer slices are interleaved across threads
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int32_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int32_t *inner_dst = outer_dst + k;
+      int32_t sum = 0;
+      for (i = 0; i < axis_size; i++) {
+        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;  // elements of one column are inner_size apart
+        if (isAddOverflow(tmp, sum)) {
+          return NNACL_ERRCODE_ADD_OVERFLOW;
+        }
+        sum += tmp;
+      }
+
+      if (isAddOverflow(quant->in_zp_, sum)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      *inner_dst = sum + quant->in_zp_;  // zero point re-applied; no rescaling for intermediate axes
+    }
+  }
+  return NNACL_OK;
+}
+
+// suppose reduce n axes, this works for last reduce axis.
+// get y such that (y-zp_out) * scale_out = sum(item-zp_in)*scale_in
+int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // i: position on the reduced axis, j: outer slice, k: inner position
+  for (j = tid; j < outer_size; j += thread_num) {  // outer slices are interleaved across threads
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int8_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int8_t *inner_dst = outer_dst + k;
+      int32_t sum = 0;
+      for (i = 0; i < axis_size; i++) {
+        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
+        if (isAddOverflow(tmp, sum)) {
+          return NNACL_ERRCODE_ADD_OVERFLOW;
+        }
+        sum += tmp;
+      }
+      int32_t sum_scaled =  // rescale with the in_out_* fixed-point multiplier and shifts
+        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->in_out_left_shift_),
+                                                              quant->in_out_multiplier_),
+                            quant->in_out_right_shift_);
+      if (isAddOverflow(sum_scaled, quant->out_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      sum = sum_scaled + quant->out_zp_;
+      if (sum > INT8_MAX) {  // saturate to the int8 output range
+        *inner_dst = INT8_MAX;
+      } else if (sum < INT8_MIN) {
+        *inner_dst = INT8_MIN;
+      } else {
+        *inner_dst = (int8_t)sum;
+      }
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // max over the reduced axis (i) for each outer (j) / inner (k) position, written as int8
+  for (j = tid; j < outer_size; j += thread_num) {  // outer slices are interleaved across threads
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int8_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int8_t *inner_dst = outer_dst + k;
+      int32_t tmp = INT8_MIN;  // seed assumes every int32 input holds an int8-range value - TODO confirm
+      for (i = 0; i < axis_size; i++) {
+        tmp = tmp > inner_src[i * inner_size] ? tmp : inner_src[i * inner_size];
+      }
+      int32_t tmp_scaled = RoundingDivideByPOT(  // (max - zp_in) rescaled with the in_out_* multiplier and shifts
+        SaturatingRoundingDoublingHighMul((tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_),
+                                          quant->in_out_multiplier_),
+        quant->in_out_right_shift_);
+      if (isAddOverflow(tmp_scaled, quant->out_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      tmp = tmp_scaled + quant->out_zp_;
+      if (tmp > INT8_MAX) {  // saturate to the int8 output range
+        *inner_dst = INT8_MAX;
+      } else if (tmp < INT8_MIN) {
+        *inner_dst = INT8_MIN;
+      } else {
+        *inner_dst = (int8_t)tmp;
+      }
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // max over the reduced axis (i) for each outer (j) / inner (k) position; quant is not read
+  for (j = tid; j < outer_size; j += thread_num) {
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int32_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int32_t *inner_dst = outer_dst + k;
+      int32_t tmp = INT8_MIN;  // seed assumes every int32 input holds an int8-range value - TODO confirm
+      for (i = 0; i < axis_size; i++) {
+        tmp = tmp > inner_src[i * inner_size] ? tmp : inner_src[i * inner_size];
+      }
+
+      *inner_dst = tmp;  // raw quantized maximum: no zero-point or scale handling on intermediate axes
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // min over the reduced axis (i) for each outer (j) / inner (k) position, written as int8
+  int base_offset = 20;  // added to both the left-shift and the right-shift amount below
+  for (j = tid; j < outer_size; j += thread_num) {
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int8_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int8_t *inner_dst = outer_dst + k;
+      int32_t tmp = INT8_MAX;  // seed assumes every int32 input holds an int8-range value - TODO confirm
+      for (i = 0; i < axis_size; i++) {
+        tmp = tmp < inner_src[i * inner_size] ? tmp : inner_src[i * inner_size];
+      }
+      int32_t tmp_scaled =  // note: `+` binds tighter than `<<`, so the factor is 1 << (left_shift + base_offset)
+        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
+                              (tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_ + base_offset),
+                              quant->in_out_multiplier_),
+                            quant->in_out_right_shift_ + base_offset);
+      if (isAddOverflow(tmp_scaled, quant->out_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      tmp = tmp_scaled + quant->out_zp_;
+      if (tmp > INT8_MAX) {  // saturate to the int8 output range
+        *inner_dst = INT8_MAX;
+      } else if (tmp < INT8_MIN) {
+        *inner_dst = INT8_MIN;
+      } else {
+        *inner_dst = (int8_t)tmp;
+      }
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // min over the reduced axis (i) for each outer (j) / inner (k) position; quant is not read
+  for (j = tid; j < outer_size; j += thread_num) {
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int32_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int32_t *inner_dst = outer_dst + k;
+      int32_t tmp = INT8_MAX;  // seed assumes every int32 input holds an int8-range value - TODO confirm
+      for (i = 0; i < axis_size; i++) {
+        tmp = tmp < inner_src[i * inner_size] ? tmp : inner_src[i * inner_size];
+      }
+      *inner_dst = tmp;  // raw quantized minimum: no zero-point or scale handling on intermediate axes
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // product over the reduced axis (i) for each outer (j) / inner (k) position, written as int8
+  for (j = tid; j < outer_size; j += thread_num) {
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int8_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int8_t *inner_dst = outer_dst + k;
+      int32_t prod = 1;
+      for (i = 0; i < axis_size; i++) {
+        // quant_out = prod(quant_in-zp) * (scale_in^num/scale_out) + zp_out
+        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
+        if (isMulOverflow(prod, tmp)) {
+          return NNACL_ERRCODE_MUL_OVERFLOW;
+        }
+        prod *= tmp;
+      }
+      prod = RoundingDivideByPOT(  // first rescale: prod_* multiplier and shifts
+        SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->prod_left_shift_), quant->prod_multiplier_),
+        quant->prod_right_shift_);
+      int32_t prod_scaled =  // second rescale: in_out_* multiplier and shifts
+        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->in_out_left_shift_),
+                                                              quant->in_out_multiplier_),
+                            quant->in_out_right_shift_);
+      if (isAddOverflow(prod_scaled, quant->out_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      prod = prod_scaled + quant->out_zp_;
+      if (prod > INT8_MAX) {  // saturate to the int8 output range
+        *inner_dst = INT8_MAX;
+      } else if (prod < INT8_MIN) {
+        *inner_dst = INT8_MIN;
+      } else {
+        *inner_dst = (int8_t)prod;
+      }
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // product over the reduced axis (i) for each outer (j) / inner (k) position, kept as int32
+  for (j = tid; j < outer_size; j += thread_num) {
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int32_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int32_t *inner_dst = outer_dst + k;
+      int32_t prod = 1;
+      for (i = 0; i < axis_size; i++) {
+        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
+        if (isMulOverflow(prod, tmp)) {
+          return NNACL_ERRCODE_MUL_OVERFLOW;
+        }
+        prod *= tmp;
+      }
+      prod = RoundingDivideByPOT(  // rescale with the prod_* multiplier and shifts
+        SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->prod_left_shift_), quant->prod_multiplier_),
+        quant->prod_right_shift_);
+      if (isAddOverflow(prod, quant->in_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      *inner_dst = prod + quant->in_zp_;  // todo overflow
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceSumSquareLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                            int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // sum of squares over the reduced axis (i) for each outer (j) / inner (k) position, as int8
+  for (j = tid; j < outer_size; j += thread_num) {
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int8_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int8_t *inner_dst = outer_dst + k;
+      int32_t sum = 0;
+      // quant_out = sum((quant_in - zp)^2) * scale_in^2 / scale_out + zp_out
+      for (i = 0; i < axis_size; i++) {
+        int32_t tmp;
+        if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) {
+          return NNACL_ERRCODE_MUL_OVERFLOW;
+        }
+        tmp = (inner_src[i * inner_size] - quant->in_zp_) * (inner_src[i * inner_size] - quant->in_zp_);
+        if (isAddOverflow(sum, tmp)) {
+          return NNACL_ERRCODE_ADD_OVERFLOW;
+        }
+        sum += tmp;
+      }
+      int32_t sum_scaled =  // only the sum_square_* multiplier and shifts are applied here, not in_out_*
+        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->sum_square_left_shift_),
+                                                              quant->sum_square_multiplier_),
+                            quant->sum_square_right_shift_);
+      if (isAddOverflow(sum_scaled, quant->out_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      sum = sum_scaled + quant->out_zp_;
+
+      if (sum > INT8_MAX) {  // saturate to the int8 output range
+        *inner_dst = INT8_MAX;
+      } else if (sum < INT8_MIN) {
+        *inner_dst = INT8_MIN;
+      } else {
+        *inner_dst = (int8_t)sum;
+      }
+    }
+  }
+  return NNACL_OK;
+}
+
+int ReduceSumSquareInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                        int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;  // sum of squares over the reduced axis (i) for each outer (j) / inner (k) position, as int32
+  for (j = tid; j < outer_size; j += thread_num) {
+    const int32_t *outer_src = src_data + j * axis_size * inner_size;
+    int32_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const int32_t *inner_src = outer_src + k;
+      int32_t *inner_dst = outer_dst + k;
+      int32_t sum = 0;
+      for (i = 0; i < axis_size; i++) {
+        int32_t tmp;
+        if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) {
+          return NNACL_ERRCODE_MUL_OVERFLOW;
+        }
+        tmp = (inner_src[i * inner_size] - quant->in_zp_) * (inner_src[i * inner_size] - quant->in_zp_);
+        if (isAddOverflow(sum, tmp)) {
+          return NNACL_ERRCODE_ADD_OVERFLOW;
+        }
+        sum += tmp;
+      }
+      sum =  // rescale with the sum_square_* multiplier and shifts
+        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->sum_square_left_shift_),
+                                                              quant->sum_square_multiplier_),
+                            quant->sum_square_right_shift_);
+      if (isAddOverflow(sum, quant->in_zp_)) {
+        return NNACL_ERRCODE_ADD_OVERFLOW;
+      }
+      *inner_dst = sum + quant->in_zp_;  // input zero point re-applied to the int32 intermediate
+    }
+  }
+  return NNACL_OK;
+}
diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.h
new file mode 100644
index 0000000000..b8b95cb1df
--- /dev/null
+++
b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_
+#include "nnacl/quantization/quantize.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceSumSquareLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                            int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+int ReduceSumSquareInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
+                        int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
+bool isAddOverflow(int32_t x, int32_t y);  // overflow predicate the reducers call before each int32 addition
+bool isMulOverflow(int32_t x, int32_t y);  // overflow predicate the reducers call before each int32 multiplication
+#ifdef __cplusplus
+}
+#endif
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h
index 64e6f534cd..cbdf2b5871 100644
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h
@@ -219,6 +219,26 @@ typedef struct DivQuantArg {
   int output_multiplier_;
   int output_shift_;
 } DivQuantArg;
+
+typedef struct ReduceQuantArg {  // quantization arguments passed to every reducer in nnacl/int8/reduce_int8.c
+  double in_scale_;              // not read by the reducers in reduce_int8.c
+  int32_t in_zp_;                // input zero point: subtracted from each input element by the reducers
+  double out_scale_;             // not read by the reducers in reduce_int8.c
+  int32_t out_zp_;               // output zero point: added by the *LastAxis reducers before int8 saturation
+  int32_t in_out_multiplier_;    // in_out_*: fixed-point rescale from input to output scale
+  int in_out_left_shift_;        //   (used by the Mean/Sum/Max/Min/Prod *LastAxis reducers)
+  int in_out_right_shift_;
+  int32_t mean_multiplier_;      // mean_*: fixed-point factor applied to the sum by the mean reducers
+  int mean_left_shift_;
+  int mean_right_shift_;
+  int32_t prod_multiplier_;      // prod_*: fixed-point factor applied to the product by the prod reducers
+  int prod_left_shift_;
+  int prod_right_shift_;
+  int32_t sum_square_multiplier_;  // sum_square_*: fixed-point factor applied by the sum-square reducers
+  int sum_square_left_shift_;
+  int sum_square_right_shift_;
+} ReduceQuantArg;
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/reduce_parameter.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/reduce_parameter.h
new file mode 100644
index 0000000000..092789aa35
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/reduce_parameter.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_
+#include "nnacl/op_base.h"
+#define REDUCE_MAX_AXES_NUM 8  // capacity of ReduceParameter::axes_
+
+typedef struct ReduceParameter {  // parameters of the Reduce operator, shared by the fp32 and int8 kernels
+  OpParameter op_parameter_;
+  bool keep_dims_;
+  int axes_[REDUCE_MAX_AXES_NUM];
+  int num_axes_;  // presumably the number of valid entries in axes_ - confirm against populate_parameter.cc
+  int mode_;      // presumably a schema::ReduceMode value - confirm against populate_parameter.cc
+} ReduceParameter;  // typedef (as in the former fp32/reduce.h) so C code can name the type without `struct`
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/resize_parameter.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/resize_parameter.h index 946f4f88a1..237b9c5ca2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/resize_parameter.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/resize_parameter.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_ -#include "src/runtime/kernel/arm/nnacl/op_base.h" +#include "nnacl/op_base.h" typedef struct ResizeParameter { OpParameter op_parameter_; int method_; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc index b9e316260b..8745ce5d57 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc @@ -13,204 +13,255 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ -#include #include +#include "mindspore/lite/src/lite_kernel.h" +#include "mindspore/lite/src/ir/tensor.h" #include "common/common_test.h" -#include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.h" +#include "nnacl/resize_parameter.h" +#include "mindspore/lite/src/kernel_registry.h" namespace mindspore { class TestResizeBilinearFp32 : public mindspore::CommonTest { public: TestResizeBilinearFp32() = default; + void Prepare(const std::vector &input_shape, const std::vector &output_shape, float *input_data, + float *output_data, const bool align_corners, const int thread_num); + + void TearDown() override; public: - int tid = 0; - int thread_num = 1; float err_tol = 1e-5; + lite::tensor::Tensor in_tensor_; + lite::tensor::Tensor out_tensor_; + std::vector inputs_{&in_tensor_}; + std::vector outputs_{&out_tensor_}; + ResizeParameter param_ = {{}}; + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; + lite::Context ctx_ = lite::Context(); + kernel::KernelCreator creator_ = nullptr; + kernel::LiteKernel *kernel_ = nullptr; }; +void TestResizeBilinearFp32::TearDown() { + in_tensor_.SetData(nullptr); + out_tensor_.SetData(nullptr); +} + +void TestResizeBilinearFp32::Prepare(const std::vector &input_shape, const std::vector &output_shape, + float *input_data, float *output_data, const bool align_corners, + const int thread_num) { + in_tensor_.set_data_type(kNumberTypeFloat32); + in_tensor_.set_shape(input_shape); + out_tensor_.set_data_type(kNumberTypeFloat32); + out_tensor_.set_shape(output_shape); + in_tensor_.SetData(input_data); + out_tensor_.SetData(output_data); + + ResizeParameter param_ = { + {}, static_cast(schema::ResizeMethod_BILINEAR), output_shape[1], output_shape[2], align_corners}; + desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; + ctx_ = lite::Context(); + ctx_.thread_num_ = thread_num; + creator_ = 
lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator_, nullptr); + kernel_ = creator_(inputs_, outputs_, reinterpret_cast(¶m_), &ctx_, desc, nullptr); + ASSERT_NE(kernel_, nullptr); +} + // 1*1 -> 1*1 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest1) { - std::vector input = {1.0}; + float input_data[] = {1.0f}; + float output_data[1] = {0}; std::vector input_shape = {1, 1, 1, 1}; std::vector output_shape = {1, 1, 1, 1}; std::vector expect = {1.0}; bool align_corners = false; - auto output_size = 1; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 1*1 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest2) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[1] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 1, 1, 1}; std::vector expect = {0.0}; bool align_corners = false; - int output_size = 1; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 1*2 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest3) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[2] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector 
output_shape = {1, 1, 2, 1}; std::vector expect = {0.0, 1.0}; bool align_corners = false; - auto output_size = 2; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 2*1 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest4) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[2] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 2, 1, 1}; std::vector expect = {0.0, 2.0}; bool align_corners = false; - auto output_size = 2; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 2*2 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest5) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[4] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 2, 2, 1}; std::vector expect = {0.0, 1.0, 2.0, 3.0}; bool align_corners = false; - auto output_size = 4; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, 
output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 1*4 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest6) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[4] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 1, 4, 1}; std::vector expect = {0.0, 0.5, 1.0, 1.0}; bool align_corners = false; - auto output_size = 4; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 4*1 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest7) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[4] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 4, 1, 1}; std::vector expect = {0.0, 1.0, 2.0, 2.0}; bool align_corners = false; - auto output_size = 4; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 2*4 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest8) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[8] = {0}; std::vector input_shape = {1, 2, 2, 1}; 
std::vector output_shape = {1, 2, 4, 1}; std::vector expect = {0.0, 0.5, 1.0, 1.0, 2.0, 2.5, 3.0, 3.0}; bool align_corners = false; - auto output_size = 8; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 4*2 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest9) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[8] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 4, 2, 1}; std::vector expect = {0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0}; bool align_corners = false; - auto output_size = 8; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 3*3 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest10) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[9] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 3, 3, 1}; std::vector expect = {0.0, 0.6666667, 1.0, 1.3333334, 2.0, 2.3333335, 2.0, 2.6666667, 3.0}; bool align_corners = false; auto output_size = 9; - std::vector output(output_size, 0.0); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, 
ret); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 4*4 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest11) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[16] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 4, 4, 1}; std::vector expect = {0.0, 0.5, 1.0, 1.0, 1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 2.0, 2.5, 3.0, 3.0}; bool align_corners = false; auto output_size = 16; - std::vector output(output_size, 0.0); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2*2*5 -> 2*4*4*5 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest12) { - std::vector input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, - 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, + 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, + 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float output_data[160] = {0}; std::vector input_shape = {2, 2, 2, 5}; std::vector output_shape = {2, 4, 4, 5}; std::vector expect = { @@ -224,20 +275,21 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest12) { 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 
35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, 34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; bool align_corners = false; - auto output_size = 160; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2*2*5 -> 2*4*4*5 align corners TEST_F(TestResizeBilinearFp32, ResizeBilinearTest13) { - std::vector input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, - 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, + 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, + 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float output_data[160] = {0}; std::vector input_shape = {2, 2, 2, 5}; std::vector output_shape = {2, 4, 4, 5}; std::vector expect = { @@ -258,20 +310,21 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest13) { 30.0, 31.0, 32.0, 33.0, 34.0, 31.666666, 32.666668, 33.666668, 34.666668, 35.666668, 33.333332, 34.333332, 35.333332, 36.333332, 37.333332, 35.0, 36.0, 37.0, 38.0, 39.0}; bool align_corners = true; - auto output_size = 160; - std::vector output(output_size, 0.0); - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, 
align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2*2*5 -> 2*4*4*5 thread_num 2 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest14) { - std::vector input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, - 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, + 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, + 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float output_data[160] = {0}; std::vector input_shape = {2, 2, 2, 5}; std::vector output_shape = {2, 4, 4, 5}; std::vector expect = { @@ -285,24 +338,22 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest14) { 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, 34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; bool align_corners = false; - auto output_size = 160; - std::vector output(output_size, 0.0); - thread_num = 2; - tid = 0; - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - tid = 1; - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + int thread_num = 2; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, thread_num); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2*2*5 -> 2*4*4*5 thread_num 4 TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) { - std::vector input = {0.0, 1.0, 2.0, 3.0, 4.0, 
5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, - 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, + 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, + 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float output_data[160] = {0}; std::vector input_shape = {2, 2, 2, 5}; std::vector output_shape = {2, 4, 4, 5}; std::vector expect = { @@ -319,19 +370,11 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) { auto output_size = 160; std::vector output(output_size, 0.0); - thread_num = 4; - tid = 0; - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - tid = 1; - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - tid = 2; - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - tid = 3; - ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, - thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + int thread_num = 4; + Prepare(input_shape, output_shape, input_data, output_data, align_corners, thread_num); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc index 65cb508489..b1fae684ed 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc +++ 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc @@ -15,168 +15,250 @@ */ #include #include "common/common_test.h" -#include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.h" +#include "nnacl/resize_parameter.h" +#include "mindspore/lite/src/kernel_registry.h" namespace mindspore { class TestResizeNearestNeighborFp32 : public mindspore::CommonTest { public: TestResizeNearestNeighborFp32() = default; + void Prepare(const std::vector &input_shape, const std::vector &output_shape, float *input_data, + float *output_data, const bool align_corners, const int thread_num); + + void TearDown() override; public: - int tid = 0; - int thread_num = 1; float err_tol = 1e-5; + lite::tensor::Tensor in_tensor_; + lite::tensor::Tensor out_tensor_; + std::vector inputs_{&in_tensor_}; + std::vector outputs_{&out_tensor_}; + ResizeParameter param_ = {{}}; + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; + lite::Context ctx_ = lite::Context(); + kernel::KernelCreator creator_ = nullptr; + kernel::LiteKernel *kernel_ = nullptr; }; +void TestResizeNearestNeighborFp32::TearDown() { + in_tensor_.SetData(nullptr); + out_tensor_.SetData(nullptr); +} + +void TestResizeNearestNeighborFp32::Prepare(const std::vector &input_shape, const std::vector &output_shape, + float *input_data, float *output_data, const bool align_corners, + const int thread_num) { + in_tensor_.set_data_type(kNumberTypeFloat32); + in_tensor_.set_shape(input_shape); + out_tensor_.set_data_type(kNumberTypeFloat32); + out_tensor_.set_shape(output_shape); + in_tensor_.SetData(input_data); + out_tensor_.SetData(output_data); + + ResizeParameter param_ = { + {}, static_cast(schema::ResizeMethod_NEAREST_NEIGHBOR), output_shape[1], output_shape[2], align_corners}; + desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; + ctx_ = lite::Context(); + ctx_.thread_num_ = thread_num; + creator_ = 
lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator_, nullptr); + kernel_ = creator_(inputs_, outputs_, reinterpret_cast(¶m_), &ctx_, desc, nullptr); + ASSERT_NE(kernel_, nullptr); +} // 1*1 -> 1*1 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest1) { - std::vector input = {1.0}; + float input_data[] = {1.0}; + float output_data[1] = {0}; std::vector input_shape = {1, 1, 1, 1}; std::vector output_shape = {1, 1, 1, 1}; std::vector expect = {1.0}; size_t output_size = 1; - std::vector output(output_size, 0.0); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 1*1 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest2) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[1] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 1, 1, 1}; std::vector expect = {0.0}; size_t output_size = 1; - std::vector output(output_size, 0.0); + bool align_corners = false; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 1*2 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest3) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[2] = {0}; std::vector input_shape = {1, 
2, 2, 1}; std::vector output_shape = {1, 1, 2, 1}; std::vector expect = {0.0, 1.0}; size_t output_size = 2; - std::vector output(output_size, 0.0); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 2*1 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest4) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[2] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 2, 1, 1}; std::vector expect = {0.0, 2.0}; size_t output_size = 2; - std::vector output(output_size, 0.0); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 2*2 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest5) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[4] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 2, 2, 1}; std::vector expect = {0.0, 1.0, 2.0, 3.0}; size_t output_size = 4; - std::vector output(output_size, 0.0); + bool align_corners = false; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, 
output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 1*4 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest6) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[4] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 1, 4, 1}; std::vector expect = {0.0, 0.0, 1.0, 1.0}; size_t output_size = 4; - std::vector output(output_size, 0.0); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 4*1 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest7) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[4] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 4, 1, 1}; std::vector expect = {0.0, 0.0, 2.0, 2.0}; size_t output_size = 4; - std::vector output(output_size, 0.0); + bool align_corners = false; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 2*4 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest8) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[8] = 
{0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 2, 4, 1}; std::vector expect = {0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0}; size_t output_size = 8; - std::vector output(output_size, 0.0); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 4*2 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest9) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[8] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 4, 2, 1}; std::vector expect = {0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0}; size_t output_size = 8; - std::vector output(output_size, 0.0); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 3*3 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest10) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[9] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 3, 3, 1}; std::vector expect = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0}; size_t output_size = 9; - std::vector output(output_size, 0.0); + bool align_corners = false; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), 
output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2 -> 4*4 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest11) { - std::vector input = {0.0, 1.0, 2.0, 3.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0}; + float output_data[16] = {0}; std::vector input_shape = {1, 2, 2, 1}; std::vector output_shape = {1, 4, 4, 1}; std::vector expect = {0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 2.0, 3.0, 3.0}; size_t output_size = 16; - std::vector output(output_size, 0.0); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2*2*5 -> 2*4*4*5 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest12) { - std::vector input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, - 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, + 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, + 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float output_data[160] = {0}; std::vector input_shape = {2, 2, 2, 5}; std::vector output_shape = {2, 4, 4, 5}; std::vector expect = { @@ -190,17 +272,21 @@ TEST_F(TestResizeNearestNeighborFp32, 
ResizeNearestNeighborTest12) { 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; size_t output_size = 160; - std::vector output(output_size, 0.0); + bool align_corners = false; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2*2*5 -> 2*4*4*5 thread_num 2 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest13) { - std::vector input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, - 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, + 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, + 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float output_data[160] = {0}; std::vector input_shape = {2, 2, 2, 5}; std::vector output_shape = {2, 4, 4, 5}; std::vector expect = { @@ -214,21 +300,21 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest13) { 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; size_t output_size = 160; - std::vector output(output_size, 0.0); - - thread_num = 2; - tid = 0; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - tid = 1; - 
ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 2); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } // 2*2*2*5 -> 2*4*4*5 thread_num 4 TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest14) { - std::vector input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, - 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, + 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, + 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; + float output_data[160] = {0}; std::vector input_shape = {2, 2, 2, 5}; std::vector output_shape = {2, 4, 4, 5}; std::vector expect = { @@ -242,17 +328,12 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest14) { 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; size_t output_size = 160; - std::vector output(output_size, 0.0); - - thread_num = 4; - tid = 0; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - tid = 1; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - tid = 2; - ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); - tid = 3; - ResizeNearestNeighbor(input.data(), output.data(), 
input_shape.data(), output_shape.data(), tid, thread_num); - CompareOutputData(output.data(), expect.data(), output_size, err_tol); + bool align_corners = false; + + Prepare(input_shape, output_shape, input_data, output_data, align_corners, 4); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputData(output_data, expect.data(), output_size, err_tol); } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc new file mode 100644 index 0000000000..f3e78a5014 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc @@ -0,0 +1,355 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "utils/log_adapter.h" +#include "common/common_test.h" +#include "src/ir/tensor.h" +#include "mindspore/lite/src/kernel_registry.h" +#include "nnacl/fp32/reduce.h" + +namespace mindspore { +using mindspore::lite::tensor::QuantArg; +using mindspore::lite::tensor::Tensor; +using mindspore::schema::ReduceMode; +using mindspore::schema::ReduceMode_ReduceMax; +using mindspore::schema::ReduceMode_ReduceMean; +using mindspore::schema::ReduceMode_ReduceMin; +using mindspore::schema::ReduceMode_ReduceProd; +using mindspore::schema::ReduceMode_ReduceSum; +using mindspore::schema::ReduceMode_ReduceSumSquare; + +class TestReduceInt8 : public mindspore::CommonTest { + public: + TestReduceInt8() = default; + void Prepare(const std::vector &in_shape, const std::vector &out_shape, int8_t *input_data, + int8_t *output_data, ReduceMode mode, const int *axes, const int num_axes); + void TearDown() override; + + public: + int thread_num_ = 1; + + ReduceParameter param_ = {}; + Tensor in_tensor_; + Tensor out_tensor_; + std::vector inputs{&in_tensor_}; + std::vector outputs{&out_tensor_}; + kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reduce}; + kernel::KernelCreator creator_ = nullptr; + lite::Context ctx_ = lite::Context(); + kernel::LiteKernel *kernel_ = nullptr; + const QuantArg quant_in_ = {0.005f, 5}; + const QuantArg quant_out_ = {0.01f, 1}; + float err_tol_ = 0.05; +}; + +void TestReduceInt8::TearDown() { + in_tensor_.SetData(nullptr); + out_tensor_.SetData(nullptr); +} + +void TestReduceInt8::Prepare(const std::vector &in_shape, const std::vector &out_shape, int8_t *input_data, + int8_t *output_data, ReduceMode mode, const int *axes, const int num_axes) { + in_tensor_.set_data_type(kNumberTypeInt8); + in_tensor_.set_shape(in_shape); + in_tensor_.SetData(input_data); + in_tensor_.AddQuantParam(quant_in_); + + out_tensor_.set_data_type(kNumberTypeInt8); + out_tensor_.set_shape(out_shape); + 
out_tensor_.SetData(output_data); + out_tensor_.AddQuantParam(quant_out_); + + param_.mode_ = static_cast(mode); + param_.num_axes_ = num_axes; + memcpy(param_.axes_, axes, num_axes * sizeof(int)); + + creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_); + + ctx_.thread_num_ = thread_num_; + kernel_ = creator_(inputs, outputs, reinterpret_cast(¶m_), &ctx_, desc_, nullptr); +} + +TEST_F(TestReduceInt8, Mean) { + /* 2 4 4 3 NHWC */ + int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[32] = {0}; + int axes[] = {3}; + int num_axes = 1; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {2, 4, 4, 1}; + int output_size = 32; + int8_t correct[] = {-1, 1, 2, 3, 5, 7, 8, 10, 11, 12, 14, 16, 17, 19, 20, 22, + 23, 25, 26, 28, 29, 30, 32, 34, 35, 37, 38, 40, 41, 43, 44, 46}; + + thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMean, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + err_tol_ = 0.09375; + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, MeanAllAxis) { + /* 2*4*4*3 NHWC */ + int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[1] = {0}; + int axes[] = {0}; + int num_axes = 0; + 
std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {1}; + int output_size = 1; + int8_t correct[] = {22}; + thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMean, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + err_tol_ = 1.0f; + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, Sum) { + /* 2*4*4*3 NHWC */ + int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[32] = {0}; + int axes[] = {-1}; + int num_axes = 1; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {2, 4, 4, 1}; + int output_size = 32; + int8_t correct[] = {-5, -1, 4, 9, 13, 18, 22, 27, 31, 36, 40, 45, 49, 54, 58, 63, + 67, 72, 76, 81, 85, 90, 94, 99, 103, 107, 112, 117, 121, 126, 127, 127}; + thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSum, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + err_tol_ = 0.0625f; + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, SumAllAxis) { + /* 2*4*4*3 NHWC */ + int8_t input_data[96] = { + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + }; + int8_t output_data[1] = {0}; + int axes[] = {0, 1, 2, 3}; + int num_axes = 4; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {1}; + 
int output_size = 1; + int8_t correct[] = {-47}; + thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSum, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + /* err_tol_ is not assigned in this test body, so the value already held by the fixture is used. */ CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, Max) { + /* ReduceMax case: 2*4*4*3 NHWC input holding 0..95; axis 3 is reduced into a declared 2*4*4*1 output. */ + int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[32] = {0}; + int axes[] = {3}; + int num_axes = 1; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {2, 4, 4, 1}; + int output_size = 32; + int8_t correct[] = {-1, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, + 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46}; + thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMax, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, MaxAll) { + /* ReduceMax over all four axes: 2*4*4*3 NHWC input holding 0..95, single-element output. */ + int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[1] = {0}; + int axes[] = {0, 1, 2, 3}; + int num_axes = 4; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {1}; + int output_size = 1; + int8_t correct[] = {46}; + 
thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMax, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, Min) { + /* ReduceMin case: 2*4*4*3 NHWC input holding 0..95; axis 3 is reduced into a declared 2*4*4*1 output. */ + int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[32] = {0}; + int axes[] = {3}; + int num_axes = 1; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {2, 4, 4, 1}; + int output_size = 32; + int8_t correct[] = {-2, 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, + 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45}; + thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMin, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, MinAll) { + /* ReduceMin with a single-element output: 2*4*4*3 NHWC input holding 0..95. */ + int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[1] = {0}; + int axes[] = {0}; + int num_axes = 0; /* NOTE(review): num_axes is 0 although axes[] has one entry and output_shape is {1}; presumably zero axes means "reduce over every axis" - confirm in the kernel. */ + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {1}; + int output_size = 1; + int8_t correct[] = {-2}; + thread_num_ = 2; + Prepare(input_shape, output_shape, 
input_data, output_data, ReduceMode_ReduceMin, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, Prod) { + /* ReduceProd case: 2*4*4*3 NHWC input where every element is 105; axis 3 is reduced into a declared 2*4*4*1 output. */ + int8_t input_data[96] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105}; + int8_t output_data[32] = {0}; + int axes[] = {3}; + int num_axes = 1; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {2, 4, 4, 1}; + int output_size = 32; + int8_t correct[] = { + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + }; + thread_num_ = 2; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceProd, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, Prod2Axis) { + /* ReduceProd over axes {2, 3}: 1*2*2*3 NHWC input (12 elements, all 105) with a declared 1*2 output. */ + int8_t input_data[12] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105}; + int8_t output_data[8] = {0}; /* buffer holds 8 slots; the output_size handed to the comparison below is 2 */ + int axes[] = {2, 3}; + int num_axes = 2; + std::vector input_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 2}; + int output_size = 2; + int8_t correct[] = {3, 3}; + thread_num_ = 1; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceProd, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, SumSquare) { + /* 2*4*4*3 NHWC input; the reduction mode and axes are set in the statements that follow. */ + + 
int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + int8_t output_data[32] = {0}; + int axes[] = {3}; /* axis 3 is reduced; the declared output below is 2*4*4*1 */ + int num_axes = 1; + std::vector input_shape = {2, 4, 4, 3}; + std::vector output_shape = {2, 4, 4, 1}; + int output_size = 32; + int8_t correct[] = {1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6, 7, 9, 10, 12, 14, + 16, 18, 20, 22, 25, 27, 30, 33, 36, 39, 42, 45, 49, 53, 56, 60}; + thread_num_ = 1; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSumSquare, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +TEST_F(TestReduceInt8, SumSquare2Axis) { + /* ReduceSumSquare over axes {3, 2}: 1*2*2*3 NHWC input (12 elements, all 105) with a declared 1*2 output. */ + int8_t input_data[12] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105}; + int8_t output_data[8] = {0}; /* buffer holds 8 slots; the output_size handed to the comparison below is 2 */ + int axes[] = {3, 2}; + int num_axes = 2; + std::vector input_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 2}; + int output_size = 2; + int8_t correct[] = {114, 114}; + thread_num_ = 1; + Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSumSquare, axes, num_axes); + auto ret = kernel_->Run(); + EXPECT_EQ(0, ret); + + CompareOutputInt8(output_data, correct, output_size, err_tol_); +} + +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc index ed0b269dd6..dc27b89a1b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc 
@@ -18,10 +18,8 @@ #include "include/context.h" #include "src/ir/tensor.h" #include "common/common_test.h" -#include "src/common/file_utils.h" #include "mindspore/lite/src/kernel_registry.h" -#include "src/runtime/kernel/arm/nnacl/int8/resize.h" -#include "src/runtime/kernel/arm/int8/resize_int8.h" +#include "nnacl/int8/resize.h" namespace mindspore { using mindspore::lite::tensor::QuantArg; @@ -92,7 +90,7 @@ TEST_F(TestResizeBilinearInt8, Bilinear0) { int8_t expect[16] = {4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 6}; Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num); - kernel_->Init(); + kernel_->Init(); // todo delete kernel_->Run(); CompareOutputInt8(output_data, expect, 16, err_percent_); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc index ffc3790c54..794a348c71 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc @@ -19,7 +19,7 @@ #include "src/ir/tensor.h" #include "common/common_test.h" #include "mindspore/lite/src/kernel_registry.h" -#include "src/runtime/kernel/arm/nnacl/int8/resize.h" +#include "nnacl/int8/resize.h" namespace mindspore { using mindspore::lite::tensor::QuantArg; @@ -92,7 +92,7 @@ TEST_F(TestResizeNearestNeighborInt8, NearestNeighbor0) { err_percent_ = 0.25f; Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, false, thread_num); - kernel_->Init(); + kernel_->Init(); // todo delete kernel_->Run(); CompareOutputInt8(output_data, expect, 16, err_percent_);