Merge pull request !4485 from zhaozhenlong/lite/op/int8/reduce_mean_sum (tag: v0.7.0-beta)
| @@ -38,7 +38,7 @@ | |||||
| #include "src/runtime/kernel/arm/nnacl/softmax_parameter.h" | #include "src/runtime/kernel/arm/nnacl/softmax_parameter.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/fp32/tile.h" | #include "src/runtime/kernel/arm/nnacl/fp32/tile.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/fp32/topk.h" | #include "src/runtime/kernel/arm/nnacl/fp32/topk.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" | |||||
| #include "src/runtime/kernel/arm/nnacl/reduce_parameter.h" | |||||
| #include "src/runtime/kernel/arm/nnacl/fp32/activation.h" | #include "src/runtime/kernel/arm/nnacl/fp32/activation.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/fp32/arithmetic.h" | #include "src/runtime/kernel/arm/nnacl/fp32/arithmetic.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" | #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" | ||||
| @@ -0,0 +1,199 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "schema/model_generated.h" | |||||
| #include "src/kernel_registry.h" | |||||
| #include "include/errorcode.h" | |||||
| #include "src/runtime/runtime_api.h" | |||||
| #include "src/runtime/kernel/arm/base/reduce_base.h" | |||||
| #include "src/runtime/kernel/arm/fp32/reduce.h" | |||||
| #include "src/runtime/kernel/arm/int8/reduce_int8.h" | |||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||||
| using mindspore::lite::KernelRegistrar; | |||||
| using mindspore::lite::RET_ERROR; | |||||
| using mindspore::lite::RET_NULL_PTR; | |||||
| using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_Mean; | |||||
| using mindspore::schema::PrimitiveType_Reduce; | |||||
| namespace mindspore::kernel { | |||||
namespace {
// Reduce consumes exactly one input tensor and produces exactly one output.
constexpr size_t kInputNum = 1;
constexpr size_t kOutputNum = 1;
}  // namespace
| int ReduceBaseCPUKernel::CheckInputsOutputs() { | |||||
| if (in_tensors_.size() != kInputNum) { | |||||
| MS_LOG(ERROR) << "Reduce inputs size should be " << kInputNum << " but got " << in_tensors_.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (out_tensors_.size() != kOutputNum) { | |||||
| MS_LOG(ERROR) << "Reduce outputs size should be " << kOutputNum << " but got " << out_tensors_.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto input = in_tensors_.at(0); | |||||
| if (input == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce input is nullptr"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| auto output = out_tensors_.at(0); | |||||
| if (output == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce output is nullptr"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ReduceBaseCPUKernel::CheckParameters() { | |||||
| size_t input_rank = in_tensors_.at(0)->shape().size(); | |||||
| if (static_cast<size_t>(num_axes_) > input_rank) { | |||||
| MS_LOG(ERROR) << "Reduce op invalid num of reduce axes " << num_axes_ << " larger than input rank " << input_rank; | |||||
| return RET_ERROR; | |||||
| } | |||||
| for (auto i = 0; i < num_axes_; i++) { | |||||
| if (axes_[i] < -static_cast<int>(input_rank) || axes_[i] >= static_cast<int>(input_rank)) { | |||||
| MS_LOG(ERROR) << "Reduce got invalid axis " << axes_[i] << ", axis should be in [" | |||||
| << -static_cast<int>(input_rank) << ", " << input_rank - 1 << "]."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (axes_[i] < 0) { | |||||
| axes_[i] += static_cast<int>(input_rank); | |||||
| } | |||||
| } | |||||
| if (num_axes_ == 0) { | |||||
| for (int i = 0; i < input_rank; i++) { | |||||
| axes_[i] = i; | |||||
| } | |||||
| num_axes_ = static_cast<int>(input_rank); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ReduceBaseCPUKernel::Init() { | |||||
| auto reduce_param = reinterpret_cast<ReduceParameter *>(op_parameter_); | |||||
| if (reduce_param == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| num_axes_ = reduce_param->num_axes_; | |||||
| mode_ = reduce_param->mode_; | |||||
| memcpy(axes_, reduce_param->axes_, sizeof(reduce_param->axes_)); | |||||
| auto ret = CheckInputsOutputs(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| ret = CheckParameters(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); | |||||
| if (opParameter == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce opParameter nullptr"; | |||||
| return nullptr; | |||||
| } | |||||
| if (desc.type != schema::PrimitiveType_Reduce) { | |||||
| MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type; | |||||
| return nullptr; | |||||
| } | |||||
| auto *kernel = new (std::nothrow) ReduceCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; | |||||
| return nullptr; | |||||
| } | |||||
| auto ret = kernel->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||||
| delete kernel; | |||||
| return nullptr; | |||||
| } | |||||
| return kernel; | |||||
| } | |||||
| kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Mean); | |||||
| if (opParameter == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce opParameter nullptr"; | |||||
| return nullptr; | |||||
| } | |||||
| if (desc.type != schema::PrimitiveType_Mean) { | |||||
| MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Mean, got " << desc.type; | |||||
| return nullptr; | |||||
| } | |||||
| auto *kernel = new (std::nothrow) ReduceCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; | |||||
| return nullptr; | |||||
| } | |||||
| auto ret = kernel->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||||
| delete kernel; | |||||
| return nullptr; | |||||
| } | |||||
| return kernel; | |||||
| } | |||||
| kernel::LiteKernel *CpuReduceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); | |||||
| if (opParameter == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce opParameter nullptr"; | |||||
| return nullptr; | |||||
| } | |||||
| if (desc.type != schema::PrimitiveType_Reduce) { | |||||
| MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type; | |||||
| return nullptr; | |||||
| } | |||||
| auto *kernel = new (std::nothrow) ReduceInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; | |||||
| return nullptr; | |||||
| } | |||||
| auto ret = kernel->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||||
| delete kernel; | |||||
| return nullptr; | |||||
| } | |||||
| return kernel; | |||||
| } | |||||
// Register the fp32 Reduce/Mean and int8 Reduce creators with the CPU kernel
// registry.
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reduce, CpuReduceFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mean, CpuMeanFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reduce, CpuReduceInt8KernelCreator)
| } // namespace mindspore::kernel | |||||
| @@ -0,0 +1,54 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_ | |||||
| #include <vector> | |||||
| #include "src/lite_kernel.h" | |||||
| #include "ir/anf.h" | |||||
| #include "nnacl/reduce_parameter.h" | |||||
| namespace mindspore::kernel { | |||||
| class ReduceBaseCPUKernel : public LiteKernel { | |||||
| public: | |||||
| ReduceBaseCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : LiteKernel(param, inputs, outputs, ctx, primitive) {} | |||||
| virtual ~ReduceBaseCPUKernel() = default; | |||||
| int Init() override; | |||||
| int ReSize() override { return 0; }; | |||||
| private: | |||||
| int CheckInputsOutputs(); | |||||
| int CheckParameters(); | |||||
| protected: | |||||
| int axes_[REDUCE_MAX_AXES_NUM]; | |||||
| int num_axes_; | |||||
| int mode_; | |||||
| protected: | |||||
| int outer_size_; | |||||
| int inner_size_; | |||||
| int axis_size_; | |||||
| std::vector<int> tmp_shape_; | |||||
| }; | |||||
| } // namespace mindspore::kernel | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_ | |||||
| @@ -31,7 +31,7 @@ class ResizeBaseCPUKernel : public LiteKernel { | |||||
| const lite::Primitive *primitive) | const lite::Primitive *primitive) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {} | : LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {} | ||||
| ~ResizeBaseCPUKernel() = default; | |||||
| virtual ~ResizeBaseCPUKernel() = default; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override { return 0; }; | int ReSize() override { return 0; }; | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" | #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" | ||||
| #include "src/runtime/kernel/arm/base/reduce_base.h" | |||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| @@ -37,69 +38,9 @@ using mindspore::schema::ReduceMode_ReduceSum; | |||||
| using mindspore::schema::ReduceMode_ReduceSumSquare; | using mindspore::schema::ReduceMode_ReduceSumSquare; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| namespace { | |||||
| constexpr size_t kInputNum = 1; | |||||
| constexpr size_t kOutputNum = 1; | |||||
| } // namespace | |||||
| int ReduceCPUKernel::CheckInputsOutputs() { | |||||
| if (in_tensors_.size() != kInputNum) { | |||||
| MS_LOG(ERROR) << "Reduce inputs size should be " << kInputNum << " but got " << in_tensors_.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (out_tensors_.size() != kOutputNum) { | |||||
| MS_LOG(ERROR) << "Reduce outputs size should be " << kOutputNum << " but got " << out_tensors_.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto input = in_tensors_.at(0); | |||||
| if (input == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce input is nullptr"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| auto output = out_tensors_.at(0); | |||||
| if (output == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce output is nullptr"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ReduceCPUKernel::CheckParameters() { | |||||
| size_t input_rank = in_tensors_.at(0)->shape().size(); | |||||
| if (static_cast<size_t>(num_axes_) > input_rank) { | |||||
| MS_LOG(ERROR) << "Reduce num of reduce axes " << num_axes_ << " larger than input rank " << input_rank; | |||||
| return RET_ERROR; | |||||
| } | |||||
| for (auto i = 0; i < num_axes_; i++) { | |||||
| if (axes_[i] < -static_cast<int>(input_rank) || axes_[i] >= static_cast<int>(input_rank)) { | |||||
| MS_LOG(ERROR) << "Reduce got invalid axis " << axes_[i] << ", axis should be in [" | |||||
| << -static_cast<int>(input_rank) << ", " << input_rank - 1 << "]."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (axes_[i] < 0) { | |||||
| axes_[i] += static_cast<int>(input_rank); | |||||
| } | |||||
| } | |||||
| if (num_axes_ == 0) { | |||||
| for (int i = 0; i < input_rank; i++) { | |||||
| axes_[i] = i; | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ReduceCPUKernel::Init() { | int ReduceCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = CheckInputsOutputs(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| ret = CheckParameters(); | |||||
| auto ret = ReduceBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -107,7 +48,6 @@ int ReduceCPUKernel::Init() { | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| switch (mode_) { | switch (mode_) { | ||||
| case static_cast<int>(ReduceMode_ReduceSum): { | case static_cast<int>(ReduceMode_ReduceSum): { | ||||
| reducer_ = ReduceSum; | reducer_ = ReduceSum; | ||||
| @@ -137,7 +77,10 @@ int ReduceCPUKernel::Init() { | |||||
| MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_; | MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_OK; | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | } | ||||
| int ReduceCPUKernel::CallReduceUnit(int task_id) { | int ReduceCPUKernel::CallReduceUnit(int task_id) { | ||||
| @@ -225,67 +168,4 @@ int ReduceCPUKernel::MallocTmpBuffer() { | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); | |||||
| if (opParameter == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce opParameter nullptr"; | |||||
| return nullptr; | |||||
| } | |||||
| if (desc.type != schema::PrimitiveType_Reduce) { | |||||
| MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type; | |||||
| return nullptr; | |||||
| } | |||||
| auto *kernel = new (std::nothrow) | |||||
| ReduceCPUKernel(reinterpret_cast<ReduceParameter *>(opParameter), inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; | |||||
| return nullptr; | |||||
| } | |||||
| auto ret = kernel->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||||
| delete kernel; | |||||
| return nullptr; | |||||
| } | |||||
| return kernel; | |||||
| } | |||||
| kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||||
| MS_ASSERT(opParameter != nullptr); | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Mean); | |||||
| if (opParameter == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce opParameter nullptr"; | |||||
| return nullptr; | |||||
| } | |||||
| if (desc.type != schema::PrimitiveType_Mean) { | |||||
| MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Mean, got " << desc.type; | |||||
| return nullptr; | |||||
| } | |||||
| auto *kernel = new (std::nothrow) | |||||
| ReduceCPUKernel(reinterpret_cast<ReduceParameter *>(opParameter), inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed."; | |||||
| return nullptr; | |||||
| } | |||||
| auto ret = kernel->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||||
| delete kernel; | |||||
| return nullptr; | |||||
| } | |||||
| return kernel; | |||||
| } | |||||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reduce, CpuReduceFp32KernelCreator) | |||||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mean, CpuMeanFp32KernelCreator) | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -21,25 +21,20 @@ | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" | #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" | ||||
| #include "src/runtime/kernel/arm/base/reduce_base.h" | |||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| using mindspore::schema::ReduceMode; | using mindspore::schema::ReduceMode; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class ReduceCPUKernel : public LiteKernel { | |||||
| class ReduceCPUKernel : public ReduceBaseCPUKernel { | |||||
| typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data, | typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data, | ||||
| const int *src_shape, float *dst_data, const int tid, const int thread_num); | const int *src_shape, float *dst_data, const int tid, const int thread_num); | ||||
| public: | public: | ||||
| ReduceCPUKernel(ReduceParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| ReduceCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | ||||
| const lite::Primitive *primitive) | const lite::Primitive *primitive) | ||||
| : LiteKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs, ctx, primitive), | |||||
| context_(ctx), | |||||
| keep_dims_(param->keep_dims_), | |||||
| num_axes_(param->num_axes_), | |||||
| mode_(param->mode_) { | |||||
| memcpy(axes_, param->axes_, sizeof(param->axes_)); | |||||
| } | |||||
| : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} | |||||
| ~ReduceCPUKernel() { | ~ReduceCPUKernel() { | ||||
| for (auto i = 0; i < data_buffers_.size(); i++) { | for (auto i = 0; i < data_buffers_.size(); i++) { | ||||
| float *buffer = data_buffers_[i]; | float *buffer = data_buffers_[i]; | ||||
| @@ -58,26 +53,13 @@ class ReduceCPUKernel : public LiteKernel { | |||||
| int CallReduceUnit(int task_id); | int CallReduceUnit(int task_id); | ||||
| private: | private: | ||||
| int CheckInputsOutputs(); | |||||
| int CheckParameters(); | |||||
| int MallocTmpBuffer(); | |||||
| private: | |||||
| const lite::Context *context_ = nullptr; | |||||
| bool keep_dims_; | |||||
| int axes_[REDUCE_MAX_AXES_NUM]; | |||||
| int num_axes_; | |||||
| int mode_; | |||||
| private: | |||||
| Reducer reducer_; | |||||
| std::vector<float *> data_buffers_; | std::vector<float *> data_buffers_; | ||||
| int outer_size_; | |||||
| int inner_size_; | |||||
| int axis_size_; | |||||
| std::vector<int> tmp_shape_; | |||||
| const float *src_data_; | const float *src_data_; | ||||
| float *dst_data_; | float *dst_data_; | ||||
| Reducer reducer_; | |||||
| private: | |||||
| int MallocTmpBuffer(); | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -0,0 +1,323 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <algorithm> | |||||
| #include "schema/model_generated.h" | |||||
| #include "src/runtime/runtime_api.h" | |||||
| #include "src/kernel_registry.h" | |||||
| #include "nnacl/quantization/quantize.h" | |||||
| #include "include/errorcode.h" | |||||
| #include "src/runtime/kernel/arm/int8/reduce_int8.h" | |||||
| using mindspore::lite::KernelRegistrar; | |||||
| using mindspore::lite::RET_ERROR; | |||||
| using mindspore::lite::RET_NULL_PTR; | |||||
| using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_Reduce; | |||||
| using mindspore::schema::ReduceMode_ReduceMax; | |||||
| using mindspore::schema::ReduceMode_ReduceMean; | |||||
| using mindspore::schema::ReduceMode_ReduceMin; | |||||
| using mindspore::schema::ReduceMode_ReduceProd; | |||||
| using mindspore::schema::ReduceMode_ReduceSum; | |||||
| using mindspore::schema::ReduceMode_ReduceSumSquare; | |||||
| namespace mindspore::kernel { | |||||
// Sets up the int8 reduce kernel: runs the shared base-class validation,
// allocates scratch buffers, precomputes quantization multipliers, then binds
// the reducer function pair for the requested reduce mode.
// NOTE(review): MallocTmpBuffer() and CalculateQuantArgs() run before the
// InferShapeDone() gate, so they execute even when shapes are not final —
// confirm this ordering against the kernel lifecycle.
int ReduceInt8CPUKernel::Init() {
  auto ret = ReduceBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }
  ret = MallocTmpBuffer();
  if (ret != RET_OK) {
    return ret;
  }
  ret = CalculateQuantArgs();
  if (ret != RET_OK) {
    return ret;
  }
  // Each mode binds a multi-axis reducer plus a specialized variant for the
  // final axis.
  switch (mode_) {
    case static_cast<int>(ReduceMode_ReduceMean): {
      reducer_ = ReduceMeanInt8;
      last_reducer_ = ReduceMeanLastAxis;
      break;
    }
    case static_cast<int>(ReduceMode_ReduceSum): {
      reducer_ = ReduceSumInt8;
      last_reducer_ = ReduceSumLastAxis;
      break;
    }
    case static_cast<int>(ReduceMode_ReduceMax): {
      reducer_ = ReduceMaxInt8;
      last_reducer_ = ReduceMaxLastAxis;
      break;
    }
    case static_cast<int>(ReduceMode_ReduceMin): {
      reducer_ = ReduceMinInt8;
      last_reducer_ = ReduceMinLastAxis;
      break;
    }
    case static_cast<int>(ReduceMode_ReduceProd): {
      reducer_ = ReduceProdInt8;
      last_reducer_ = ReduceProdLastAxis;
      break;
    }
    case static_cast<int>(ReduceMode_ReduceSumSquare): {
      // In multi-axes reduce cases, sum square output different output for different reduce order
      // e.g. axes [2, 3] is different from axes [3, 2].
      reducer_ = ReduceSumSquareInt8;
      last_reducer_ = ReduceSumSquareLastAxis;
      break;
    }
    default:
      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
      return RET_ERROR;
  }
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}
// Reads input/output quantization parameters and precomputes the fixed-point
// multiplier/shift pairs each reduce mode needs for rescaling.
// NOTE(review): QuantMulArg objects are allocated with raw `new` and pushed
// into member vectors; confirm the destructor frees them — re-running Init
// would otherwise leak and append duplicate entries.
int ReduceInt8CPUKernel::CalculateQuantArgs() {
  lite::tensor::Tensor *input = in_tensors_.at(0);
  lite::tensor::Tensor *output = out_tensors_.at(0);
  MS_ASSERT(input);
  MS_ASSERT(output);
  // assumes both tensors carry at least one quant param — TODO confirm.
  quant_arg_.in_scale_ = input->GetQuantParams().front().scale;
  quant_arg_.in_zp_ = input->GetQuantParams().front().zeroPoint;
  quant_arg_.out_scale_ = output->GetQuantParams().front().scale;
  quant_arg_.out_zp_ = output->GetQuantParams().front().zeroPoint;
  // (quant_out - out_zp) * out_scale = (quant_in - in_zp) * in_scale
  const double input_output_multiplier = quant_arg_.in_scale_ / quant_arg_.out_scale_;
  int shift;  // out-param of QuantizeMultiplierSmallerThanOne below
  QuantizeMultiplierSmallerThanOne(input_output_multiplier, &quant_arg_.in_out_multiplier_, &shift);
  // Split the signed shift into separate left/right shift amounts.
  quant_arg_.in_out_left_shift_ = shift < 0 ? -shift : 0;
  quant_arg_.in_out_right_shift_ = shift > 0 ? shift : 0;
  // (quant_out - zp_out)*scale_out = sum((quant_in -zp)*scale_in) * (1/num) for each axis in axes
  // quant_out = sum(quant_in-zp) * (scale_in/scale_out) * (1/num)
  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
    // One 1/axis_length multiplier per reduced axis.
    for (auto i = 0; i < num_axes_; i++) {
      auto axis = axes_[i];
      double reciprocal = 1.0 / in_tensors_.at(0)->shape()[axis];
      QuantMulArg *qm = new (std::nothrow) QuantMulArg;
      if (qm == nullptr) {
        MS_LOG(ERROR) << "Reduce new QuantMulArg failed.";
        return RET_NULL_PTR;
      }
      QuantizeMultiplierSmallerThanOne(reciprocal, &qm->multiplier_, &shift);
      qm->left_shift_ = shift < 0 ? -shift : 0;
      qm->right_shift_ = shift > 0 ? shift : 0;
      mean_multipliers_.push_back(qm);
    }
  }
  // (quant_out - zp) * scale_out = prod(quant_in - zp) * scale_in^num
  // quant_out = prod(quant_in-zp) * (scale_in^num/scale_out) + zp_out
  // scale_in^num-1 * scale_in/scale_out
  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
    // One scale_in^(axis_length-1) multiplier per reduced axis.
    for (auto i = 0; i < num_axes_; i++) {
      int axis_size = in_tensors_.at(0)->shape()[axes_[i]];
      QuantMulArg *qm = new (std::nothrow) QuantMulArg;
      if (qm == nullptr) {
        MS_LOG(ERROR) << "ReduceProd new QuantMulArg failed.";
        return RET_NULL_PTR;
      }
      double prod_multiplier = pow(quant_arg_.in_scale_, axis_size - 1);
      QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift);
      qm->left_shift_ = shift < 0 ? -shift : 0;
      qm->right_shift_ = shift > 0 ? shift : 0;
      prod_multipliers_.push_back(qm);
    }
  }
  // (quant_out - zp) * scale_out = sum((quant_in - zp)^2 * scale_in^2)
  // quant_out = sum((quant_in - zp)^2) * scale_in^2 / scale_out + zp_out
  // scale_in * scale_in/scale_out
  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
    // Intermediate axes rescale by scale_in only; the final entry (appended
    // below) folds in the division by scale_out.
    for (auto i = 0; i < num_axes_ - 1; i++) {
      QuantMulArg *qm = new (std::nothrow) QuantMulArg;
      if (qm == nullptr) {
        MS_LOG(ERROR) << "ReduceProd new QuantMultiplier failed.";
        return RET_NULL_PTR;
      }
      double sumsquare_multiplier = quant_arg_.in_scale_;
      QuantizeMultiplierSmallerThanOne(sumsquare_multiplier, &qm->multiplier_, &shift);
      qm->left_shift_ = shift < 0 ? -shift : 0;
      qm->right_shift_ = shift > 0 ? shift : 0;
      sum_square_multipliers_.push_back(qm);
    }
    QuantMulArg *qm = new (std::nothrow) QuantMulArg;
    if (qm == nullptr) {
      MS_LOG(ERROR) << "ReduceProd new QuantMultiplier failed.";
      return RET_NULL_PTR;
    }
    double sumsquare_multiplier = quant_arg_.in_scale_ * quant_arg_.in_scale_ / quant_arg_.out_scale_;
    QuantizeMultiplierSmallerThanOne(sumsquare_multiplier, &qm->multiplier_, &shift);
    qm->left_shift_ = shift < 0 ? -shift : 0;
    qm->right_shift_ = shift > 0 ? shift : 0;
    sum_square_multipliers_.push_back(qm);
  }
  return RET_OK;
}
| int ReduceInt8CPUKernel::MallocTmpBuffer() { | |||||
| auto input_shape = in_tensors_.at(0)->shape(); | |||||
| for (auto i = 0; i < num_axes_ - 1; i++) { | |||||
| int axis = axes_[i]; | |||||
| size_t size = 1; | |||||
| for (auto j = 0; j < input_shape.size(); j++) { | |||||
| if (static_cast<size_t>(axis) != j) { | |||||
| size *= input_shape[j]; | |||||
| } | |||||
| } | |||||
| int32_t *buffer = reinterpret_cast<int32_t *>(malloc(size * sizeof(int32_t))); | |||||
| if (buffer == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc data failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| data_buffers_.emplace_back(buffer); | |||||
| input_shape[axis] = 1; | |||||
| } | |||||
| auto input = in_tensors_.at(0); | |||||
| begin_src_data_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t) * input->ElementsNum())); | |||||
| if (begin_src_data_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| auto input_data = reinterpret_cast<int8_t *>(input->Data()); | |||||
| for (auto i = 0; i < input->ElementsNum(); i++) { | |||||
| begin_src_data_[i] = static_cast<int32_t>(input_data[i]); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ReduceInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| auto reduce = reinterpret_cast<ReduceInt8CPUKernel *>(cdata); | |||||
| auto error_code = reduce->CallReduceUnit(task_id); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "Reduce Run error task_id[" << task_id << "] error_code[" << error_code << "]"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ReduceInt8CPUKernel::Run() { | |||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| is_last_axis_ = false; | |||||
| tmp_shape_ = in_tensors_.at(0)->shape(); | |||||
| src_data_ = begin_src_data_; | |||||
| for (int i = 0; i < data_buffers_.size(); ++i) { | |||||
| if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) { | |||||
| quant_arg_.mean_multiplier_ = mean_multipliers_[i]->multiplier_; | |||||
| quant_arg_.mean_left_shift_ = mean_multipliers_[i]->left_shift_; | |||||
| quant_arg_.mean_right_shift_ = mean_multipliers_[i]->right_shift_; | |||||
| } | |||||
| if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) { | |||||
| quant_arg_.prod_multiplier_ = prod_multipliers_[i]->multiplier_; | |||||
| quant_arg_.prod_left_shift_ = prod_multipliers_[i]->left_shift_; | |||||
| quant_arg_.prod_right_shift_ = prod_multipliers_[i]->right_shift_; | |||||
| } | |||||
| if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) { | |||||
| quant_arg_.sum_square_multiplier_ = sum_square_multipliers_[i]->multiplier_; | |||||
| quant_arg_.sum_square_left_shift_ = sum_square_multipliers_[i]->left_shift_; | |||||
| quant_arg_.sum_square_right_shift_ = sum_square_multipliers_[i]->right_shift_; | |||||
| } | |||||
| dst_data_ = data_buffers_[i]; | |||||
| int axis = axes_[i]; | |||||
| outer_size_ = 1; | |||||
| for (int j = 0; j < axis; j++) { | |||||
| outer_size_ *= tmp_shape_[j]; | |||||
| } | |||||
| inner_size_ = 1; | |||||
| for (int k = axis + 1; k < static_cast<int>(tmp_shape_.size()); k++) { | |||||
| inner_size_ *= tmp_shape_[k]; | |||||
| } | |||||
| axis_size_ = tmp_shape_[axis]; | |||||
| auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| tmp_shape_[axis] = 1; | |||||
| src_data_ = dst_data_; | |||||
| } | |||||
| if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) { | |||||
| quant_arg_.mean_multiplier_ = mean_multipliers_.back()->multiplier_; | |||||
| quant_arg_.mean_left_shift_ = mean_multipliers_.back()->left_shift_; | |||||
| quant_arg_.mean_right_shift_ = mean_multipliers_.back()->right_shift_; | |||||
| } | |||||
| if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) { | |||||
| quant_arg_.prod_multiplier_ = prod_multipliers_.back()->multiplier_; | |||||
| quant_arg_.prod_left_shift_ = prod_multipliers_.back()->left_shift_; | |||||
| quant_arg_.prod_right_shift_ = prod_multipliers_.back()->right_shift_; | |||||
| } | |||||
| if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) { | |||||
| quant_arg_.sum_square_multiplier_ = sum_square_multipliers_.back()->multiplier_; | |||||
| quant_arg_.sum_square_left_shift_ = sum_square_multipliers_.back()->left_shift_; | |||||
| quant_arg_.sum_square_right_shift_ = sum_square_multipliers_.back()->right_shift_; | |||||
| } | |||||
| int last_reduce_axis = axes_[num_axes_ - 1]; | |||||
| outer_size_ = 1; | |||||
| for (int i = 0; i < last_reduce_axis; i++) { | |||||
| outer_size_ *= tmp_shape_[i]; | |||||
| } | |||||
| inner_size_ = 1; | |||||
| for (int i = last_reduce_axis + 1; i < static_cast<int>(tmp_shape_.size()); i++) { | |||||
| inner_size_ *= tmp_shape_[i]; | |||||
| } | |||||
| axis_size_ = tmp_shape_[last_reduce_axis]; | |||||
| last_dst_data_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data()); | |||||
| is_last_axis_ = true; | |||||
| auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (begin_src_data_ != nullptr) { | |||||
| free(begin_src_data_); | |||||
| begin_src_data_ = nullptr; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ReduceInt8CPUKernel::CallReduceUnit(int task_id) { | |||||
| int ret; | |||||
| if (!is_last_axis_) { | |||||
| ret = | |||||
| reducer_(outer_size_, inner_size_, axis_size_, src_data_, dst_data_, &quant_arg_, task_id, context_->thread_num_); | |||||
| } else { | |||||
| ret = last_reducer_(outer_size_, inner_size_, axis_size_, src_data_, last_dst_data_, &quant_arg_, task_id, | |||||
| context_->thread_num_); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| } // namespace mindspore::kernel | |||||
| @@ -0,0 +1,98 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_ | |||||
| #include <vector> | |||||
| #include "src/lite_kernel.h" | |||||
| #include "nnacl/reduce_parameter.h" | |||||
| #include "nnacl/int8/reduce_int8.h" | |||||
| #include "nnacl/quantization/quantize.h" | |||||
| #include "ir/anf.h" | |||||
| #include "src/runtime/kernel/arm/base/reduce_base.h" | |||||
| using mindspore::schema::ReduceMode; | |||||
| namespace mindspore::kernel { | |||||
| class ReduceInt8CPUKernel : public ReduceBaseCPUKernel { | |||||
| typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| typedef int (*LastReducer)(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| public: | |||||
| ReduceInt8CPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||||
| const lite::Primitive *primitive) | |||||
| : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} | |||||
| ~ReduceInt8CPUKernel() { | |||||
| for (auto i = 0; i < data_buffers_.size(); i++) { | |||||
| int32_t *buffer = data_buffers_[i]; | |||||
| if (buffer != nullptr) { | |||||
| free(buffer); | |||||
| buffer = nullptr; | |||||
| } | |||||
| } | |||||
| for (auto qm : mean_multipliers_) { | |||||
| delete qm; | |||||
| qm = nullptr; | |||||
| } | |||||
| for (auto qm : prod_multipliers_) { | |||||
| delete qm; | |||||
| qm = nullptr; | |||||
| } | |||||
| for (auto qm : sum_square_multipliers_) { | |||||
| delete qm; | |||||
| qm = nullptr; | |||||
| } | |||||
| src_data_ = nullptr; | |||||
| dst_data_ = nullptr; | |||||
| } | |||||
| int Init() override; | |||||
| int ReSize() override { return 0; }; | |||||
| int Run() override; | |||||
| int CallReduceUnit(int task_id); | |||||
| int ReduceLastAxis(int task_id); | |||||
| public: | |||||
| bool is_last_axis_ = true; | |||||
| private: | |||||
| int MallocTmpBuffer(); | |||||
| int CalculateQuantArgs(); | |||||
| private: | |||||
| ReduceParameter *param_ = nullptr; | |||||
| ReduceQuantArg quant_arg_; | |||||
| private: | |||||
| int32_t *begin_src_data_ = nullptr; | |||||
| int8_t *last_dst_data_ = nullptr; | |||||
| std::vector<int32_t *> data_buffers_; | |||||
| const int32_t *src_data_ = nullptr; | |||||
| int32_t *dst_data_ = nullptr; | |||||
| Reducer reducer_ = nullptr; | |||||
| LastReducer last_reducer_ = nullptr; | |||||
| std::vector<QuantMulArg *> mean_multipliers_; | |||||
| std::vector<QuantMulArg *> prod_multipliers_; | |||||
| std::vector<QuantMulArg *> sum_square_multipliers_; | |||||
| }; | |||||
| } // namespace mindspore::kernel | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_ | |||||
| @@ -49,6 +49,8 @@ typedef enum ErrorCodeUint8OpEnum { | |||||
| typedef enum ErrorCodeInt8OpEnum { | typedef enum ErrorCodeInt8OpEnum { | ||||
| NNACL_ERRCODE_OP_INT8_START = 40000, | NNACL_ERRCODE_OP_INT8_START = 40000, | ||||
| NNACL_ERRCODE_ADD_OVERFLOW, | |||||
| NNACL_ERRCODE_MUL_OVERFLOW, | |||||
| NNACL_ERRCODE_OP_INT8_END = 49999 | NNACL_ERRCODE_OP_INT8_END = 49999 | ||||
| } ErrorCodeInt8OpEnums; | } ErrorCodeInt8OpEnums; | ||||
| @@ -17,15 +17,8 @@ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_ | #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_ | ||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_ | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #define REDUCE_MAX_AXES_NUM 8 | |||||
| #include "src/runtime/kernel/arm/nnacl/reduce_parameter.h" | |||||
| typedef struct ReduceParameter { | |||||
| OpParameter op_parameter_; | |||||
| bool keep_dims_; | |||||
| int axes_[REDUCE_MAX_AXES_NUM]; | |||||
| int num_axes_; | |||||
| int mode_; | |||||
| } ReduceParameter; | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| @@ -0,0 +1,467 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "nnacl/int8/reduce_int8.h" | |||||
| #include "nnacl/errorcode.h" | |||||
| #include "nnacl/quantization/fixed_point.h" | |||||
| inline bool isAddOverflow(int32_t x, int32_t y) { | |||||
| int32_t sum = x + y; | |||||
| return (x > 0 && y > 0 && sum < 0) || (x < 0 && y < 0 && sum > 0); | |||||
| } | |||||
| inline bool isMulOverflow(int32_t x, int32_t y) { | |||||
| int32_t p = x * y; | |||||
| return (x != 0) && (p / x != y); | |||||
| } | |||||
| // Get x such that (x-zp_in) * scale_in = mean | |||||
| // Assuming reduce n axes, this works for first n-1 reduce. One call for one reduce. | |||||
| int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int32_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int32_t *inner_dst = outer_dst + k; | |||||
| int32_t sum = 0; | |||||
| // (x - zp_in) * scale_in = mean[(item - zp_in) * scale_in] | |||||
| // x = mean(item-zp_in) + zp_in | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | |||||
| if (isAddOverflow(sum, tmp)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum += tmp; | |||||
| } | |||||
| int32_t mean = RoundingDivideByPOT( | |||||
| SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->mean_left_shift_), quant->mean_multiplier_), | |||||
| quant->mean_right_shift_); | |||||
| if (isAddOverflow(mean, quant->in_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| *inner_dst = mean + quant->in_zp_; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| // suppose reduce n axes, this works for last reduce axis. | |||||
| // get y such that (y-zp_out) * scale_out = mean(x-zp_in)*scale_in | |||||
| int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int8_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int8_t *inner_dst = outer_dst + k; | |||||
| int32_t sum = 0; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| // y = mean(x-zp_in) * scale + zp_out | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | |||||
| if (isAddOverflow(tmp, sum)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum += tmp; | |||||
| } | |||||
| // sum / num | |||||
| int32_t mean = RoundingDivideByPOT( | |||||
| SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->mean_left_shift_), quant->mean_multiplier_), | |||||
| quant->mean_right_shift_); | |||||
| // trans to output scale | |||||
| int32_t mean_scaled = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(mean * (1 << (unsigned int)quant->in_out_left_shift_), | |||||
| quant->in_out_multiplier_), | |||||
| quant->in_out_right_shift_); | |||||
| if (isAddOverflow(mean_scaled, quant->out_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| mean = mean_scaled + quant->out_zp_; | |||||
| if (mean > INT8_MAX) { | |||||
| *inner_dst = INT8_MAX; | |||||
| } else if (mean < INT8_MIN) { | |||||
| *inner_dst = INT8_MIN; | |||||
| } else { | |||||
| *inner_dst = (int8_t)mean; | |||||
| } | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| // Get x such that (x-zp_in) * scale_in = sum(item-zp_in)*scale_in | |||||
| // Assuming reduce n axes, this works for first n-1 reduce. One call for one reduce. | |||||
| int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int32_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int32_t *inner_dst = outer_dst + k; | |||||
| int32_t sum = 0; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | |||||
| if (isAddOverflow(tmp, sum)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum += tmp; | |||||
| } | |||||
| if (isAddOverflow(quant->in_zp_, sum)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| *inner_dst = sum + quant->in_zp_; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| // suppose reduce n axes, this works for last reduce axis. | |||||
| // get y such that (y-zp_out) * scale_out = sum(item-zp_in)*scale_in | |||||
| int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int8_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int8_t *inner_dst = outer_dst + k; | |||||
| int32_t sum = 0; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | |||||
| if (isAddOverflow(tmp, sum)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum += tmp; | |||||
| } | |||||
| int32_t sum_scaled = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->in_out_left_shift_), | |||||
| quant->in_out_multiplier_), | |||||
| quant->in_out_right_shift_); | |||||
| if (isAddOverflow(sum_scaled, quant->out_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum = sum_scaled + quant->out_zp_; | |||||
| if (sum > INT8_MAX) { | |||||
| *inner_dst = INT8_MAX; | |||||
| } else if (sum < INT8_MIN) { | |||||
| *inner_dst = INT8_MIN; | |||||
| } else { | |||||
| *inner_dst = (int8_t)sum; | |||||
| } | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int8_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int8_t *inner_dst = outer_dst + k; | |||||
| int32_t tmp = INT8_MIN; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| tmp = tmp > inner_src[i * inner_size] ? tmp : inner_src[i * inner_size]; | |||||
| } | |||||
| int32_t tmp_scaled = RoundingDivideByPOT( | |||||
| SaturatingRoundingDoublingHighMul((tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_), | |||||
| quant->in_out_multiplier_), | |||||
| quant->in_out_right_shift_); | |||||
| if (isAddOverflow(tmp_scaled, quant->out_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| tmp = tmp_scaled + quant->out_zp_; | |||||
| if (tmp > INT8_MAX) { | |||||
| *inner_dst = INT8_MAX; | |||||
| } else if (tmp < INT8_MIN) { | |||||
| *inner_dst = INT8_MIN; | |||||
| } else { | |||||
| *inner_dst = (int8_t)tmp; | |||||
| } | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int32_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int32_t *inner_dst = outer_dst + k; | |||||
| int32_t tmp = INT8_MIN; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| tmp = tmp > inner_src[i * inner_size] ? tmp : inner_src[i * inner_size]; | |||||
| } | |||||
| *inner_dst = tmp; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| int base_offset = 20; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int8_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int8_t *inner_dst = outer_dst + k; | |||||
| int32_t tmp = INT8_MAX; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| tmp = tmp < inner_src[i * inner_size] ? tmp : inner_src[i * inner_size]; | |||||
| } | |||||
| int32_t tmp_scaled = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul( | |||||
| (tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_ + base_offset), | |||||
| quant->in_out_multiplier_), | |||||
| quant->in_out_right_shift_ + base_offset); | |||||
| if (isAddOverflow(tmp_scaled, quant->out_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| tmp = tmp_scaled + quant->out_zp_; | |||||
| if (tmp > INT8_MAX) { | |||||
| *inner_dst = INT8_MAX; | |||||
| } else if (tmp < INT8_MIN) { | |||||
| *inner_dst = INT8_MIN; | |||||
| } else { | |||||
| *inner_dst = (int8_t)tmp; | |||||
| } | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int32_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int32_t *inner_dst = outer_dst + k; | |||||
| int32_t tmp = INT8_MAX; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| tmp = tmp < inner_src[i * inner_size] ? tmp : inner_src[i * inner_size]; | |||||
| } | |||||
| *inner_dst = tmp; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int8_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int8_t *inner_dst = outer_dst + k; | |||||
| int32_t prod = 1; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| // quant_out = prod(quant_in-zp) * (scale_in^num/scale_out) + zp_out | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | |||||
| if (isMulOverflow(prod, tmp)) { | |||||
| return NNACL_ERRCODE_MUL_OVERFLOW; | |||||
| } | |||||
| prod *= tmp; | |||||
| } | |||||
| prod = RoundingDivideByPOT( | |||||
| SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->prod_left_shift_), quant->prod_multiplier_), | |||||
| quant->prod_right_shift_); | |||||
| int32_t prod_scaled = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->in_out_left_shift_), | |||||
| quant->in_out_multiplier_), | |||||
| quant->in_out_right_shift_); | |||||
| if (isAddOverflow(prod_scaled, quant->out_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| prod = prod_scaled + quant->out_zp_; | |||||
| if (prod > INT8_MAX) { | |||||
| *inner_dst = INT8_MAX; | |||||
| } else if (prod < INT8_MIN) { | |||||
| *inner_dst = INT8_MIN; | |||||
| } else { | |||||
| *inner_dst = (int8_t)prod; | |||||
| } | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int32_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int32_t *inner_dst = outer_dst + k; | |||||
| int32_t prod = 1; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | |||||
| if (isMulOverflow(prod, tmp)) { | |||||
| return NNACL_ERRCODE_MUL_OVERFLOW; | |||||
| } | |||||
| prod *= tmp; | |||||
| } | |||||
| prod = RoundingDivideByPOT( | |||||
| SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->prod_left_shift_), quant->prod_multiplier_), | |||||
| quant->prod_right_shift_); | |||||
| if (isAddOverflow(prod, quant->in_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| *inner_dst = prod + quant->in_zp_; // todo overflow | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceSumSquareLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int8_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int8_t *inner_dst = outer_dst + k; | |||||
| int32_t sum = 0; | |||||
| // quant_out = sum((quant_in - zp)^2) * scale_in^2 / scale_out + zp_out | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| int32_t tmp; | |||||
| if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) { | |||||
| return NNACL_ERRCODE_MUL_OVERFLOW; | |||||
| } | |||||
| tmp = (inner_src[i * inner_size] - quant->in_zp_) * (inner_src[i * inner_size] - quant->in_zp_); | |||||
| if (isAddOverflow(sum, tmp)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum += tmp; | |||||
| } | |||||
| int32_t sum_scaled = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->sum_square_left_shift_), | |||||
| quant->sum_square_multiplier_), | |||||
| quant->sum_square_right_shift_); | |||||
| if (isAddOverflow(sum_scaled, quant->out_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum = sum_scaled + quant->out_zp_; | |||||
| if (sum > INT8_MAX) { | |||||
| *inner_dst = INT8_MAX; | |||||
| } else if (sum < INT8_MIN) { | |||||
| *inner_dst = INT8_MIN; | |||||
| } else { | |||||
| *inner_dst = (int8_t)sum; | |||||
| } | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReduceSumSquareInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int i, j, k; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | |||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | |||||
| int32_t *outer_dst = dst_data + j * inner_size; | |||||
| for (k = 0; k < inner_size; k++) { | |||||
| const int32_t *inner_src = outer_src + k; | |||||
| int32_t *inner_dst = outer_dst + k; | |||||
| int32_t sum = 0; | |||||
| for (i = 0; i < axis_size; i++) { | |||||
| int32_t tmp; | |||||
| if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) { | |||||
| return NNACL_ERRCODE_MUL_OVERFLOW; | |||||
| } | |||||
| tmp = (inner_src[i * inner_size] - quant->in_zp_) * (inner_src[i * inner_size] - quant->in_zp_); | |||||
| if (isAddOverflow(sum, tmp)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| sum += tmp; | |||||
| } | |||||
| sum = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->sum_square_left_shift_), | |||||
| quant->sum_square_multiplier_), | |||||
| quant->sum_square_right_shift_); | |||||
| if (isAddOverflow(sum, quant->in_zp_)) { | |||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | |||||
| } | |||||
| *inner_dst = sum + quant->in_zp_; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| @@ -0,0 +1,53 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_ | |||||
| #include "nnacl/quantization/quantize.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceSumSquareLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| int ReduceSumSquareInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data, | |||||
| int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num); | |||||
| bool isAddOverflow(int32_t x, int32_t y); | |||||
| bool isMulOverflow(int32_t x, int32_t y); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_ | |||||
| @@ -219,6 +219,26 @@ typedef struct DivQuantArg { | |||||
| int output_multiplier_; | int output_multiplier_; | ||||
| int output_shift_; | int output_shift_; | ||||
| } DivQuantArg; | } DivQuantArg; | ||||
| typedef struct ReduceQuantArg { | |||||
| double in_scale_; | |||||
| int32_t in_zp_; | |||||
| double out_scale_; | |||||
| int32_t out_zp_; | |||||
| int32_t in_out_multiplier_; | |||||
| int in_out_left_shift_; | |||||
| int in_out_right_shift_; | |||||
| int32_t mean_multiplier_; | |||||
| int mean_left_shift_; | |||||
| int mean_right_shift_; | |||||
| int32_t prod_multiplier_; | |||||
| int prod_left_shift_; | |||||
| int prod_right_shift_; | |||||
| int32_t sum_square_multiplier_; | |||||
| int sum_square_left_shift_; | |||||
| int sum_square_right_shift_; | |||||
| } ReduceQuantArg; | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,30 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_ | |||||
| #include "nnacl/op_base.h" | |||||
| #define REDUCE_MAX_AXES_NUM 8 | |||||
| struct ReduceParameter { | |||||
| OpParameter op_parameter_; | |||||
| bool keep_dims_; | |||||
| int axes_[REDUCE_MAX_AXES_NUM]; | |||||
| int num_axes_; | |||||
| int mode_; | |||||
| }; | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_ | |||||
| @@ -16,7 +16,7 @@ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_ | #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_ | ||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_ | ||||
| #include "src/runtime/kernel/arm/nnacl/op_base.h" | |||||
| #include "nnacl/op_base.h" | |||||
| typedef struct ResizeParameter { | typedef struct ResizeParameter { | ||||
| OpParameter op_parameter_; | OpParameter op_parameter_; | ||||
| int method_; | int method_; | ||||
| @@ -13,204 +13,255 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include <iostream> | |||||
| #include <vector> | #include <vector> | ||||
| #include "mindspore/lite/src/lite_kernel.h" | |||||
| #include "mindspore/lite/src/ir/tensor.h" | |||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.h" | |||||
| #include "nnacl/resize_parameter.h" | |||||
| #include "mindspore/lite/src/kernel_registry.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| class TestResizeBilinearFp32 : public mindspore::CommonTest { | class TestResizeBilinearFp32 : public mindspore::CommonTest { | ||||
| public: | public: | ||||
| TestResizeBilinearFp32() = default; | TestResizeBilinearFp32() = default; | ||||
| void Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape, float *input_data, | |||||
| float *output_data, const bool align_corners, const int thread_num); | |||||
| void TearDown() override; | |||||
| public: | public: | ||||
| int tid = 0; | |||||
| int thread_num = 1; | |||||
| float err_tol = 1e-5; | float err_tol = 1e-5; | ||||
| lite::tensor::Tensor in_tensor_; | |||||
| lite::tensor::Tensor out_tensor_; | |||||
| std::vector<lite::tensor::Tensor *> inputs_{&in_tensor_}; | |||||
| std::vector<lite::tensor::Tensor *> outputs_{&out_tensor_}; | |||||
| ResizeParameter param_ = {{}}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; | |||||
| lite::Context ctx_ = lite::Context(); | |||||
| kernel::KernelCreator creator_ = nullptr; | |||||
| kernel::LiteKernel *kernel_ = nullptr; | |||||
| }; | }; | ||||
| void TestResizeBilinearFp32::TearDown() { | |||||
| in_tensor_.SetData(nullptr); | |||||
| out_tensor_.SetData(nullptr); | |||||
| } | |||||
| void TestResizeBilinearFp32::Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape, | |||||
| float *input_data, float *output_data, const bool align_corners, | |||||
| const int thread_num) { | |||||
| in_tensor_.set_data_type(kNumberTypeFloat32); | |||||
| in_tensor_.set_shape(input_shape); | |||||
| out_tensor_.set_data_type(kNumberTypeFloat32); | |||||
| out_tensor_.set_shape(output_shape); | |||||
| in_tensor_.SetData(input_data); | |||||
| out_tensor_.SetData(output_data); | |||||
| ResizeParameter param_ = { | |||||
| {}, static_cast<int>(schema::ResizeMethod_BILINEAR), output_shape[1], output_shape[2], align_corners}; | |||||
| desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; | |||||
| ctx_ = lite::Context(); | |||||
| ctx_.thread_num_ = thread_num; | |||||
| creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator_, nullptr); | |||||
| kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc, nullptr); | |||||
| ASSERT_NE(kernel_, nullptr); | |||||
| } | |||||
| // 1*1 -> 1*1 | // 1*1 -> 1*1 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest1) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest1) { | ||||
| std::vector<float> input = {1.0}; | |||||
| float input_data[] = {1.0f}; | |||||
| float output_data[1] = {0}; | |||||
| std::vector<int> input_shape = {1, 1, 1, 1}; | std::vector<int> input_shape = {1, 1, 1, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 1, 1}; | std::vector<int> output_shape = {1, 1, 1, 1}; | ||||
| std::vector<float> expect = {1.0}; | std::vector<float> expect = {1.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 1; | auto output_size = 1; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 1*1 | // 2*2 -> 1*1 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest2) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest2) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[1] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 1, 1}; | std::vector<int> output_shape = {1, 1, 1, 1}; | ||||
| std::vector<float> expect = {0.0}; | std::vector<float> expect = {0.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| int output_size = 1; | int output_size = 1; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 1*2 | // 2*2 -> 1*2 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest3) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest3) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[2] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 2, 1}; | std::vector<int> output_shape = {1, 1, 2, 1}; | ||||
| std::vector<float> expect = {0.0, 1.0}; | std::vector<float> expect = {0.0, 1.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 2; | auto output_size = 2; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 2*1 | // 2*2 -> 2*1 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest4) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest4) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[2] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 2, 1, 1}; | std::vector<int> output_shape = {1, 2, 1, 1}; | ||||
| std::vector<float> expect = {0.0, 2.0}; | std::vector<float> expect = {0.0, 2.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 2; | auto output_size = 2; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 2*2 | // 2*2 -> 2*2 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest5) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest5) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[4] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 2, 2, 1}; | std::vector<int> output_shape = {1, 2, 2, 1}; | ||||
| std::vector<float> expect = {0.0, 1.0, 2.0, 3.0}; | std::vector<float> expect = {0.0, 1.0, 2.0, 3.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 4; | auto output_size = 4; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 1*4 | // 2*2 -> 1*4 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest6) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest6) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[4] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 4, 1}; | std::vector<int> output_shape = {1, 1, 4, 1}; | ||||
| std::vector<float> expect = {0.0, 0.5, 1.0, 1.0}; | std::vector<float> expect = {0.0, 0.5, 1.0, 1.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 4; | auto output_size = 4; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 4*1 | // 2*2 -> 4*1 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest7) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest7) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[4] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 4, 1, 1}; | std::vector<int> output_shape = {1, 4, 1, 1}; | ||||
| std::vector<float> expect = {0.0, 1.0, 2.0, 2.0}; | std::vector<float> expect = {0.0, 1.0, 2.0, 2.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 4; | auto output_size = 4; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 2*4 | // 2*2 -> 2*4 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest8) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest8) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[8] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 2, 4, 1}; | std::vector<int> output_shape = {1, 2, 4, 1}; | ||||
| std::vector<float> expect = {0.0, 0.5, 1.0, 1.0, 2.0, 2.5, 3.0, 3.0}; | std::vector<float> expect = {0.0, 0.5, 1.0, 1.0, 2.0, 2.5, 3.0, 3.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 8; | auto output_size = 8; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 4*2 | // 2*2 -> 4*2 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest9) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest9) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[8] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 4, 2, 1}; | std::vector<int> output_shape = {1, 4, 2, 1}; | ||||
| std::vector<float> expect = {0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0}; | std::vector<float> expect = {0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 8; | auto output_size = 8; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 3*3 | // 2*2 -> 3*3 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest10) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest10) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[9] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 3, 3, 1}; | std::vector<int> output_shape = {1, 3, 3, 1}; | ||||
| std::vector<float> expect = {0.0, 0.6666667, 1.0, 1.3333334, 2.0, 2.3333335, 2.0, 2.6666667, 3.0}; | std::vector<float> expect = {0.0, 0.6666667, 1.0, 1.3333334, 2.0, 2.3333335, 2.0, 2.6666667, 3.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 9; | auto output_size = 9; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 4*4 | // 2*2 -> 4*4 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest11) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest11) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[16] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 4, 4, 1}; | std::vector<int> output_shape = {1, 4, 4, 1}; | ||||
| std::vector<float> expect = {0.0, 0.5, 1.0, 1.0, 1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 2.0, 2.5, 3.0, 3.0}; | std::vector<float> expect = {0.0, 0.5, 1.0, 1.0, 1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 2.0, 2.5, 3.0, 3.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 16; | auto output_size = 16; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2*2*5 -> 2*4*4*5 | // 2*2*2*5 -> 2*4*4*5 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest12) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest12) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float output_data[160] = {0}; | |||||
| std::vector<int> input_shape = {2, 2, 2, 5}; | std::vector<int> input_shape = {2, 2, 2, 5}; | ||||
| std::vector<int> output_shape = {2, 4, 4, 5}; | std::vector<int> output_shape = {2, 4, 4, 5}; | ||||
| std::vector<float> expect = { | std::vector<float> expect = { | ||||
| @@ -224,20 +275,21 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest12) { | |||||
| 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | ||||
| 34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | 34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 160; | auto output_size = 160; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2*2*5 -> 2*4*4*5 align corners | // 2*2*2*5 -> 2*4*4*5 align corners | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest13) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest13) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float output_data[160] = {0}; | |||||
| std::vector<int> input_shape = {2, 2, 2, 5}; | std::vector<int> input_shape = {2, 2, 2, 5}; | ||||
| std::vector<int> output_shape = {2, 4, 4, 5}; | std::vector<int> output_shape = {2, 4, 4, 5}; | ||||
| std::vector<float> expect = { | std::vector<float> expect = { | ||||
| @@ -258,20 +310,21 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest13) { | |||||
| 30.0, 31.0, 32.0, 33.0, 34.0, 31.666666, 32.666668, 33.666668, 34.666668, 35.666668, | 30.0, 31.0, 32.0, 33.0, 34.0, 31.666666, 32.666668, 33.666668, 34.666668, 35.666668, | ||||
| 33.333332, 34.333332, 35.333332, 36.333332, 37.333332, 35.0, 36.0, 37.0, 38.0, 39.0}; | 33.333332, 34.333332, 35.333332, 36.333332, 37.333332, 35.0, 36.0, 37.0, 38.0, 39.0}; | ||||
| bool align_corners = true; | bool align_corners = true; | ||||
| auto output_size = 160; | auto output_size = 160; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2*2*5 -> 2*4*4*5 thread_num 2 | // 2*2*2*5 -> 2*4*4*5 thread_num 2 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest14) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest14) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float output_data[160] = {0}; | |||||
| std::vector<int> input_shape = {2, 2, 2, 5}; | std::vector<int> input_shape = {2, 2, 2, 5}; | ||||
| std::vector<int> output_shape = {2, 4, 4, 5}; | std::vector<int> output_shape = {2, 4, 4, 5}; | ||||
| std::vector<float> expect = { | std::vector<float> expect = { | ||||
| @@ -285,24 +338,22 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest14) { | |||||
| 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | ||||
| 34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | 34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | ||||
| bool align_corners = false; | bool align_corners = false; | ||||
| auto output_size = 160; | auto output_size = 160; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| thread_num = 2; | |||||
| tid = 0; | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| tid = 1; | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| int thread_num = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, thread_num); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2*2*5 -> 2*4*4*5 thread_num 4 | // 2*2*2*5 -> 2*4*4*5 thread_num 4 | ||||
| TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) { | TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float output_data[160] = {0}; | |||||
| std::vector<int> input_shape = {2, 2, 2, 5}; | std::vector<int> input_shape = {2, 2, 2, 5}; | ||||
| std::vector<int> output_shape = {2, 4, 4, 5}; | std::vector<int> output_shape = {2, 4, 4, 5}; | ||||
| std::vector<float> expect = { | std::vector<float> expect = { | ||||
| @@ -319,19 +370,11 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) { | |||||
| auto output_size = 160; | auto output_size = 160; | ||||
| std::vector<float> output(output_size, 0.0); | std::vector<float> output(output_size, 0.0); | ||||
| thread_num = 4; | |||||
| tid = 0; | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| tid = 1; | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| tid = 2; | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| tid = 3; | |||||
| ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid, | |||||
| thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| int thread_num = 4; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, thread_num); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -15,168 +15,250 @@ | |||||
| */ | */ | ||||
| #include <vector> | #include <vector> | ||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.h" | |||||
| #include "nnacl/resize_parameter.h" | |||||
| #include "mindspore/lite/src/kernel_registry.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| class TestResizeNearestNeighborFp32 : public mindspore::CommonTest { | class TestResizeNearestNeighborFp32 : public mindspore::CommonTest { | ||||
| public: | public: | ||||
| TestResizeNearestNeighborFp32() = default; | TestResizeNearestNeighborFp32() = default; | ||||
| void Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape, float *input_data, | |||||
| float *output_data, const bool align_corners, const int thread_num); | |||||
| void TearDown() override; | |||||
| public: | public: | ||||
| int tid = 0; | |||||
| int thread_num = 1; | |||||
| float err_tol = 1e-5; | float err_tol = 1e-5; | ||||
| lite::tensor::Tensor in_tensor_; | |||||
| lite::tensor::Tensor out_tensor_; | |||||
| std::vector<lite::tensor::Tensor *> inputs_{&in_tensor_}; | |||||
| std::vector<lite::tensor::Tensor *> outputs_{&out_tensor_}; | |||||
| ResizeParameter param_ = {{}}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; | |||||
| lite::Context ctx_ = lite::Context(); | |||||
| kernel::KernelCreator creator_ = nullptr; | |||||
| kernel::LiteKernel *kernel_ = nullptr; | |||||
| }; | }; | ||||
| void TestResizeNearestNeighborFp32::TearDown() { | |||||
| in_tensor_.SetData(nullptr); | |||||
| out_tensor_.SetData(nullptr); | |||||
| } | |||||
| void TestResizeNearestNeighborFp32::Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape, | |||||
| float *input_data, float *output_data, const bool align_corners, | |||||
| const int thread_num) { | |||||
| in_tensor_.set_data_type(kNumberTypeFloat32); | |||||
| in_tensor_.set_shape(input_shape); | |||||
| out_tensor_.set_data_type(kNumberTypeFloat32); | |||||
| out_tensor_.set_shape(output_shape); | |||||
| in_tensor_.SetData(input_data); | |||||
| out_tensor_.SetData(output_data); | |||||
| ResizeParameter param_ = { | |||||
| {}, static_cast<int>(schema::ResizeMethod_NEAREST_NEIGHBOR), output_shape[1], output_shape[2], align_corners}; | |||||
| desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; | |||||
| ctx_ = lite::Context(); | |||||
| ctx_.thread_num_ = thread_num; | |||||
| creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator_, nullptr); | |||||
| kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc, nullptr); | |||||
| ASSERT_NE(kernel_, nullptr); | |||||
| } | |||||
| // 1*1 -> 1*1 | // 1*1 -> 1*1 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest1) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest1) { | ||||
| std::vector<float> input = {1.0}; | |||||
| float input_data[] = {1.0}; | |||||
| float output_data[1] = {0}; | |||||
| std::vector<int> input_shape = {1, 1, 1, 1}; | std::vector<int> input_shape = {1, 1, 1, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 1, 1}; | std::vector<int> output_shape = {1, 1, 1, 1}; | ||||
| std::vector<float> expect = {1.0}; | std::vector<float> expect = {1.0}; | ||||
| size_t output_size = 1; | size_t output_size = 1; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 1*1 | // 2*2 -> 1*1 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest2) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest2) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[1] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 1, 1}; | std::vector<int> output_shape = {1, 1, 1, 1}; | ||||
| std::vector<float> expect = {0.0}; | std::vector<float> expect = {0.0}; | ||||
| size_t output_size = 1; | size_t output_size = 1; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 1*2 | // 2*2 -> 1*2 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest3) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest3) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[2] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 2, 1}; | std::vector<int> output_shape = {1, 1, 2, 1}; | ||||
| std::vector<float> expect = {0.0, 1.0}; | std::vector<float> expect = {0.0, 1.0}; | ||||
| size_t output_size = 2; | size_t output_size = 2; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 2*1 | // 2*2 -> 2*1 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest4) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest4) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[2] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 2, 1, 1}; | std::vector<int> output_shape = {1, 2, 1, 1}; | ||||
| std::vector<float> expect = {0.0, 2.0}; | std::vector<float> expect = {0.0, 2.0}; | ||||
| size_t output_size = 2; | size_t output_size = 2; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 2*2 | // 2*2 -> 2*2 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest5) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest5) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[4] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 2, 2, 1}; | std::vector<int> output_shape = {1, 2, 2, 1}; | ||||
| std::vector<float> expect = {0.0, 1.0, 2.0, 3.0}; | std::vector<float> expect = {0.0, 1.0, 2.0, 3.0}; | ||||
| size_t output_size = 4; | size_t output_size = 4; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 1*4 | // 2*2 -> 1*4 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest6) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest6) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[4] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 1, 4, 1}; | std::vector<int> output_shape = {1, 1, 4, 1}; | ||||
| std::vector<float> expect = {0.0, 0.0, 1.0, 1.0}; | std::vector<float> expect = {0.0, 0.0, 1.0, 1.0}; | ||||
| size_t output_size = 4; | size_t output_size = 4; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 4*1 | // 2*2 -> 4*1 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest7) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest7) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[4] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 4, 1, 1}; | std::vector<int> output_shape = {1, 4, 1, 1}; | ||||
| std::vector<float> expect = {0.0, 0.0, 2.0, 2.0}; | std::vector<float> expect = {0.0, 0.0, 2.0, 2.0}; | ||||
| size_t output_size = 4; | size_t output_size = 4; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 2*4 | // 2*2 -> 2*4 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest8) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest8) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[8] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 2, 4, 1}; | std::vector<int> output_shape = {1, 2, 4, 1}; | ||||
| std::vector<float> expect = {0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0}; | std::vector<float> expect = {0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0}; | ||||
| size_t output_size = 8; | size_t output_size = 8; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 4*2 | // 2*2 -> 4*2 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest9) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest9) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[8] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 4, 2, 1}; | std::vector<int> output_shape = {1, 4, 2, 1}; | ||||
| std::vector<float> expect = {0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0}; | std::vector<float> expect = {0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0}; | ||||
| size_t output_size = 8; | size_t output_size = 8; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 3*3 | // 2*2 -> 3*3 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest10) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest10) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[9] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 3, 3, 1}; | std::vector<int> output_shape = {1, 3, 3, 1}; | ||||
| std::vector<float> expect = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0}; | std::vector<float> expect = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0}; | ||||
| size_t output_size = 9; | size_t output_size = 9; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2 -> 4*4 | // 2*2 -> 4*4 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest11) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest11) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0}; | |||||
| float output_data[16] = {0}; | |||||
| std::vector<int> input_shape = {1, 2, 2, 1}; | std::vector<int> input_shape = {1, 2, 2, 1}; | ||||
| std::vector<int> output_shape = {1, 4, 4, 1}; | std::vector<int> output_shape = {1, 4, 4, 1}; | ||||
| std::vector<float> expect = {0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 2.0, 3.0, 3.0}; | std::vector<float> expect = {0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 2.0, 3.0, 3.0}; | ||||
| size_t output_size = 16; | size_t output_size = 16; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2*2*5 -> 2*4*4*5 | // 2*2*2*5 -> 2*4*4*5 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest12) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest12) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float output_data[160] = {0}; | |||||
| std::vector<int> input_shape = {2, 2, 2, 5}; | std::vector<int> input_shape = {2, 2, 2, 5}; | ||||
| std::vector<int> output_shape = {2, 4, 4, 5}; | std::vector<int> output_shape = {2, 4, 4, 5}; | ||||
| std::vector<float> expect = { | std::vector<float> expect = { | ||||
| @@ -190,17 +272,21 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest12) { | |||||
| 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | ||||
| 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | ||||
| size_t output_size = 160; | size_t output_size = 160; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| bool align_corners = false; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2*2*5 -> 2*4*4*5 thread_num 2 | // 2*2*2*5 -> 2*4*4*5 thread_num 2 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest13) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest13) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float output_data[160] = {0}; | |||||
| std::vector<int> input_shape = {2, 2, 2, 5}; | std::vector<int> input_shape = {2, 2, 2, 5}; | ||||
| std::vector<int> output_shape = {2, 4, 4, 5}; | std::vector<int> output_shape = {2, 4, 4, 5}; | ||||
| std::vector<float> expect = { | std::vector<float> expect = { | ||||
| @@ -214,21 +300,21 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest13) { | |||||
| 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | ||||
| 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | ||||
| size_t output_size = 160; | size_t output_size = 160; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| thread_num = 2; | |||||
| tid = 0; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| tid = 1; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 2); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| // 2*2*2*5 -> 2*4*4*5 thread_num 4 | // 2*2*2*5 -> 2*4*4*5 thread_num 4 | ||||
| TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest14) { | TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest14) { | ||||
| std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, | |||||
| 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, | |||||
| 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | |||||
| float output_data[160] = {0}; | |||||
| std::vector<int> input_shape = {2, 2, 2, 5}; | std::vector<int> input_shape = {2, 2, 2, 5}; | ||||
| std::vector<int> output_shape = {2, 4, 4, 5}; | std::vector<int> output_shape = {2, 4, 4, 5}; | ||||
| std::vector<float> expect = { | std::vector<float> expect = { | ||||
| @@ -242,17 +328,12 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest14) { | |||||
| 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0, | ||||
| 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | 34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0}; | ||||
| size_t output_size = 160; | size_t output_size = 160; | ||||
| std::vector<float> output(output_size, 0.0); | |||||
| thread_num = 4; | |||||
| tid = 0; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| tid = 1; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| tid = 2; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| tid = 3; | |||||
| ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num); | |||||
| CompareOutputData(output.data(), expect.data(), output_size, err_tol); | |||||
| bool align_corners = false; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, align_corners, 4); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputData(output_data, expect.data(), output_size, err_tol); | |||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -0,0 +1,355 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <memory> | |||||
| #include "utils/log_adapter.h" | |||||
| #include "common/common_test.h" | |||||
| #include "src/ir/tensor.h" | |||||
| #include "mindspore/lite/src/kernel_registry.h" | |||||
| #include "nnacl/fp32/reduce.h" | |||||
| namespace mindspore { | |||||
| using mindspore::lite::tensor::QuantArg; | |||||
| using mindspore::lite::tensor::Tensor; | |||||
| using mindspore::schema::ReduceMode; | |||||
| using mindspore::schema::ReduceMode_ReduceMax; | |||||
| using mindspore::schema::ReduceMode_ReduceMean; | |||||
| using mindspore::schema::ReduceMode_ReduceMin; | |||||
| using mindspore::schema::ReduceMode_ReduceProd; | |||||
| using mindspore::schema::ReduceMode_ReduceSum; | |||||
| using mindspore::schema::ReduceMode_ReduceSumSquare; | |||||
| class TestReduceInt8 : public mindspore::CommonTest { | |||||
| public: | |||||
| TestReduceInt8() = default; | |||||
| void Prepare(const std::vector<int> &in_shape, const std::vector<int> &out_shape, int8_t *input_data, | |||||
| int8_t *output_data, ReduceMode mode, const int *axes, const int num_axes); | |||||
| void TearDown() override; | |||||
| public: | |||||
| int thread_num_ = 1; | |||||
| ReduceParameter param_ = {}; | |||||
| Tensor in_tensor_; | |||||
| Tensor out_tensor_; | |||||
| std::vector<Tensor *> inputs{&in_tensor_}; | |||||
| std::vector<Tensor *> outputs{&out_tensor_}; | |||||
| kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reduce}; | |||||
| kernel::KernelCreator creator_ = nullptr; | |||||
| lite::Context ctx_ = lite::Context(); | |||||
| kernel::LiteKernel *kernel_ = nullptr; | |||||
| const QuantArg quant_in_ = {0.005f, 5}; | |||||
| const QuantArg quant_out_ = {0.01f, 1}; | |||||
| float err_tol_ = 0.05; | |||||
| }; | |||||
| void TestReduceInt8::TearDown() { | |||||
| in_tensor_.SetData(nullptr); | |||||
| out_tensor_.SetData(nullptr); | |||||
| } | |||||
| void TestReduceInt8::Prepare(const std::vector<int> &in_shape, const std::vector<int> &out_shape, int8_t *input_data, | |||||
| int8_t *output_data, ReduceMode mode, const int *axes, const int num_axes) { | |||||
| in_tensor_.set_data_type(kNumberTypeInt8); | |||||
| in_tensor_.set_shape(in_shape); | |||||
| in_tensor_.SetData(input_data); | |||||
| in_tensor_.AddQuantParam(quant_in_); | |||||
| out_tensor_.set_data_type(kNumberTypeInt8); | |||||
| out_tensor_.set_shape(out_shape); | |||||
| out_tensor_.SetData(output_data); | |||||
| out_tensor_.AddQuantParam(quant_out_); | |||||
| param_.mode_ = static_cast<int>(mode); | |||||
| param_.num_axes_ = num_axes; | |||||
| memcpy(param_.axes_, axes, num_axes * sizeof(int)); | |||||
| creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_); | |||||
| ctx_.thread_num_ = thread_num_; | |||||
| kernel_ = creator_(inputs, outputs, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc_, nullptr); | |||||
| } | |||||
| TEST_F(TestReduceInt8, Mean) { | |||||
| /* 2 4 4 3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[32] = {0}; | |||||
| int axes[] = {3}; | |||||
| int num_axes = 1; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {2, 4, 4, 1}; | |||||
| int output_size = 32; | |||||
| int8_t correct[] = {-1, 1, 2, 3, 5, 7, 8, 10, 11, 12, 14, 16, 17, 19, 20, 22, | |||||
| 23, 25, 26, 28, 29, 30, 32, 34, 35, 37, 38, 40, 41, 43, 44, 46}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMean, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| err_tol_ = 0.09375; | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, MeanAllAxis) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[1] = {0}; | |||||
| int axes[] = {0}; | |||||
| int num_axes = 0; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {1}; | |||||
| int output_size = 1; | |||||
| int8_t correct[] = {22}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMean, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| err_tol_ = 1.0f; | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, Sum) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[32] = {0}; | |||||
| int axes[] = {-1}; | |||||
| int num_axes = 1; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {2, 4, 4, 1}; | |||||
| int output_size = 32; | |||||
| int8_t correct[] = {-5, -1, 4, 9, 13, 18, 22, 27, 31, 36, 40, 45, 49, 54, 58, 63, | |||||
| 67, 72, 76, 81, 85, 90, 94, 99, 103, 107, 112, 117, 121, 126, 127, 127}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSum, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| err_tol_ = 0.0625f; | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, SumAllAxis) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = { | |||||
| 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |||||
| 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |||||
| 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |||||
| }; | |||||
| int8_t output_data[1] = {0}; | |||||
| int axes[] = {0, 1, 2, 3}; | |||||
| int num_axes = 4; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {1}; | |||||
| int output_size = 1; | |||||
| int8_t correct[] = {-47}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSum, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, Max) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[32] = {0}; | |||||
| int axes[] = {3}; | |||||
| int num_axes = 1; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {2, 4, 4, 1}; | |||||
| int output_size = 32; | |||||
| int8_t correct[] = {-1, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, | |||||
| 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMax, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, MaxAll) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[1] = {0}; | |||||
| int axes[] = {0, 1, 2, 3}; | |||||
| int num_axes = 4; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {1}; | |||||
| int output_size = 1; | |||||
| int8_t correct[] = {46}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMax, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, Min) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[32] = {0}; | |||||
| int axes[] = {3}; | |||||
| int num_axes = 1; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {2, 4, 4, 1}; | |||||
| int output_size = 32; | |||||
| int8_t correct[] = {-2, 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, | |||||
| 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMin, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, MinAll) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[1] = {0}; | |||||
| int axes[] = {0}; | |||||
| int num_axes = 0; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {1}; | |||||
| int output_size = 1; | |||||
| int8_t correct[] = {-2}; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMin, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, Prod) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, | |||||
| 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, | |||||
| 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, | |||||
| 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, | |||||
| 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, | |||||
| 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105}; | |||||
| int8_t output_data[32] = {0}; | |||||
| int axes[] = {3}; | |||||
| int num_axes = 1; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {2, 4, 4, 1}; | |||||
| int output_size = 32; | |||||
| int8_t correct[] = { | |||||
| 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, | |||||
| 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, | |||||
| }; | |||||
| thread_num_ = 2; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceProd, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, Prod2Axis) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[12] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105}; | |||||
| int8_t output_data[8] = {0}; | |||||
| int axes[] = {2, 3}; | |||||
| int num_axes = 2; | |||||
| std::vector<int> input_shape = {1, 2, 2, 3}; | |||||
| std::vector<int> output_shape = {1, 2}; | |||||
| int output_size = 2; | |||||
| int8_t correct[] = {3, 3}; | |||||
| thread_num_ = 1; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceProd, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, SumSquare) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, | |||||
| 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; | |||||
| int8_t output_data[32] = {0}; | |||||
| int axes[] = {3}; | |||||
| int num_axes = 1; | |||||
| std::vector<int> input_shape = {2, 4, 4, 3}; | |||||
| std::vector<int> output_shape = {2, 4, 4, 1}; | |||||
| int output_size = 32; | |||||
| int8_t correct[] = {1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6, 7, 9, 10, 12, 14, | |||||
| 16, 18, 20, 22, 25, 27, 30, 33, 36, 39, 42, 45, 49, 53, 56, 60}; | |||||
| thread_num_ = 1; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSumSquare, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| TEST_F(TestReduceInt8, SumSquare2Axis) { | |||||
| /* 2*4*4*3 NHWC */ | |||||
| int8_t input_data[12] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105}; | |||||
| int8_t output_data[8] = {0}; | |||||
| int axes[] = {3, 2}; | |||||
| int num_axes = 2; | |||||
| std::vector<int> input_shape = {1, 2, 2, 3}; | |||||
| std::vector<int> output_shape = {1, 2}; | |||||
| int output_size = 2; | |||||
| int8_t correct[] = {114, 114}; | |||||
| thread_num_ = 1; | |||||
| Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSumSquare, axes, num_axes); | |||||
| auto ret = kernel_->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| CompareOutputInt8(output_data, correct, output_size, err_tol_); | |||||
| } | |||||
| } // namespace mindspore | |||||
| @@ -18,10 +18,8 @@ | |||||
| #include "include/context.h" | #include "include/context.h" | ||||
| #include "src/ir/tensor.h" | #include "src/ir/tensor.h" | ||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "src/common/file_utils.h" | |||||
| #include "mindspore/lite/src/kernel_registry.h" | #include "mindspore/lite/src/kernel_registry.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/int8/resize.h" | |||||
| #include "src/runtime/kernel/arm/int8/resize_int8.h" | |||||
| #include "nnacl/int8/resize.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| using mindspore::lite::tensor::QuantArg; | using mindspore::lite::tensor::QuantArg; | ||||
| @@ -92,7 +90,7 @@ TEST_F(TestResizeBilinearInt8, Bilinear0) { | |||||
| int8_t expect[16] = {4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 6}; | int8_t expect[16] = {4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 6}; | ||||
| Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num); | Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num); | ||||
| kernel_->Init(); | |||||
| kernel_->Init(); // todo delete | |||||
| kernel_->Run(); | kernel_->Run(); | ||||
| CompareOutputInt8(output_data, expect, 16, err_percent_); | CompareOutputInt8(output_data, expect, 16, err_percent_); | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include "src/ir/tensor.h" | #include "src/ir/tensor.h" | ||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/kernel_registry.h" | #include "mindspore/lite/src/kernel_registry.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/int8/resize.h" | |||||
| #include "nnacl/int8/resize.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| using mindspore::lite::tensor::QuantArg; | using mindspore::lite::tensor::QuantArg; | ||||
| @@ -92,7 +92,7 @@ TEST_F(TestResizeNearestNeighborInt8, NearestNeighbor0) { | |||||
| err_percent_ = 0.25f; | err_percent_ = 0.25f; | ||||
| Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, false, thread_num); | Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, false, thread_num); | ||||
| kernel_->Init(); | |||||
| kernel_->Init(); // todo delete | |||||
| kernel_->Run(); | kernel_->Run(); | ||||
| CompareOutputInt8(output_data, expect, 16, err_percent_); | CompareOutputInt8(output_data, expect, 16, err_percent_); | ||||