Browse Source

!4485 add reduce ops int8

Merge pull request !4485 from zhaozhenlong/lite/op/int8/reduce_mean_sum
tags/v0.7.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
0a01bed4cb
20 changed files with 1940 additions and 362 deletions
  1. +1
    -1
      mindspore/lite/src/populate_parameter.cc
  2. +199
    -0
      mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc
  3. +54
    -0
      mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h
  4. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/base/resize_base.h
  5. +6
    -126
      mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc
  6. +8
    -26
      mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h
  7. +323
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc
  8. +98
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h
  9. +2
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/errorcode.h
  10. +1
    -8
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/reduce.h
  11. +467
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c
  12. +53
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.h
  13. +20
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h
  14. +30
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/reduce_parameter.h
  15. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/resize_parameter.h
  16. +157
    -114
      mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc
  17. +160
    -79
      mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc
  18. +355
    -0
      mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc
  19. +2
    -4
      mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
  20. +2
    -2
      mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc

+ 1
- 1
mindspore/lite/src/populate_parameter.cc View File

@@ -38,7 +38,7 @@
#include "src/runtime/kernel/arm/nnacl/softmax_parameter.h"
#include "src/runtime/kernel/arm/nnacl/fp32/tile.h"
#include "src/runtime/kernel/arm/nnacl/fp32/topk.h"
#include "src/runtime/kernel/arm/nnacl/fp32/reduce.h"
#include "src/runtime/kernel/arm/nnacl/reduce_parameter.h"
#include "src/runtime/kernel/arm/nnacl/fp32/activation.h"
#include "src/runtime/kernel/arm/nnacl/fp32/arithmetic.h"
#include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h"


+ 199
- 0
mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc View File

@@ -0,0 +1,199 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"
#include "src/runtime/kernel/arm/base/reduce_base.h"
#include "src/runtime/kernel/arm/fp32/reduce.h"
#include "src/runtime/kernel/arm/int8/reduce_int8.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Mean;
using mindspore::schema::PrimitiveType_Reduce;

namespace mindspore::kernel {
namespace {
constexpr size_t kInputNum = 1;
constexpr size_t kOutputNum = 1;
} // namespace

int ReduceBaseCPUKernel::CheckInputsOutputs() {
if (in_tensors_.size() != kInputNum) {
MS_LOG(ERROR) << "Reduce inputs size should be " << kInputNum << " but got " << in_tensors_.size();
return RET_ERROR;
}
if (out_tensors_.size() != kOutputNum) {
MS_LOG(ERROR) << "Reduce outputs size should be " << kOutputNum << " but got " << out_tensors_.size();
return RET_ERROR;
}
auto input = in_tensors_.at(0);
if (input == nullptr) {
MS_LOG(ERROR) << "Reduce input is nullptr";
return RET_NULL_PTR;
}
auto output = out_tensors_.at(0);
if (output == nullptr) {
MS_LOG(ERROR) << "Reduce output is nullptr";
return RET_NULL_PTR;
}
return RET_OK;
}

int ReduceBaseCPUKernel::CheckParameters() {
size_t input_rank = in_tensors_.at(0)->shape().size();
if (static_cast<size_t>(num_axes_) > input_rank) {
MS_LOG(ERROR) << "Reduce op invalid num of reduce axes " << num_axes_ << " larger than input rank " << input_rank;
return RET_ERROR;
}
for (auto i = 0; i < num_axes_; i++) {
if (axes_[i] < -static_cast<int>(input_rank) || axes_[i] >= static_cast<int>(input_rank)) {
MS_LOG(ERROR) << "Reduce got invalid axis " << axes_[i] << ", axis should be in ["
<< -static_cast<int>(input_rank) << ", " << input_rank - 1 << "].";
return RET_ERROR;
}
if (axes_[i] < 0) {
axes_[i] += static_cast<int>(input_rank);
}
}

if (num_axes_ == 0) {
for (int i = 0; i < input_rank; i++) {
axes_[i] = i;
}
num_axes_ = static_cast<int>(input_rank);
}

return RET_OK;
}

// Copies mode/axes out of the ReduceParameter attached to this kernel and
// runs the tensor and axis validation.
// Returns RET_NULL_PTR if the parameter is missing, otherwise the result of
// the validation steps.
int ReduceBaseCPUKernel::Init() {
  auto *param = reinterpret_cast<ReduceParameter *>(op_parameter_);
  if (param == nullptr) {
    return RET_NULL_PTR;
  }
  mode_ = param->mode_;
  num_axes_ = param->num_axes_;
  memcpy(axes_, param->axes_, sizeof(param->axes_));

  auto ret = CheckInputsOutputs();
  if (ret == RET_OK) {
    ret = CheckParameters();
  }
  return ret;
}

// Factory for the fp32 Reduce kernel. Validates the descriptor, constructs a
// ReduceCPUKernel and runs Init(); returns nullptr on any failure.
kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const lite::Context *ctx,
                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_Reduce);
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Reduce opParameter nullptr";
    return nullptr;
  }
  if (desc.type != schema::PrimitiveType_Reduce) {
    MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type;
    return nullptr;
  }
  auto *reduce_kernel = new (std::nothrow) ReduceCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (reduce_kernel == nullptr) {
    MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed.";
    return nullptr;
  }
  auto init_ret = reduce_kernel->Init();
  if (init_ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete reduce_kernel;
    return nullptr;
  }
  return reduce_kernel;
}

// Factory for the fp32 Mean kernel (Mean is served by the same
// ReduceCPUKernel implementation). Returns nullptr on any failure.
kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                             const std::vector<lite::tensor::Tensor *> &outputs,
                                             OpParameter *opParameter, const lite::Context *ctx,
                                             const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_Mean);
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Reduce opParameter nullptr";
    return nullptr;
  }
  if (desc.type != schema::PrimitiveType_Mean) {
    MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Mean, got " << desc.type;
    return nullptr;
  }
  auto *mean_kernel = new (std::nothrow) ReduceCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (mean_kernel == nullptr) {
    MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed.";
    return nullptr;
  }
  auto init_ret = mean_kernel->Init();
  if (init_ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete mean_kernel;
    return nullptr;
  }
  return mean_kernel;
}

// Factory for the int8 Reduce kernel. Validates the descriptor, constructs a
// ReduceInt8CPUKernel and runs Init(); returns nullptr on any failure.
kernel::LiteKernel *CpuReduceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const lite::Context *ctx,
                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_Reduce);
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Reduce opParameter nullptr";
    return nullptr;
  }
  if (desc.type != schema::PrimitiveType_Reduce) {
    MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type;
    return nullptr;
  }
  auto *kernel = new (std::nothrow) ReduceInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    // Fixed: previous message named ReduceCPUKernel, but this creator builds
    // the int8 variant.
    MS_LOG(ERROR) << "Reduce new ReduceInt8CPUKernel failed.";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reduce, CpuReduceFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mean, CpuMeanFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reduce, CpuReduceInt8KernelCreator)
} // namespace mindspore::kernel

+ 54
- 0
mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h View File

@@ -0,0 +1,54 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_

#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"
#include "nnacl/reduce_parameter.h"

namespace mindspore::kernel {
// Common base for the fp32/int8 Reduce kernels. Init() parses the attached
// ReduceParameter, validates the tensors and normalizes the reduce axes;
// derived kernels consume the protected fields during Run().
class ReduceBaseCPUKernel : public LiteKernel {
 public:
  ReduceBaseCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                      const lite::Primitive *primitive)
      : LiteKernel(param, inputs, outputs, ctx, primitive) {}
  virtual ~ReduceBaseCPUKernel() = default;

  int Init() override;
  int ReSize() override { return 0; }

 private:
  int CheckInputsOutputs();  // exactly one non-null input and output tensor
  int CheckParameters();     // validate/normalize axes against input rank

 protected:
  // Initialized here so a kernel whose Init() fails early never reads
  // indeterminate values.
  int axes_[REDUCE_MAX_AXES_NUM] = {0};  // normalized (non-negative) reduce axes
  int num_axes_ = 0;                     // number of valid entries in axes_
  int mode_ = 0;                         // schema::ReduceMode value

  // Per-axis iteration geometry, maintained by derived kernels during Run():
  // outer_size_ * axis_size_ * inner_size_ spans the current tmp_shape_.
  int outer_size_ = 0;
  int inner_size_ = 0;
  int axis_size_ = 0;
  std::vector<int> tmp_shape_;
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_

+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/base/resize_base.h View File

@@ -31,7 +31,7 @@ class ResizeBaseCPUKernel : public LiteKernel {
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {}

~ResizeBaseCPUKernel() = default;
virtual ~ResizeBaseCPUKernel() = default;

int Init() override;
int ReSize() override { return 0; };


+ 6
- 126
mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc View File

@@ -20,6 +20,7 @@
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"
#include "src/runtime/kernel/arm/nnacl/fp32/reduce.h"
#include "src/runtime/kernel/arm/base/reduce_base.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
@@ -37,69 +38,9 @@ using mindspore::schema::ReduceMode_ReduceSum;
using mindspore::schema::ReduceMode_ReduceSumSquare;

namespace mindspore::kernel {
namespace {
constexpr size_t kInputNum = 1;
constexpr size_t kOutputNum = 1;
} // namespace

int ReduceCPUKernel::CheckInputsOutputs() {
if (in_tensors_.size() != kInputNum) {
MS_LOG(ERROR) << "Reduce inputs size should be " << kInputNum << " but got " << in_tensors_.size();
return RET_ERROR;
}
if (out_tensors_.size() != kOutputNum) {
MS_LOG(ERROR) << "Reduce outputs size should be " << kOutputNum << " but got " << out_tensors_.size();
return RET_ERROR;
}
auto input = in_tensors_.at(0);
if (input == nullptr) {
MS_LOG(ERROR) << "Reduce input is nullptr";
return RET_NULL_PTR;
}
auto output = out_tensors_.at(0);
if (output == nullptr) {
MS_LOG(ERROR) << "Reduce output is nullptr";
return RET_NULL_PTR;
}
return RET_OK;
}

int ReduceCPUKernel::CheckParameters() {
size_t input_rank = in_tensors_.at(0)->shape().size();
if (static_cast<size_t>(num_axes_) > input_rank) {
MS_LOG(ERROR) << "Reduce num of reduce axes " << num_axes_ << " larger than input rank " << input_rank;
return RET_ERROR;
}
for (auto i = 0; i < num_axes_; i++) {
if (axes_[i] < -static_cast<int>(input_rank) || axes_[i] >= static_cast<int>(input_rank)) {
MS_LOG(ERROR) << "Reduce got invalid axis " << axes_[i] << ", axis should be in ["
<< -static_cast<int>(input_rank) << ", " << input_rank - 1 << "].";
return RET_ERROR;
}
if (axes_[i] < 0) {
axes_[i] += static_cast<int>(input_rank);
}
}

if (num_axes_ == 0) {
for (int i = 0; i < input_rank; i++) {
axes_[i] = i;
}
}

return RET_OK;
}

int ReduceCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
return RET_OK;
}
auto ret = CheckInputsOutputs();
if (ret != RET_OK) {
return ret;
}
ret = CheckParameters();
auto ret = ReduceBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}
@@ -107,7 +48,6 @@ int ReduceCPUKernel::Init() {
if (ret != RET_OK) {
return ret;
}

switch (mode_) {
case static_cast<int>(ReduceMode_ReduceSum): {
reducer_ = ReduceSum;
@@ -137,7 +77,10 @@ int ReduceCPUKernel::Init() {
MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
return RET_ERROR;
}
return RET_OK;
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}

int ReduceCPUKernel::CallReduceUnit(int task_id) {
@@ -225,67 +168,4 @@ int ReduceCPUKernel::MallocTmpBuffer() {
}
return RET_OK;
}

kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Reduce);
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Reduce opParameter nullptr";
return nullptr;
}
if (desc.type != schema::PrimitiveType_Reduce) {
MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Reduce, got " << desc.type;
return nullptr;
}
auto *kernel = new (std::nothrow)
ReduceCPUKernel(reinterpret_cast<ReduceParameter *>(opParameter), inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
delete kernel;
return nullptr;
}
return kernel;
}

kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Mean);
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Reduce opParameter nullptr";
return nullptr;
}
if (desc.type != schema::PrimitiveType_Mean) {
MS_LOG(ERROR) << "Reduce op desc.type should be PrimitiveType_Mean, got " << desc.type;
return nullptr;
}
auto *kernel = new (std::nothrow)
ReduceCPUKernel(reinterpret_cast<ReduceParameter *>(opParameter), inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Reduce new ReduceCPUKernel failed.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
delete kernel;
return nullptr;
}
return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reduce, CpuReduceFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mean, CpuMeanFp32KernelCreator)
} // namespace mindspore::kernel

+ 8
- 26
mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h View File

@@ -21,25 +21,20 @@
#include "src/lite_kernel.h"

#include "src/runtime/kernel/arm/nnacl/fp32/reduce.h"
#include "src/runtime/kernel/arm/base/reduce_base.h"
#include "ir/anf.h"
using mindspore::schema::ReduceMode;

namespace mindspore::kernel {
class ReduceCPUKernel : public LiteKernel {
class ReduceCPUKernel : public ReduceBaseCPUKernel {
typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
const int *src_shape, float *dst_data, const int tid, const int thread_num);

public:
ReduceCPUKernel(ReduceParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
ReduceCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs, ctx, primitive),
context_(ctx),
keep_dims_(param->keep_dims_),
num_axes_(param->num_axes_),
mode_(param->mode_) {
memcpy(axes_, param->axes_, sizeof(param->axes_));
}
: ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {}
~ReduceCPUKernel() {
for (auto i = 0; i < data_buffers_.size(); i++) {
float *buffer = data_buffers_[i];
@@ -58,26 +53,13 @@ class ReduceCPUKernel : public LiteKernel {
int CallReduceUnit(int task_id);

private:
int CheckInputsOutputs();
int CheckParameters();
int MallocTmpBuffer();

private:
const lite::Context *context_ = nullptr;
bool keep_dims_;
int axes_[REDUCE_MAX_AXES_NUM];
int num_axes_;
int mode_;

private:
Reducer reducer_;
std::vector<float *> data_buffers_;
int outer_size_;
int inner_size_;
int axis_size_;
std::vector<int> tmp_shape_;
const float *src_data_;
float *dst_data_;
Reducer reducer_;

private:
int MallocTmpBuffer();
};
} // namespace mindspore::kernel



+ 323
- 0
mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc View File

@@ -0,0 +1,323 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <algorithm>
#include "schema/model_generated.h"
#include "src/runtime/runtime_api.h"
#include "src/kernel_registry.h"
#include "nnacl/quantization/quantize.h"
#include "include/errorcode.h"
#include "src/runtime/kernel/arm/int8/reduce_int8.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Reduce;
using mindspore::schema::ReduceMode_ReduceMax;
using mindspore::schema::ReduceMode_ReduceMean;
using mindspore::schema::ReduceMode_ReduceMin;
using mindspore::schema::ReduceMode_ReduceProd;
using mindspore::schema::ReduceMode_ReduceSum;
using mindspore::schema::ReduceMode_ReduceSumSquare;

namespace mindspore::kernel {
// Initializes the int8 Reduce kernel: runs the common base checks, allocates
// the intermediate int32 buffers, precomputes the fixed-point quantization
// multipliers, and selects the per-mode reduce functions.
// Returns RET_OK on success; otherwise propagates the first failing step's
// error code, or RET_ERROR for an unsupported mode.
int ReduceInt8CPUKernel::Init() {
auto ret = ReduceBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}
// NOTE(review): MallocTmpBuffer() runs before the InferShapeDone() check
// below, so buffer sizes come from the shapes available at Init time —
// confirm shapes are final here when inference was interrupted.
ret = MallocTmpBuffer();
if (ret != RET_OK) {
return ret;
}
ret = CalculateQuantArgs();
if (ret != RET_OK) {
return ret;
}

// Each mode pairs an intermediate reducer (int32 -> int32) with a last-axis
// reducer (int32 -> requantized int8 output).
switch (mode_) {
case static_cast<int>(ReduceMode_ReduceMean): {
reducer_ = ReduceMeanInt8;
last_reducer_ = ReduceMeanLastAxis;
break;
}
case static_cast<int>(ReduceMode_ReduceSum): {
reducer_ = ReduceSumInt8;
last_reducer_ = ReduceSumLastAxis;
break;
}

case static_cast<int>(ReduceMode_ReduceMax): {
reducer_ = ReduceMaxInt8;
last_reducer_ = ReduceMaxLastAxis;
break;
}
case static_cast<int>(ReduceMode_ReduceMin): {
reducer_ = ReduceMinInt8;
last_reducer_ = ReduceMinLastAxis;
break;
}
case static_cast<int>(ReduceMode_ReduceProd): {
reducer_ = ReduceProdInt8;
last_reducer_ = ReduceProdLastAxis;
break;
}
case static_cast<int>(ReduceMode_ReduceSumSquare): {
// In multi-axes reduce cases, sum square output different output for different reduce order
// e.g. axes [2, 3] is different from axes [3, 2].
reducer_ = ReduceSumSquareInt8;
last_reducer_ = ReduceSumSquareLastAxis;
break;
}
default:
MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
return RET_ERROR;
}
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}

// Precomputes the fixed-point multipliers used to requantize results:
// one shared input->output rescale multiplier, plus per-axis multipliers for
// the Mean, Prod and SumSquare modes. QuantMulArg objects are heap-allocated
// and owned by this kernel (freed in the destructor).
// Shift convention throughout: QuantizeMultiplierSmallerThanOne returns a
// single shift; a negative shift is stored as a left shift and a positive one
// as a right shift.
int ReduceInt8CPUKernel::CalculateQuantArgs() {
lite::tensor::Tensor *input = in_tensors_.at(0);
lite::tensor::Tensor *output = out_tensors_.at(0);
MS_ASSERT(input);
MS_ASSERT(output);

// NOTE(review): assumes both tensors carry at least one quant param —
// front() on an empty vector would be UB; confirm upstream guarantees this.
quant_arg_.in_scale_ = input->GetQuantParams().front().scale;
quant_arg_.in_zp_ = input->GetQuantParams().front().zeroPoint;
quant_arg_.out_scale_ = output->GetQuantParams().front().scale;
quant_arg_.out_zp_ = output->GetQuantParams().front().zeroPoint;

// (quant_out - out_zp) * out_scale = (quant_in - in_zp) * in_scale
const double input_output_multiplier = quant_arg_.in_scale_ / quant_arg_.out_scale_;
int shift;
QuantizeMultiplierSmallerThanOne(input_output_multiplier, &quant_arg_.in_out_multiplier_, &shift);
quant_arg_.in_out_left_shift_ = shift < 0 ? -shift : 0;
quant_arg_.in_out_right_shift_ = shift > 0 ? shift : 0;

// (quant_out - zp_out)*scale_out = sum((quant_in -zp)*scale_in) * (1/num) for each axis in axes
// quant_out = sum(quant_in-zp) * (scale_in/scale_out) * (1/num)
// One multiplier per reduced axis: 1 / (length of that axis).
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
for (auto i = 0; i < num_axes_; i++) {
auto axis = axes_[i];
double reciprocal = 1.0 / in_tensors_.at(0)->shape()[axis];
QuantMulArg *qm = new (std::nothrow) QuantMulArg;
if (qm == nullptr) {
MS_LOG(ERROR) << "Reduce new QuantMulArg failed.";
return RET_NULL_PTR;
}
QuantizeMultiplierSmallerThanOne(reciprocal, &qm->multiplier_, &shift);
qm->left_shift_ = shift < 0 ? -shift : 0;
qm->right_shift_ = shift > 0 ? shift : 0;
mean_multipliers_.push_back(qm);
}
}

// (quant_out - zp) * scale_out = prod(quant_in - zp) * scale_in^num
// quant_out = prod(quant_in-zp) * (scale_in^num/scale_out) + zp_out
// scale_in^num-1 * scale_in/scale_out
// One multiplier per reduced axis: scale_in^(axis_len - 1); the remaining
// scale_in/scale_out factor is the shared in_out multiplier above.
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
for (auto i = 0; i < num_axes_; i++) {
int axis_size = in_tensors_.at(0)->shape()[axes_[i]];
QuantMulArg *qm = new (std::nothrow) QuantMulArg;
if (qm == nullptr) {
MS_LOG(ERROR) << "ReduceProd new QuantMulArg failed.";
return RET_NULL_PTR;
}
double prod_multiplier = pow(quant_arg_.in_scale_, axis_size - 1);
QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift);
qm->left_shift_ = shift < 0 ? -shift : 0;
qm->right_shift_ = shift > 0 ? shift : 0;
prod_multipliers_.push_back(qm);
}
}

// (quant_out - zp) * scale_out = sum((quant_in - zp)^2 * scale_in^2)
// quant_out = sum((quant_in - zp)^2) * scale_in^2 / scale_out + zp_out
// scale_in * scale_in/scale_out
// Intermediate axes use scale_in alone; the final axis (appended last) folds
// in the remaining scale_in / scale_out factor.
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
for (auto i = 0; i < num_axes_ - 1; i++) {
QuantMulArg *qm = new (std::nothrow) QuantMulArg;
if (qm == nullptr) {
MS_LOG(ERROR) << "ReduceProd new QuantMultiplier failed.";
return RET_NULL_PTR;
}
double sumsquare_multiplier = quant_arg_.in_scale_;
QuantizeMultiplierSmallerThanOne(sumsquare_multiplier, &qm->multiplier_, &shift);
qm->left_shift_ = shift < 0 ? -shift : 0;
qm->right_shift_ = shift > 0 ? shift : 0;
sum_square_multipliers_.push_back(qm);
}

QuantMulArg *qm = new (std::nothrow) QuantMulArg;
if (qm == nullptr) {
MS_LOG(ERROR) << "ReduceProd new QuantMultiplier failed.";
return RET_NULL_PTR;
}
double sumsquare_multiplier = quant_arg_.in_scale_ * quant_arg_.in_scale_ / quant_arg_.out_scale_;
QuantizeMultiplierSmallerThanOne(sumsquare_multiplier, &qm->multiplier_, &shift);
qm->left_shift_ = shift < 0 ? -shift : 0;
qm->right_shift_ = shift > 0 ? shift : 0;
sum_square_multipliers_.push_back(qm);
}
return RET_OK;
}

// Allocates one int32 scratch buffer per intermediate reduce axis (sized for
// the shape with all previously reduced axes collapsed to 1), plus
// begin_src_data_, an int32 widening copy of the int8 input tensor.
// Returns RET_OK, RET_ERROR on scratch-buffer malloc failure, or
// RET_NULL_PTR if begin_src_data_ cannot be allocated.
int ReduceInt8CPUKernel::MallocTmpBuffer() {
auto input_shape = in_tensors_.at(0)->shape();
// The last axis reduces directly into the output tensor, so only
// num_axes_ - 1 intermediate buffers are needed.
for (auto i = 0; i < num_axes_ - 1; i++) {
int axis = axes_[i];
size_t size = 1;
for (auto j = 0; j < input_shape.size(); j++) {
if (static_cast<size_t>(axis) != j) {
size *= input_shape[j];
}
}
int32_t *buffer = reinterpret_cast<int32_t *>(malloc(size * sizeof(int32_t)));
if (buffer == nullptr) {
MS_LOG(ERROR) << "Malloc data failed.";
return RET_ERROR;
}
data_buffers_.emplace_back(buffer);
// Collapse the reduced axis so the next buffer is sized for the shrunken shape.
input_shape[axis] = 1;
}

auto input = in_tensors_.at(0);
begin_src_data_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t) * input->ElementsNum()));
if (begin_src_data_ == nullptr) {
return RET_NULL_PTR;
}
// NOTE(review): this copies the input tensor's data at Init() time, but
// input->Data() may not be populated until inference runs — confirm, or move
// this widening copy into Run().
auto input_data = reinterpret_cast<int8_t *>(input->Data());
for (auto i = 0; i < input->ElementsNum(); i++) {
begin_src_data_[i] = static_cast<int32_t>(input_data[i]);
}
return RET_OK;
}

// Thread-pool trampoline: unpacks the kernel from cdata and forwards the
// task id to its CallReduceUnit. Returns RET_OK, or RET_ERROR if the unit
// reports a failure.
int ReduceInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto *kernel = reinterpret_cast<ReduceInt8CPUKernel *>(cdata);
  auto error_code = kernel->CallReduceUnit(task_id);
  if (error_code == RET_OK) {
    return RET_OK;
  }
  MS_LOG(ERROR) << "Reduce Run error task_id[" << task_id << "] error_code[" << error_code << "]";
  return RET_ERROR;
}

// Executes the reduction: reduces each axis in axes_ in order. All but the
// last axis run in int32 through the scratch buffers; the last axis
// requantizes into the int8 output tensor. Each axis is parallelized via
// LiteBackendParallelLaunch, with outer/inner/axis sizes recomputed from the
// progressively collapsed tmp_shape_.
int ReduceInt8CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}

is_last_axis_ = false;
tmp_shape_ = in_tensors_.at(0)->shape();
src_data_ = begin_src_data_;

// Intermediate axes: data_buffers_ holds num_axes_ - 1 buffers, one per
// non-final axis. Before each pass, load the matching per-axis multiplier
// for the active mode into quant_arg_.
for (int i = 0; i < data_buffers_.size(); ++i) {
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
quant_arg_.mean_multiplier_ = mean_multipliers_[i]->multiplier_;
quant_arg_.mean_left_shift_ = mean_multipliers_[i]->left_shift_;
quant_arg_.mean_right_shift_ = mean_multipliers_[i]->right_shift_;
}

if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
quant_arg_.prod_multiplier_ = prod_multipliers_[i]->multiplier_;
quant_arg_.prod_left_shift_ = prod_multipliers_[i]->left_shift_;
quant_arg_.prod_right_shift_ = prod_multipliers_[i]->right_shift_;
}
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
quant_arg_.sum_square_multiplier_ = sum_square_multipliers_[i]->multiplier_;
quant_arg_.sum_square_left_shift_ = sum_square_multipliers_[i]->left_shift_;
quant_arg_.sum_square_right_shift_ = sum_square_multipliers_[i]->right_shift_;
}
dst_data_ = data_buffers_[i];
int axis = axes_[i];
// outer = product of dims before the axis, inner = product after it.
outer_size_ = 1;
for (int j = 0; j < axis; j++) {
outer_size_ *= tmp_shape_[j];
}
inner_size_ = 1;
for (int k = axis + 1; k < static_cast<int>(tmp_shape_.size()); k++) {
inner_size_ *= tmp_shape_[k];
}
axis_size_ = tmp_shape_[axis];
auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
return RET_ERROR;
}
// Collapse the reduced axis and chain this pass's output into the next.
tmp_shape_[axis] = 1;
src_data_ = dst_data_;
}

// Final axis: use the last multiplier and write requantized int8 results
// straight into the output tensor.
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
quant_arg_.mean_multiplier_ = mean_multipliers_.back()->multiplier_;
quant_arg_.mean_left_shift_ = mean_multipliers_.back()->left_shift_;
quant_arg_.mean_right_shift_ = mean_multipliers_.back()->right_shift_;
}
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
quant_arg_.prod_multiplier_ = prod_multipliers_.back()->multiplier_;
quant_arg_.prod_left_shift_ = prod_multipliers_.back()->left_shift_;
quant_arg_.prod_right_shift_ = prod_multipliers_.back()->right_shift_;
}
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
quant_arg_.sum_square_multiplier_ = sum_square_multipliers_.back()->multiplier_;
quant_arg_.sum_square_left_shift_ = sum_square_multipliers_.back()->left_shift_;
quant_arg_.sum_square_right_shift_ = sum_square_multipliers_.back()->right_shift_;
}
int last_reduce_axis = axes_[num_axes_ - 1];
outer_size_ = 1;
for (int i = 0; i < last_reduce_axis; i++) {
outer_size_ *= tmp_shape_[i];
}
inner_size_ = 1;
for (int i = last_reduce_axis + 1; i < static_cast<int>(tmp_shape_.size()); i++) {
inner_size_ *= tmp_shape_[i];
}
axis_size_ = tmp_shape_[last_reduce_axis];
last_dst_data_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data());
is_last_axis_ = true;
auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
return RET_ERROR;
}

// NOTE(review): begin_src_data_ is freed here and never reallocated, so a
// second Run() would start from a null src_data_ — confirm Run() is only
// invoked once per Init(), or move this free to the destructor.
if (begin_src_data_ != nullptr) {
free(begin_src_data_);
begin_src_data_ = nullptr;
}

return RET_OK;
}

// Runs one thread's share of the current reduce pass. Intermediate passes use
// reducer_ (int32 -> int32 scratch buffer); the final pass uses last_reducer_
// (int32 -> requantized int8 output). Returns the reducer's error code.
int ReduceInt8CPUKernel::CallReduceUnit(int task_id) {
  if (is_last_axis_) {
    return last_reducer_(outer_size_, inner_size_, axis_size_, src_data_, last_dst_data_, &quant_arg_, task_id,
                         context_->thread_num_);
  }
  return reducer_(outer_size_, inner_size_, axis_size_, src_data_, dst_data_, &quant_arg_, task_id,
                  context_->thread_num_);
}
} // namespace mindspore::kernel

+ 98
- 0
mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h View File

@@ -0,0 +1,98 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_

#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/reduce_parameter.h"
#include "nnacl/int8/reduce_int8.h"
#include "nnacl/quantization/quantize.h"
#include "ir/anf.h"
#include "src/runtime/kernel/arm/base/reduce_base.h"

using mindspore::schema::ReduceMode;

namespace mindspore::kernel {
// Int8 Reduce kernel. Reduces each configured axis in turn: intermediate
// axes accumulate in int32 scratch buffers; the final axis requantizes into
// the int8 output using the fixed-point multipliers computed in Init().
class ReduceInt8CPUKernel : public ReduceBaseCPUKernel {
  // Intermediate-axis reducer: int32 in, int32 out.
  typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                         int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
  // Final-axis reducer: int32 in, requantized int8 out.
  typedef int (*LastReducer)(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                             int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);

 public:
  ReduceInt8CPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                      const lite::Primitive *primitive)
      : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {}
  ~ReduceInt8CPUKernel() {
    for (auto *buffer : data_buffers_) {
      if (buffer != nullptr) {
        free(buffer);
      }
    }
    // Fixed: begin_src_data_ is normally freed at the end of Run(), which
    // nulls it afterwards; free it here as well so it is not leaked when
    // Init() succeeds but Run() is never called.
    if (begin_src_data_ != nullptr) {
      free(begin_src_data_);
      begin_src_data_ = nullptr;
    }
    for (auto qm : mean_multipliers_) {
      delete qm;
    }
    for (auto qm : prod_multipliers_) {
      delete qm;
    }
    for (auto qm : sum_square_multipliers_) {
      delete qm;
    }
    src_data_ = nullptr;
    dst_data_ = nullptr;
  }

  int Init() override;
  int ReSize() override { return 0; }
  int Run() override;
  int CallReduceUnit(int task_id);
  // NOTE(review): declared but no definition visible in reduce_int8.cc —
  // confirm it is still needed.
  int ReduceLastAxis(int task_id);

 public:
  bool is_last_axis_ = true;  // selects last_reducer_ in CallReduceUnit()

 private:
  int MallocTmpBuffer();      // scratch buffers + int32 copy of the input
  int CalculateQuantArgs();   // fixed-point multipliers per mode/axis

 private:
  ReduceParameter *param_ = nullptr;
  ReduceQuantArg quant_arg_;

 private:
  int32_t *begin_src_data_ = nullptr;      // int32 widening copy of the int8 input
  int8_t *last_dst_data_ = nullptr;        // output tensor data (final pass)
  std::vector<int32_t *> data_buffers_;    // one int32 buffer per intermediate axis
  const int32_t *src_data_ = nullptr;      // current pass input (non-owning view)
  int32_t *dst_data_ = nullptr;            // current pass output (non-owning view)

  Reducer reducer_ = nullptr;
  LastReducer last_reducer_ = nullptr;
  // Per-axis multipliers, owned by this kernel (deleted in the destructor).
  std::vector<QuantMulArg *> mean_multipliers_;
  std::vector<QuantMulArg *> prod_multipliers_;
  std::vector<QuantMulArg *> sum_square_multipliers_;
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_REDUCE_INT8_H_

+ 2
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/errorcode.h View File

@@ -49,6 +49,8 @@ typedef enum ErrorCodeUint8OpEnum {

// Error codes reserved for int8 nnacl kernels (range 40000-49999).
typedef enum ErrorCodeInt8OpEnum {
  NNACL_ERRCODE_OP_INT8_START = 40000,
  NNACL_ERRCODE_ADD_OVERFLOW,  // an int32 addition would overflow
  NNACL_ERRCODE_MUL_OVERFLOW,  // an int32 multiplication would overflow
  NNACL_ERRCODE_OP_INT8_END = 49999
} ErrorCodeInt8OpEnums;



+ 1
- 8
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/reduce.h View File

@@ -17,15 +17,8 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_REDUCE_H_
#include "nnacl/op_base.h"
#define REDUCE_MAX_AXES_NUM 8
#include "src/runtime/kernel/arm/nnacl/reduce_parameter.h"

typedef struct ReduceParameter {
OpParameter op_parameter_;
bool keep_dims_;
int axes_[REDUCE_MAX_AXES_NUM];
int num_axes_;
int mode_;
} ReduceParameter;

#ifdef __cplusplus
extern "C" {


+ 467
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c View File

@@ -0,0 +1,467 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <stdint.h>
#include "nnacl/int8/reduce_int8.h"
#include "nnacl/errorcode.h"
#include "nnacl/quantization/fixed_point.h"

// Returns true if x + y would overflow int32.
// Fixed: the previous implementation computed the int32 sum first, which is
// undefined behavior precisely in the overflow case it tries to detect.
// The check is now done in 64-bit arithmetic, which cannot overflow here.
inline bool isAddOverflow(int32_t x, int32_t y) {
  const int64_t sum = (int64_t)x + (int64_t)y;
  return sum > (int64_t)INT32_MAX || sum < (int64_t)INT32_MIN;
}

// Returns true if x * y would overflow int32.
// Fixed: the previous implementation computed the int32 product first
// (undefined behavior on overflow) and then divided by x, which is itself
// undefined for x == -1, y == INT32_MIN. The 64-bit product cannot overflow.
inline bool isMulOverflow(int32_t x, int32_t y) {
  const int64_t prod = (int64_t)x * (int64_t)y;
  return prod > (int64_t)INT32_MAX || prod < (int64_t)INT32_MIN;
}

// Get x such that (x-zp_in) * scale_in = mean
// Assuming reduce n axes, this works for first n-1 reduce. One call for one reduce.
//
// Mean-reduce over one axis of the flattened [outer, axis, inner] view,
// keeping the result in the *input* quant domain so further reduce passes can
// chain; only the final pass (ReduceMeanLastAxis) requantizes to the output.
// Rows are interleaved across threads via (tid, thread_num).
// Returns NNACL_OK, NNACL_NULL_PTR or NNACL_ERRCODE_ADD_OVERFLOW.
int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int32_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int32_t *inner_dst = outer_dst + k;
      int32_t sum = 0;
      // (x - zp_in) * scale_in = mean[(item - zp_in) * scale_in]
      // x = mean(item-zp_in) + zp_in
      for (i = 0; i < axis_size; i++) {
        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
        if (isAddOverflow(sum, tmp)) {
          return NNACL_ERRCODE_ADD_OVERFLOW;
        }
        sum += tmp;
      }
      // sum / axis_size via the precomputed fixed-point reciprocal
      // (mean_multiplier_ with mean_left/right_shift_).
      int32_t mean = RoundingDivideByPOT(
        SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->mean_left_shift_), quant->mean_multiplier_),
        quant->mean_right_shift_);
      if (isAddOverflow(mean, quant->in_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      *inner_dst = mean + quant->in_zp_;
    }
  }
  return NNACL_OK;
}

// suppose reduce n axes, this works for last reduce axis.
// get y such that (y-zp_out) * scale_out = mean(x-zp_in)*scale_in
//
// Final mean-reduce pass: averages in the input domain, then requantizes
// through the in->out fixed-point multiplier and saturates to int8.
int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int8_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int8_t *inner_dst = outer_dst + k;
      int32_t sum = 0;
      for (i = 0; i < axis_size; i++) {
        // y = mean(x-zp_in) * scale + zp_out
        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
        if (isAddOverflow(tmp, sum)) {
          return NNACL_ERRCODE_ADD_OVERFLOW;
        }
        sum += tmp;
      }
      // sum / axis_size (fixed-point reciprocal)
      int32_t mean = RoundingDivideByPOT(
        SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->mean_left_shift_), quant->mean_multiplier_),
        quant->mean_right_shift_);
      // rescale from input scale to output scale
      int32_t mean_scaled =
        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(mean * (1 << (unsigned int)quant->in_out_left_shift_),
                                                              quant->in_out_multiplier_),
                            quant->in_out_right_shift_);
      if (isAddOverflow(mean_scaled, quant->out_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      mean = mean_scaled + quant->out_zp_;

      // saturate to the int8 output range
      if (mean > INT8_MAX) {
        *inner_dst = INT8_MAX;
      } else if (mean < INT8_MIN) {
        *inner_dst = INT8_MIN;
      } else {
        *inner_dst = (int8_t)mean;
      }
    }
  }
  return NNACL_OK;
}

// Sum-reduce over one axis of the flattened [outer, axis, inner] view.
// Output x satisfies (x - zp_in) * scale_in = sum((item - zp_in) * scale_in),
// i.e. the result stays in the input quant domain so further reduce passes can
// chain; ReduceSumLastAxis performs the final requantization.
// Returns NNACL_OK, NNACL_NULL_PTR or NNACL_ERRCODE_ADD_OVERFLOW.
int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  for (int outer = tid; outer < outer_size; outer += thread_num) {
    const int32_t *src_plane = src_data + outer * axis_size * inner_size;
    int32_t *dst_row = dst_data + outer * inner_size;
    for (int inner = 0; inner < inner_size; ++inner) {
      int32_t acc = 0;
      for (int a = 0; a < axis_size; ++a) {
        const int32_t centered = src_plane[a * inner_size + inner] - quant->in_zp_;
        if (isAddOverflow(centered, acc)) {
          return NNACL_ERRCODE_ADD_OVERFLOW;
        }
        acc += centered;
      }
      if (isAddOverflow(quant->in_zp_, acc)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      dst_row[inner] = acc + quant->in_zp_;
    }
  }
  return NNACL_OK;
}

// suppose reduce n axes, this works for last reduce axis.
// get y such that (y-zp_out) * scale_out = sum(item-zp_in)*scale_in
//
// Final sum-reduce pass: accumulates in the input domain, rescales through
// the in->out fixed-point multiplier, then saturates to int8.
int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int8_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int8_t *inner_dst = outer_dst + k;
      int32_t sum = 0;
      for (i = 0; i < axis_size; i++) {
        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
        if (isAddOverflow(tmp, sum)) {
          return NNACL_ERRCODE_ADD_OVERFLOW;
        }
        sum += tmp;
      }
      // rescale from input scale to output scale
      int32_t sum_scaled =
        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->in_out_left_shift_),
                                                              quant->in_out_multiplier_),
                            quant->in_out_right_shift_);
      if (isAddOverflow(sum_scaled, quant->out_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      sum = sum_scaled + quant->out_zp_;
      // saturate to the int8 output range
      if (sum > INT8_MAX) {
        *inner_dst = INT8_MAX;
      } else if (sum < INT8_MIN) {
        *inner_dst = INT8_MIN;
      } else {
        *inner_dst = (int8_t)sum;
      }
    }
  }
  return NNACL_OK;
}

// Final max-reduce pass: takes the max in the input domain, rescales the
// zero-point-centered value through the in->out fixed-point multiplier, then
// saturates to int8.
// NOTE(review): the running max starts at INT8_MIN — assumes the int32
// working values originated from int8 data, so INT8_MIN is a valid floor.
int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int8_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int8_t *inner_dst = outer_dst + k;
      int32_t tmp = INT8_MIN;
      for (i = 0; i < axis_size; i++) {
        tmp = tmp > inner_src[i * inner_size] ? tmp : inner_src[i * inner_size];
      }
      // center on the input zero point and rescale to the output scale
      int32_t tmp_scaled = RoundingDivideByPOT(
        SaturatingRoundingDoublingHighMul((tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_),
                                          quant->in_out_multiplier_),
        quant->in_out_right_shift_);
      if (isAddOverflow(tmp_scaled, quant->out_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      tmp = tmp_scaled + quant->out_zp_;
      // saturate to the int8 output range
      if (tmp > INT8_MAX) {
        *inner_dst = INT8_MAX;
      } else if (tmp < INT8_MIN) {
        *inner_dst = INT8_MIN;
      } else {
        *inner_dst = (int8_t)tmp;
      }
    }
  }
  return NNACL_OK;
}

// Max-reduce over one axis, staying in the input quant domain; the final
// pass (ReduceMaxLastAxis) performs the requantization. `quant` is unused
// here but kept so the function matches the common Reducer signature.
// The running max starts at INT8_MIN, a valid floor for int8-sourced data.
int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  for (int outer = tid; outer < outer_size; outer += thread_num) {
    const int32_t *src_plane = src_data + outer * axis_size * inner_size;
    int32_t *dst_row = dst_data + outer * inner_size;
    for (int inner = 0; inner < inner_size; ++inner) {
      int32_t best = INT8_MIN;
      for (int a = 0; a < axis_size; ++a) {
        const int32_t v = src_plane[a * inner_size + inner];
        if (v > best) {
          best = v;
        }
      }
      dst_row[inner] = best;
    }
  }
  return NNACL_OK;
}

// Final min-reduce pass: takes the min in the input domain, rescales the
// zero-point-centered value through the in->out fixed-point multiplier, then
// saturates to int8. The running min starts at INT8_MAX (valid ceiling for
// int8-sourced data).
int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  // NOTE(review): 20 extra bits are added to both the left and right shift,
  // leaving the net scale unchanged — presumably for extra intermediate
  // precision/headroom. ReduceMaxLastAxis does not do this; confirm whether
  // the asymmetry is intentional.
  int base_offset = 20;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int8_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int8_t *inner_dst = outer_dst + k;
      int32_t tmp = INT8_MAX;
      for (i = 0; i < axis_size; i++) {
        tmp = tmp < inner_src[i * inner_size] ? tmp : inner_src[i * inner_size];
      }
      // shift amount is in_out_left_shift_ + base_offset: the cast binds to
      // in_out_left_shift_ only, then base_offset is added before the <<
      int32_t tmp_scaled =
        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
                              (tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_ + base_offset),
                              quant->in_out_multiplier_),
                            quant->in_out_right_shift_ + base_offset);
      if (isAddOverflow(tmp_scaled, quant->out_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      tmp = tmp_scaled + quant->out_zp_;
      // saturate to the int8 output range
      if (tmp > INT8_MAX) {
        *inner_dst = INT8_MAX;
      } else if (tmp < INT8_MIN) {
        *inner_dst = INT8_MIN;
      } else {
        *inner_dst = (int8_t)tmp;
      }
    }
  }
  return NNACL_OK;
}

// Min-reduce over one axis, staying in the input quant domain; the final
// pass (ReduceMinLastAxis) performs the requantization. `quant` is unused
// here but kept so the function matches the common Reducer signature.
// The running min starts at INT8_MAX, a valid ceiling for int8-sourced data.
int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  for (int outer = tid; outer < outer_size; outer += thread_num) {
    const int32_t *src_plane = src_data + outer * axis_size * inner_size;
    int32_t *dst_row = dst_data + outer * inner_size;
    for (int inner = 0; inner < inner_size; ++inner) {
      int32_t best = INT8_MAX;
      for (int a = 0; a < axis_size; ++a) {
        const int32_t v = src_plane[a * inner_size + inner];
        if (v < best) {
          best = v;
        }
      }
      dst_row[inner] = best;
    }
  }
  return NNACL_OK;
}

// Final prod-reduce pass:
// quant_out = prod(quant_in - zp_in) * (scale_in^axis_size / scale_out) + zp_out,
// realized as two fixed-point rescales (prod_* then in_out_*), then int8 saturation.
int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int8_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int8_t *inner_dst = outer_dst + k;
      int32_t prod = 1;
      for (i = 0; i < axis_size; i++) {
        // quant_out = prod(quant_in-zp) * (scale_in^num/scale_out) + zp_out
        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
        if (isMulOverflow(prod, tmp)) {
          return NNACL_ERRCODE_MUL_OVERFLOW;
        }
        prod *= tmp;
      }
      // first rescale: fold in scale_in^(axis_size-1) via the prod multiplier
      prod = RoundingDivideByPOT(
        SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->prod_left_shift_), quant->prod_multiplier_),
        quant->prod_right_shift_);
      // second rescale: input scale -> output scale
      int32_t prod_scaled =
        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->in_out_left_shift_),
                                                              quant->in_out_multiplier_),
                            quant->in_out_right_shift_);
      if (isAddOverflow(prod_scaled, quant->out_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      prod = prod_scaled + quant->out_zp_;
      // saturate to the int8 output range
      if (prod > INT8_MAX) {
        *inner_dst = INT8_MAX;
      } else if (prod < INT8_MIN) {
        *inner_dst = INT8_MIN;
      } else {
        *inner_dst = (int8_t)prod;
      }
    }
  }
  return NNACL_OK;
}

// Prod-reduce over one axis, keeping the result in the input quant domain
// (rescaled by the prod_* fixed-point multiplier so that chained passes keep
// a consistent scale); ReduceProdLastAxis performs the final requantization.
int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int32_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int32_t *inner_dst = outer_dst + k;
      int32_t prod = 1;
      for (i = 0; i < axis_size; i++) {
        int32_t tmp = inner_src[i * inner_size] - quant->in_zp_;
        if (isMulOverflow(prod, tmp)) {
          return NNACL_ERRCODE_MUL_OVERFLOW;
        }
        prod *= tmp;
      }
      // rescale back toward the input scale via the prod multiplier
      prod = RoundingDivideByPOT(
        SaturatingRoundingDoublingHighMul(prod * (1 << (unsigned int)quant->prod_left_shift_), quant->prod_multiplier_),
        quant->prod_right_shift_);
      if (isAddOverflow(prod, quant->in_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      *inner_dst = prod + quant->in_zp_;  // re-add input zero point; overflow checked above
    }
  }
  return NNACL_OK;
}

// Final sum-of-squares pass:
// quant_out = sum((quant_in - zp_in)^2) * scale_in^2 / scale_out + zp_out,
// realized with the sum_square_* fixed-point multiplier, then int8 saturation.
int ReduceSumSquareLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                            int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int8_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int8_t *inner_dst = outer_dst + k;
      int32_t sum = 0;
      // quant_out = sum((quant_in - zp)^2) * scale_in^2 / scale_out + zp_out
      for (i = 0; i < axis_size; i++) {
        int32_t tmp;
        if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) {
          return NNACL_ERRCODE_MUL_OVERFLOW;
        }
        tmp = (inner_src[i * inner_size] - quant->in_zp_) * (inner_src[i * inner_size] - quant->in_zp_);
        if (isAddOverflow(sum, tmp)) {
          return NNACL_ERRCODE_ADD_OVERFLOW;
        }
        sum += tmp;
      }
      // rescale scale_in^2 -> scale_out via the sum_square multiplier
      int32_t sum_scaled =
        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->sum_square_left_shift_),
                                                              quant->sum_square_multiplier_),
                            quant->sum_square_right_shift_);
      if (isAddOverflow(sum_scaled, quant->out_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      sum = sum_scaled + quant->out_zp_;

      // saturate to the int8 output range
      if (sum > INT8_MAX) {
        *inner_dst = INT8_MAX;
      } else if (sum < INT8_MIN) {
        *inner_dst = INT8_MIN;
      } else {
        *inner_dst = (int8_t)sum;
      }
    }
  }
  return NNACL_OK;
}

// Sum-of-squares reduce over one axis, rescaled by the sum_square_* fixed-point
// multiplier and kept in the input quant domain (zero point re-added) so that
// further reduce passes can chain; the last pass requantizes to int8.
int ReduceSumSquareInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                        int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j, k;
  for (j = tid; j < outer_size; j += thread_num) {
    const int32_t *outer_src = src_data + j * axis_size * inner_size;
    int32_t *outer_dst = dst_data + j * inner_size;
    for (k = 0; k < inner_size; k++) {
      const int32_t *inner_src = outer_src + k;
      int32_t *inner_dst = outer_dst + k;
      int32_t sum = 0;
      for (i = 0; i < axis_size; i++) {
        int32_t tmp;
        if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) {
          return NNACL_ERRCODE_MUL_OVERFLOW;
        }
        tmp = (inner_src[i * inner_size] - quant->in_zp_) * (inner_src[i * inner_size] - quant->in_zp_);
        if (isAddOverflow(sum, tmp)) {
          return NNACL_ERRCODE_ADD_OVERFLOW;
        }
        sum += tmp;
      }
      // rescale the squared-domain sum via the sum_square multiplier
      sum =
        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->sum_square_left_shift_),
                                                              quant->sum_square_multiplier_),
                            quant->sum_square_right_shift_);
      if (isAddOverflow(sum, quant->in_zp_)) {
        return NNACL_ERRCODE_ADD_OVERFLOW;
      }
      *inner_dst = sum + quant->in_zp_;
    }
  }
  return NNACL_OK;
}

+ 53
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.h View File

@@ -0,0 +1,53 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_
#include "nnacl/quantization/quantize.h"
#ifdef __cplusplus
extern "C" {
#endif

// Int8 reduce kernels over a flattened [outer, axis, inner] view.
// The *Int8 variants reduce one non-final axis and keep int32 results in the
// input quant domain; the *LastAxis variants reduce the final axis and
// requantize to int8. Work is split across threads via (tid, thread_num).
// All return NNACL_OK or an NNACL_* error code.
int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                  int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                      int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                       int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                   int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceSumSquareLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                            int8_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
int ReduceSumSquareInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *src_data,
                        int32_t *dst_data, const ReduceQuantArg *quant, const int tid, const int thread_num);
// Overflow predicates for int32 arithmetic used by the kernels above.
bool isAddOverflow(int32_t x, int32_t y);
bool isMulOverflow(int32_t x, int32_t y);
#ifdef __cplusplus
}
#endif
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_REDUCE_INT8_H_

+ 20
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h View File

@@ -219,6 +219,26 @@ typedef struct DivQuantArg {
int output_multiplier_;
int output_shift_;
} DivQuantArg;

// Quantization parameters consumed by the int8 reduce kernels.
// The (multiplier, left_shift, right_shift) triples are fixed-point factors
// applied as SaturatingRoundingDoublingHighMul + RoundingDivideByPOT.
typedef struct ReduceQuantArg {
  double in_scale_;   // input tensor scale
  int32_t in_zp_;     // input tensor zero point
  double out_scale_;  // output tensor scale
  int32_t out_zp_;    // output tensor zero point
  // in_scale_/out_scale_ requantization factor (used by the *LastAxis kernels)
  int32_t in_out_multiplier_;
  int in_out_left_shift_;
  int in_out_right_shift_;
  // mean: presumably encodes 1/axis_size — confirm in CalculateQuantArgs
  int32_t mean_multiplier_;
  int mean_left_shift_;
  int mean_right_shift_;
  // prod: scale correction for products of centered values
  int32_t prod_multiplier_;
  int prod_left_shift_;
  int prod_right_shift_;
  // sum-square: scale correction for sums of squared centered values
  int32_t sum_square_multiplier_;
  int sum_square_left_shift_;
  int sum_square_right_shift_;
} ReduceQuantArg;

#ifdef __cplusplus
extern "C" {
#endif


+ 30
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/reduce_parameter.h View File

@@ -0,0 +1,30 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_
#include "nnacl/op_base.h"
#define REDUCE_MAX_AXES_NUM 8

// Reduce op parameter shared by fp32 and int8 kernels.
// Fixed: declared with a typedef so that C translation units (this header is
// consumed via extern "C" nnacl code, and the fp32/reduce.h it replaced used
// a typedef) can refer to the plain name `ReduceParameter`, not only
// `struct ReduceParameter`. Fully backward compatible for C++ users.
typedef struct ReduceParameter {
  OpParameter op_parameter_;
  bool keep_dims_;                 // keep reduced dims as size-1 axes
  int axes_[REDUCE_MAX_AXES_NUM];  // axes to reduce, first num_axes_ entries valid
  int num_axes_;
  int mode_;  // schema::ReduceMode value
} ReduceParameter;

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_REDUCE_PARAMETER_H_

+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/resize_parameter.h View File

@@ -16,7 +16,7 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_RESIZE_PARAMETER_H_

#include "src/runtime/kernel/arm/nnacl/op_base.h"
#include "nnacl/op_base.h"
typedef struct ResizeParameter {
OpParameter op_parameter_;
int method_;


+ 157
- 114
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc View File

@@ -13,204 +13,255 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <vector>
#include "mindspore/lite/src/lite_kernel.h"
#include "mindspore/lite/src/ir/tensor.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.h"
#include "nnacl/resize_parameter.h"
#include "mindspore/lite/src/kernel_registry.h"

namespace mindspore {

// Fixture for fp32 bilinear-resize kernel tests: builds the kernel through
// the registry in Prepare() and compares kernel output against expectations.
class TestResizeBilinearFp32 : public mindspore::CommonTest {
 public:
  TestResizeBilinearFp32() = default;
  // Wires the tensors to caller-owned buffers, fills the resize parameter and
  // creates kernel_ via the registered creator.
  void Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape, float *input_data,
               float *output_data, const bool align_corners, const int thread_num);

  void TearDown() override;

 public:
  int tid = 0;           // task id for single-threaded runs
  int thread_num = 1;
  float err_tol = 1e-5;  // tolerance for CompareOutputData
  lite::tensor::Tensor in_tensor_;
  lite::tensor::Tensor out_tensor_;
  std::vector<lite::tensor::Tensor *> inputs_{&in_tensor_};
  std::vector<lite::tensor::Tensor *> outputs_{&out_tensor_};
  ResizeParameter param_ = {{}};  // must outlive kernel_, which keeps a pointer to it
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize};
  lite::Context ctx_ = lite::Context();
  kernel::KernelCreator creator_ = nullptr;
  kernel::LiteKernel *kernel_ = nullptr;
};

// Detach the caller-owned stack buffers that Prepare() attached via SetData,
// so tensor cleanup does not release memory the test does not own.
void TestResizeBilinearFp32::TearDown() {
  in_tensor_.SetData(nullptr);
  out_tensor_.SetData(nullptr);
}

void TestResizeBilinearFp32::Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape,
float *input_data, float *output_data, const bool align_corners,
const int thread_num) {
in_tensor_.set_data_type(kNumberTypeFloat32);
in_tensor_.set_shape(input_shape);
out_tensor_.set_data_type(kNumberTypeFloat32);
out_tensor_.set_shape(output_shape);
in_tensor_.SetData(input_data);
out_tensor_.SetData(output_data);

ResizeParameter param_ = {
{}, static_cast<int>(schema::ResizeMethod_BILINEAR), output_shape[1], output_shape[2], align_corners};
desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize};
ctx_ = lite::Context();
ctx_.thread_num_ = thread_num;
creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator_, nullptr);
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc, nullptr);
ASSERT_NE(kernel_, nullptr);
}

// 1*1 -> 1*1
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest1) {
std::vector<float> input = {1.0};
float input_data[] = {1.0f};
float output_data[1] = {0};
std::vector<int> input_shape = {1, 1, 1, 1};
std::vector<int> output_shape = {1, 1, 1, 1};
std::vector<float> expect = {1.0};
bool align_corners = false;

auto output_size = 1;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 1*1
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest2) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0};
float output_data[1] = {0};
std::vector<int> input_shape = {1, 2, 2, 1};
std::vector<int> output_shape = {1, 1, 1, 1};
std::vector<float> expect = {0.0};
bool align_corners = false;

int output_size = 1;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 1*2
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest3) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0};
float output_data[2] = {0};
std::vector<int> input_shape = {1, 2, 2, 1};
std::vector<int> output_shape = {1, 1, 2, 1};
std::vector<float> expect = {0.0, 1.0};
bool align_corners = false;

auto output_size = 2;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 2*1
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest4) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0};
float output_data[2] = {0};
std::vector<int> input_shape = {1, 2, 2, 1};
std::vector<int> output_shape = {1, 2, 1, 1};
std::vector<float> expect = {0.0, 2.0};
bool align_corners = false;

auto output_size = 2;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 2*2
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest5) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0};
float output_data[4] = {0};
std::vector<int> input_shape = {1, 2, 2, 1};
std::vector<int> output_shape = {1, 2, 2, 1};
std::vector<float> expect = {0.0, 1.0, 2.0, 3.0};
bool align_corners = false;

auto output_size = 4;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 1*4
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest6) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0};
float output_data[4] = {0};
std::vector<int> input_shape = {1, 2, 2, 1};
std::vector<int> output_shape = {1, 1, 4, 1};
std::vector<float> expect = {0.0, 0.5, 1.0, 1.0};
bool align_corners = false;

auto output_size = 4;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 4*1
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest7) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0};
float output_data[4] = {0};
std::vector<int> input_shape = {1, 2, 2, 1};
std::vector<int> output_shape = {1, 4, 1, 1};
std::vector<float> expect = {0.0, 1.0, 2.0, 2.0};
bool align_corners = false;

auto output_size = 4;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 2*4
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest8) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0};
float output_data[8] = {0};
std::vector<int> input_shape = {1, 2, 2, 1};
std::vector<int> output_shape = {1, 2, 4, 1};
std::vector<float> expect = {0.0, 0.5, 1.0, 1.0, 2.0, 2.5, 3.0, 3.0};
bool align_corners = false;

auto output_size = 8;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 4*2
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest9) {
  // Height-only upscale: 1x2x2x1 -> 1x4x2x1, half-pixel mapping (align_corners = false).
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[8] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 4, 2, 1};
  std::vector<float> expect = {0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0};
  bool align_corners = false;
  auto output_size = 8;

  // Kernel-based execution only; the duplicated pre-refactor direct call was removed.
  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 3*3
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest10) {
  // Non-integer scale: 1x2x2x1 -> 1x3x3x1 (align_corners = false).
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[9] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 3, 3, 1};
  std::vector<float> expect = {0.0, 0.6666667, 1.0, 1.3333334, 2.0, 2.3333335, 2.0, 2.6666667, 3.0};
  bool align_corners = false;
  auto output_size = 9;

  // Kernel-based execution only; the duplicated pre-refactor direct call was removed.
  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 4*4
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest11) {
  // 2x upscale in both spatial dims: 1x2x2x1 -> 1x4x4x1 (align_corners = false).
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[16] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 4, 4, 1};
  std::vector<float> expect = {0.0, 0.5, 1.0, 1.0, 1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 2.0, 2.5, 3.0, 3.0};
  bool align_corners = false;
  auto output_size = 16;

  // Kernel-based execution only; the duplicated pre-refactor direct call was removed.
  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2*2*5 -> 2*4*4*5
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest12) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float output_data[160] = {0};
std::vector<int> input_shape = {2, 2, 2, 5};
std::vector<int> output_shape = {2, 4, 4, 5};
std::vector<float> expect = {
@@ -224,20 +275,21 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest12) {
33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0,
34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0};
bool align_corners = false;

auto output_size = 160;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2*2*5 -> 2*4*4*5 align corners
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest13) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float output_data[160] = {0};
std::vector<int> input_shape = {2, 2, 2, 5};
std::vector<int> output_shape = {2, 4, 4, 5};
std::vector<float> expect = {
@@ -258,20 +310,21 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest13) {
30.0, 31.0, 32.0, 33.0, 34.0, 31.666666, 32.666668, 33.666668, 34.666668, 35.666668,
33.333332, 34.333332, 35.333332, 36.333332, 37.333332, 35.0, 36.0, 37.0, 38.0, 39.0};
bool align_corners = true;

auto output_size = 160;
std::vector<float> output(output_size, 0.0);

ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2*2*5 -> 2*4*4*5 thread_num 2
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest14) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float output_data[160] = {0};
std::vector<int> input_shape = {2, 2, 2, 5};
std::vector<int> output_shape = {2, 4, 4, 5};
std::vector<float> expect = {
@@ -285,24 +338,22 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest14) {
33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0,
34.0, 32.5, 33.5, 34.5, 35.5, 36.5, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0};
bool align_corners = false;

auto output_size = 160;
std::vector<float> output(output_size, 0.0);
thread_num = 2;
tid = 0;
ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
tid = 1;
ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
int thread_num = 2;

Prepare(input_shape, output_shape, input_data, output_data, align_corners, thread_num);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2*2*5 -> 2*4*4*5 thread_num 4
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float output_data[160] = {0};
std::vector<int> input_shape = {2, 2, 2, 5};
std::vector<int> output_shape = {2, 4, 4, 5};
std::vector<float> expect = {
@@ -319,19 +370,11 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) {

auto output_size = 160;
std::vector<float> output(output_size, 0.0);
thread_num = 4;
tid = 0;
ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
tid = 1;
ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
tid = 2;
ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
tid = 3;
ResizeBilinear(input.data(), output.data(), input_shape.data(), output_shape.data(), align_corners, tid,
thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
int thread_num = 4;
Prepare(input_shape, output_shape, input_data, output_data, align_corners, thread_num);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}
} // namespace mindspore

+ 160
- 79
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc View File

@@ -15,168 +15,250 @@
*/
#include <vector>
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.h"
#include "nnacl/resize_parameter.h"
#include "mindspore/lite/src/kernel_registry.h"

namespace mindspore {

// Fixture for the fp32 nearest-neighbor resize kernel.  Tensors wrap caller-owned
// stack buffers; Prepare() builds the kernel through the registry and TearDown()
// detaches the buffers so the tensors never try to free them.
class TestResizeNearestNeighborFp32 : public mindspore::CommonTest {
public:
TestResizeNearestNeighborFp32() = default;
// Configures tensors/parameter/context and creates kernel_ via the registry.
void Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape, float *input_data,
float *output_data, const bool align_corners, const int thread_num);

void TearDown() override;

public:
// tid/thread_num remain for tests that still drive the nnacl function directly.
int tid = 0;
int thread_num = 1;
float err_tol = 1e-5;
lite::tensor::Tensor in_tensor_;
lite::tensor::Tensor out_tensor_;
std::vector<lite::tensor::Tensor *> inputs_{&in_tensor_};
std::vector<lite::tensor::Tensor *> outputs_{&out_tensor_};
// Must outlive kernel_: the kernel stores a pointer to this parameter struct.
ResizeParameter param_ = {{}};
kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize};
lite::Context ctx_ = lite::Context();
kernel::KernelCreator creator_ = nullptr;
kernel::LiteKernel *kernel_ = nullptr;
};

// Detach the caller-owned buffers so tensor destruction does not free stack memory.
void TestResizeNearestNeighborFp32::TearDown() {
in_tensor_.SetData(nullptr);
out_tensor_.SetData(nullptr);
}

void TestResizeNearestNeighborFp32::Prepare(const std::vector<int> &input_shape, const std::vector<int> &output_shape,
                                            float *input_data, float *output_data, const bool align_corners,
                                            const int thread_num) {
  // Wire caller-owned buffers into the tensors; TearDown() detaches them again.
  in_tensor_.set_data_type(kNumberTypeFloat32);
  in_tensor_.set_shape(input_shape);
  out_tensor_.set_data_type(kNumberTypeFloat32);
  out_tensor_.set_shape(output_shape);
  in_tensor_.SetData(input_data);
  out_tensor_.SetData(output_data);

  // Assign to the *member* param_ — a function-local ResizeParameter here would
  // shadow it and dangle: the kernel keeps a pointer to the struct after Prepare returns.
  param_ = {
    {}, static_cast<int>(schema::ResizeMethod_NEAREST_NEIGHBOR), output_shape[1], output_shape[2], align_corners};
  desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize};
  ctx_ = lite::Context();
  ctx_.thread_num_ = thread_num;
  creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator_, nullptr);
  kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc, nullptr);
  ASSERT_NE(kernel_, nullptr);
}
// 1*1 -> 1*1
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest1) {
  // Identity resize: 1x1x1x1 -> 1x1x1x1.
  float input_data[] = {1.0};
  float output_data[1] = {0};
  std::vector<int> input_shape = {1, 1, 1, 1};
  std::vector<int> output_shape = {1, 1, 1, 1};
  std::vector<float> expect = {1.0};
  size_t output_size = 1;
  bool align_corners = false;

  // Kernel-based execution only; the duplicated pre-refactor direct call was removed.
  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 1*1
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest2) {
  // Downscale 2x2 -> 1x1: nearest neighbor picks the top-left element.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[1] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 1, 1, 1};
  std::vector<float> expect = {0.0};
  size_t output_size = 1;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 1*2
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest3) {
  // Downscale height only: 2x2 -> 1x2.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[2] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 1, 2, 1};
  std::vector<float> expect = {0.0, 1.0};
  size_t output_size = 2;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 2*1
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest4) {
  // Downscale width only: 2x2 -> 2x1.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[2] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 2, 1, 1};
  std::vector<float> expect = {0.0, 2.0};
  size_t output_size = 2;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 2*2
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest5) {
  // Same-size resize: 2x2 -> 2x2 is a pass-through.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[4] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 2, 2, 1};
  std::vector<float> expect = {0.0, 1.0, 2.0, 3.0};
  size_t output_size = 4;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 1*4
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest6) {
  // 2x2 -> 1x4: width doubled, height collapsed to the first row.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[4] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 1, 4, 1};
  std::vector<float> expect = {0.0, 0.0, 1.0, 1.0};
  size_t output_size = 4;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 4*1
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest7) {
  // 2x2 -> 4x1: height doubled, width collapsed to the first column.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[4] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 4, 1, 1};
  std::vector<float> expect = {0.0, 0.0, 2.0, 2.0};
  size_t output_size = 4;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 2*4
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest8) {
  // Width-only upscale: 2x2 -> 2x4.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[8] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 2, 4, 1};
  std::vector<float> expect = {0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0};
  size_t output_size = 8;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 4*2
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest9) {
  // Height-only upscale: 2x2 -> 4x2.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[8] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 4, 2, 1};
  std::vector<float> expect = {0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0};
  size_t output_size = 8;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 3*3
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest10) {
  // Non-integer scale: 2x2 -> 3x3.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[9] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 3, 3, 1};
  std::vector<float> expect = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0};
  size_t output_size = 9;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2 -> 4*4
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest11) {
  // 2x upscale in both spatial dims: 2x2 -> 4x4.
  float input_data[] = {0.0, 1.0, 2.0, 3.0};
  float output_data[16] = {0};
  std::vector<int> input_shape = {1, 2, 2, 1};
  std::vector<int> output_shape = {1, 4, 4, 1};
  std::vector<float> expect = {0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 2.0, 3.0, 3.0};
  size_t output_size = 16;
  bool align_corners = false;

  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2*2*5 -> 2*4*4*5
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest12) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float output_data[160] = {0};
std::vector<int> input_shape = {2, 2, 2, 5};
std::vector<int> output_shape = {2, 4, 4, 5};
std::vector<float> expect = {
@@ -190,17 +272,21 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest12) {
31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0,
34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0};
size_t output_size = 160;
std::vector<float> output(output_size, 0.0);
bool align_corners = false;

ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
Prepare(input_shape, output_shape, input_data, output_data, align_corners, 1);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2*2*5 -> 2*4*4*5 thread_num 2
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest13) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float output_data[160] = {0};
std::vector<int> input_shape = {2, 2, 2, 5};
std::vector<int> output_shape = {2, 4, 4, 5};
std::vector<float> expect = {
@@ -214,21 +300,21 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest13) {
31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0,
34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0};
size_t output_size = 160;
std::vector<float> output(output_size, 0.0);

thread_num = 2;
tid = 0;
ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num);
tid = 1;
ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
bool align_corners = false;

Prepare(input_shape, output_shape, input_data, output_data, align_corners, 2);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 2*2*2*5 -> 2*4*4*5 thread_num 4
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest14) {
std::vector<float> input = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float input_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0,
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0};
float output_data[160] = {0};
std::vector<int> input_shape = {2, 2, 2, 5};
std::vector<int> output_shape = {2, 4, 4, 5};
std::vector<float> expect = {
@@ -242,17 +328,12 @@ TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest14) {
31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0, 30.0, 31.0, 32.0, 33.0,
34.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 35.0, 36.0, 37.0, 38.0, 39.0};
size_t output_size = 160;
std::vector<float> output(output_size, 0.0);

thread_num = 4;
tid = 0;
ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num);
tid = 1;
ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num);
tid = 2;
ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num);
tid = 3;
ResizeNearestNeighbor(input.data(), output.data(), input_shape.data(), output_shape.data(), tid, thread_num);
CompareOutputData(output.data(), expect.data(), output_size, err_tol);
bool align_corners = false;

Prepare(input_shape, output_shape, input_data, output_data, align_corners, 4);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputData(output_data, expect.data(), output_size, err_tol);
}
} // namespace mindspore

+ 355
- 0
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc View File

@@ -0,0 +1,355 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "src/ir/tensor.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "nnacl/fp32/reduce.h"

namespace mindspore {
using mindspore::lite::tensor::QuantArg;
using mindspore::lite::tensor::Tensor;
using mindspore::schema::ReduceMode;
using mindspore::schema::ReduceMode_ReduceMax;
using mindspore::schema::ReduceMode_ReduceMean;
using mindspore::schema::ReduceMode_ReduceMin;
using mindspore::schema::ReduceMode_ReduceProd;
using mindspore::schema::ReduceMode_ReduceSum;
using mindspore::schema::ReduceMode_ReduceSumSquare;

// Fixture for int8 quantized Reduce kernels (mean/sum/max/min/...).  Tensors
// wrap caller-owned stack buffers; Prepare() builds the kernel via the registry
// and TearDown() detaches the buffers so the tensors never free them.
class TestReduceInt8 : public mindspore::CommonTest {
 public:
  TestReduceInt8() = default;
  // Configures tensors, quant params and ReduceParameter, then creates kernel_.
  void Prepare(const std::vector<int> &in_shape, const std::vector<int> &out_shape, int8_t *input_data,
               int8_t *output_data, ReduceMode mode, const int *axes, const int num_axes);
  void TearDown() override;

 public:
  int thread_num_ = 1;

  // Must outlive kernel_: the kernel stores a pointer to this parameter struct.
  ReduceParameter param_ = {};
  Tensor in_tensor_;
  Tensor out_tensor_;
  std::vector<Tensor *> inputs{&in_tensor_};
  std::vector<Tensor *> outputs{&out_tensor_};
  kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reduce};
  kernel::KernelCreator creator_ = nullptr;
  lite::Context ctx_ = lite::Context();
  kernel::LiteKernel *kernel_ = nullptr;
  // Quantization: input scale 0.005 / zero-point 5, output scale 0.01 / zero-point 1.
  const QuantArg quant_in_ = {0.005f, 5};
  const QuantArg quant_out_ = {0.01f, 1};
  // Default comparison tolerance; individual tests override it as needed.
  float err_tol_ = 0.05;
};

// Detach the caller-owned buffers so tensor destruction does not free stack memory.
void TestReduceInt8::TearDown() {
  in_tensor_.SetData(nullptr);
  out_tensor_.SetData(nullptr);
}

void TestReduceInt8::Prepare(const std::vector<int> &in_shape, const std::vector<int> &out_shape, int8_t *input_data,
int8_t *output_data, ReduceMode mode, const int *axes, const int num_axes) {
in_tensor_.set_data_type(kNumberTypeInt8);
in_tensor_.set_shape(in_shape);
in_tensor_.SetData(input_data);
in_tensor_.AddQuantParam(quant_in_);

out_tensor_.set_data_type(kNumberTypeInt8);
out_tensor_.set_shape(out_shape);
out_tensor_.SetData(output_data);
out_tensor_.AddQuantParam(quant_out_);

param_.mode_ = static_cast<int>(mode);
param_.num_axes_ = num_axes;
memcpy(param_.axes_, axes, num_axes * sizeof(int));

creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_);

ctx_.thread_num_ = thread_num_;
kernel_ = creator_(inputs, outputs, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc_, nullptr);
}

// ReduceMean over the channel axis (axis 3) of a 2x4x4x3 NHWC tensor -> 2x4x4x1.
TEST_F(TestReduceInt8, Mean) {
/* 2 4 4 3 NHWC */
int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95};
int8_t output_data[32] = {0};
int axes[] = {3};
int num_axes = 1;
std::vector<int> input_shape = {2, 4, 4, 3};
std::vector<int> output_shape = {2, 4, 4, 1};
int output_size = 32;
// Golden values requantized to the output scale/zero-point (0.01 / 1).
int8_t correct[] = {-1, 1, 2, 3, 5, 7, 8, 10, 11, 12, 14, 16, 17, 19, 20, 22,
23, 25, 26, 28, 29, 30, 32, 34, 35, 37, 38, 40, 41, 43, 44, 46};

// Two worker threads to exercise the parallel reduce path.
thread_num_ = 2;
Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMean, axes, num_axes);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

// Quantized mean rounds per element; allow up to 0.09375 average deviation.
err_tol_ = 0.09375;
CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

// ReduceMean with num_axes == 0: reduces over all axes, producing a scalar.
TEST_F(TestReduceInt8, MeanAllAxis) {
/* 2*4*4*3 NHWC */
int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95};
int8_t output_data[1] = {0};
// axes content is ignored when num_axes == 0 (reduce-all).
int axes[] = {0};
int num_axes = 0;
std::vector<int> input_shape = {2, 4, 4, 3};
std::vector<int> output_shape = {1};
int output_size = 1;
int8_t correct[] = {22};
thread_num_ = 2;
Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMean, axes, num_axes);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

// Chained per-axis means accumulate rounding error; allow one quantization step.
err_tol_ = 1.0f;
CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

// ReduceSum over the last axis, addressed with a negative index (-1 == axis 3).
TEST_F(TestReduceInt8, Sum) {
/* 2*4*4*3 NHWC */
int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95};
int8_t output_data[32] = {0};
int axes[] = {-1};
int num_axes = 1;
std::vector<int> input_shape = {2, 4, 4, 3};
std::vector<int> output_shape = {2, 4, 4, 1};
int output_size = 32;
// Golden values; the final entries saturate at the int8 maximum (127).
int8_t correct[] = {-5, -1, 4, 9, 13, 18, 22, 27, 31, 36, 40, 45, 49, 54, 58, 63,
67, 72, 76, 81, 85, 90, 94, 99, 103, 107, 112, 117, 121, 126, 127, 127};
thread_num_ = 2;
Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSum, axes, num_axes);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

err_tol_ = 0.0625f;
CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

// ReduceSum over all four axes of a constant tensor -> scalar.
// 96 * 4 * 0.005 (input scale, zp 5 removed) requantized at 0.01/zp 1 gives -47.
TEST_F(TestReduceInt8, SumAllAxis) {
/* 2*4*4*3 NHWC */
int8_t input_data[96] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
int8_t output_data[1] = {0};
int axes[] = {0, 1, 2, 3};
int num_axes = 4;
std::vector<int> input_shape = {2, 4, 4, 3};
std::vector<int> output_shape = {1};
int output_size = 1;
int8_t correct[] = {-47};
thread_num_ = 2;
Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSum, axes, num_axes);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

// ReduceMax over the channel axis (axis 3) of a 2x4x4x3 NHWC tensor -> 2x4x4x1.
TEST_F(TestReduceInt8, Max) {
/* 2*4*4*3 NHWC */
int8_t input_data[96] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95};
int8_t output_data[32] = {0};
int axes[] = {3};
int num_axes = 1;
std::vector<int> input_shape = {2, 4, 4, 3};
std::vector<int> output_shape = {2, 4, 4, 1};
int output_size = 32;
// Per-triple maxima requantized to the output scale/zero-point.
int8_t correct[] = {-1, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22,
24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46};
thread_num_ = 2;
Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMax, axes, num_axes);
auto ret = kernel_->Run();
EXPECT_EQ(0, ret);

CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

TEST_F(TestReduceInt8, MaxAll) {
  /* 2*4*4*3 NHWC */
  // Ramp 0..95 reduced over all four axes; the single expected value is in the
  // output tensor's quantized scale.
  int8_t input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<int8_t>(i);
  }
  int8_t output_data[1] = {0};
  int axes[] = {0, 1, 2, 3};
  int num_axes = 4;
  std::vector<int> input_shape = {2, 4, 4, 3};
  std::vector<int> output_shape = {1};
  int output_size = 1;
  int8_t correct[] = {46};
  thread_num_ = 2;
  Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMax, axes, num_axes);
  EXPECT_EQ(0, kernel_->Run());

  CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

TEST_F(TestReduceInt8, Min) {
  /* 2*4*4*3 NHWC */
  // Input is the ramp 0..95; min is taken along the innermost (channel) axis.
  // Expected values are expressed in the output tensor's quantized scale.
  int8_t input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<int8_t>(i);
  }
  int8_t output_data[32] = {0};
  int axes[] = {3};
  int num_axes = 1;
  std::vector<int> input_shape = {2, 4, 4, 3};
  std::vector<int> output_shape = {2, 4, 4, 1};
  int output_size = 32;
  int8_t correct[] = {-2, 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21,
                      23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45};
  thread_num_ = 2;
  Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMin, axes, num_axes);
  EXPECT_EQ(0, kernel_->Run());

  CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

TEST_F(TestReduceInt8, MinAll) {
  /* 2*4*4*3 NHWC */
  // Ramp 0..95 reduced to a single value. Unlike MaxAll/SumAllAxis, this test
  // passes num_axes == 0 — NOTE(review): presumably this exercises the
  // "no axes given means reduce over all axes" path of the kernel; confirm
  // against the reduce kernel's axis-resolution logic.
  int8_t input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<int8_t>(i);
  }
  int8_t output_data[1] = {0};
  int axes[] = {0};
  int num_axes = 0;
  std::vector<int> input_shape = {2, 4, 4, 3};
  std::vector<int> output_shape = {1};
  int output_size = 1;
  int8_t correct[] = {-2};
  thread_num_ = 2;
  Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceMin, axes, num_axes);
  EXPECT_EQ(0, kernel_->Run());

  CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

TEST_F(TestReduceInt8, Prod) {
  /* 2*4*4*3 NHWC */
  // Every element is the constant 105; product is taken along the innermost
  // (channel) axis. Expected values are in the output tensor's quantized scale.
  int8_t input_data[96];
  for (size_t i = 0; i < sizeof(input_data); ++i) {
    input_data[i] = 105;
  }
  int8_t output_data[32] = {0};
  int axes[] = {3};
  int num_axes = 1;
  std::vector<int> input_shape = {2, 4, 4, 3};
  std::vector<int> output_shape = {2, 4, 4, 1};
  int output_size = 32;
  int8_t correct[] = {
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
  };
  thread_num_ = 2;
  Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceProd, axes, num_axes);
  EXPECT_EQ(0, kernel_->Run());

  CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

TEST_F(TestReduceInt8, Prod2Axis) {
  /* 1*2*2*3 NHWC */
  // Product reduced over two axes (W and C) at once, collapsing {1,2,2,3} to
  // {1,2}. Expected values are in the output tensor's quantized scale.
  int8_t input_data[12] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105};
  // Buffer is larger than the 2 values actually produced.
  int8_t output_data[8] = {0};
  int axes[] = {2, 3};
  int num_axes = 2;
  std::vector<int> input_shape = {1, 2, 2, 3};
  std::vector<int> output_shape = {1, 2};
  int output_size = 2;
  int8_t correct[] = {3, 3};
  thread_num_ = 1;
  Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceProd, axes, num_axes);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

TEST_F(TestReduceInt8, SumSquare) {
  /* 2*4*4*3 NHWC */
  // Input is the ramp 0..95; sum-of-squares is taken along the innermost
  // (channel) axis. Expected values are in the output tensor's quantized scale.
  int8_t input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<int8_t>(i);
  }
  int8_t output_data[32] = {0};
  int axes[] = {3};
  int num_axes = 1;
  std::vector<int> input_shape = {2, 4, 4, 3};
  std::vector<int> output_shape = {2, 4, 4, 1};
  int output_size = 32;
  int8_t correct[] = {1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6, 7, 9, 10, 12, 14,
                      16, 18, 20, 22, 25, 27, 30, 33, 36, 39, 42, 45, 49, 53, 56, 60};
  thread_num_ = 1;
  Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSumSquare, axes, num_axes);
  EXPECT_EQ(0, kernel_->Run());

  CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

TEST_F(TestReduceInt8, SumSquare2Axis) {
  /* 1*2*2*3 NHWC */
  // Sum-of-squares reduced over two axes given in descending order {3, 2},
  // collapsing {1,2,2,3} to {1,2}. Expected values are in the output tensor's
  // quantized scale.
  int8_t input_data[12] = {105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105};
  // Buffer is larger than the 2 values actually produced.
  int8_t output_data[8] = {0};
  int axes[] = {3, 2};
  int num_axes = 2;
  std::vector<int> input_shape = {1, 2, 2, 3};
  std::vector<int> output_shape = {1, 2};
  int output_size = 2;
  int8_t correct[] = {114, 114};
  thread_num_ = 1;
  Prepare(input_shape, output_shape, input_data, output_data, ReduceMode_ReduceSumSquare, axes, num_axes);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);

  CompareOutputInt8(output_data, correct, output_size, err_tol_);
}

} // namespace mindspore

+ 2
- 4
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc View File

@@ -18,10 +18,8 @@
#include "include/context.h"
#include "src/ir/tensor.h"
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "src/runtime/kernel/arm/nnacl/int8/resize.h"
#include "src/runtime/kernel/arm/int8/resize_int8.h"
#include "nnacl/int8/resize.h"

namespace mindspore {
using mindspore::lite::tensor::QuantArg;
@@ -92,7 +90,7 @@ TEST_F(TestResizeBilinearInt8, Bilinear0) {
int8_t expect[16] = {4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 6};

Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
kernel_->Init();
kernel_->Init(); // todo delete
kernel_->Run();

CompareOutputInt8(output_data, expect, 16, err_percent_);


+ 2
- 2
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc View File

@@ -19,7 +19,7 @@
#include "src/ir/tensor.h"
#include "common/common_test.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "src/runtime/kernel/arm/nnacl/int8/resize.h"
#include "nnacl/int8/resize.h"

namespace mindspore {
using mindspore::lite::tensor::QuantArg;
@@ -92,7 +92,7 @@ TEST_F(TestResizeNearestNeighborInt8, NearestNeighbor0) {
err_percent_ = 0.25f;

Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, false, thread_num);
kernel_->Init();
kernel_->Init(); // todo delete
kernel_->Run();

CompareOutputInt8(output_data, expect, 16, err_percent_);


Loading…
Cancel
Save