From e6109bb759ae10d8e9b505ba895d31f53c4ea15c Mon Sep 17 00:00:00 2001 From: liuwenhao4 Date: Sat, 1 Aug 2020 15:36:37 +0800 Subject: [PATCH] Add multi-thread function for hms ops of reshape and concat, and fix some tiny mistakes. --- .../runtime/kernel/arm/base/concat_base.cc | 3 +- .../runtime/kernel/arm/base/reshape_base.cc | 4 +- .../kernel/arm/int8/arithmetic_int8.cc | 3 + .../runtime/kernel/arm/int8/concat_int8.cc | 142 ++++---- .../src/runtime/kernel/arm/int8/concat_int8.h | 11 +- .../src/runtime/kernel/arm/int8/crop_int8.h | 2 +- .../src/runtime/kernel/arm/int8/mul_int8.cc | 14 +- .../src/runtime/kernel/arm/int8/mul_int8.h | 2 +- .../runtime/kernel/arm/int8/reshape_int8.cc | 58 ++-- .../runtime/kernel/arm/int8/reshape_int8.h | 12 +- .../src/runtime/kernel/arm/int8/split_int8.h | 2 +- .../kernel/arm/nnacl/concat_parameter.h | 7 +- .../kernel/arm/nnacl/int8/concat_int8.cc | 57 ++-- .../kernel/arm/nnacl/int8/concat_int8.h | 4 +- .../kernel/arm/nnacl/int8/crop_int8.cc | 22 +- .../runtime/kernel/arm/nnacl/int8/mul_int8.cc | 1 + .../kernel/arm/nnacl/int8/reshape_int8.cc | 29 +- .../kernel/arm/nnacl/int8/reshape_int8.h | 5 +- .../kernel/arm/nnacl/quantization/quantize.h | 20 +- .../kernel/arm/nnacl/reshape_parameter.h | 1 + .../kernel/arm/int8/concat_int8_tests.cc | 247 ++++++++++++++ .../runtime/kernel/arm/int8/mul_int8_tests.cc | 311 ++++++++++++++++++ .../kernel/arm/int8/reshape_int8_tests.cc | 150 +++++++++ 23 files changed, 914 insertions(+), 193 deletions(-) create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc diff --git a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc index ed1e5416b9..24b0381d50 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc @@ -30,7 +30,8 @@ using mindspore::schema::PrimitiveType_Concat; namespace mindspore::kernel { int ConcatBaseCPUKernel::Init() { - axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ : inputs_.front()->shape().size() + concat_param_->axis_; + auto axis = concat_param_->axis_; + axis_ = axis >= 0 ? axis : inputs_.front()->shape().size() + axis; return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc index 479bfce181..f9712c9968 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc @@ -44,7 +44,7 @@ kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vectorInit(); @@ -68,7 +68,7 @@ kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vectorInit(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc index 3c01ca389b..670894964c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc @@ -126,6 +126,9 @@ int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) { MS_ASSERT(thread_count_ != 0); int stride = UP_DIV(element_num, thread_count_); int count = MSMIN(stride, element_num - stride * thread_id); + if (count <= 0) { + return RET_OK; + } int error_code = arithmetic_run_(tile_data0_ + stride * thread_id, tile_data1_ + stride * thread_id, output_data + stride * thread_id, count); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc index ea88847f16..ac96e28c9c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc @@ -15,6 +15,7 @@ */ #include "src/runtime/kernel/arm/int8/concat_int8.h" 
+#include #include "src/runtime/kernel/arm/nnacl/int8/concat_int8.h" #include "schema/model_generated.h" #include "include/errorcode.h" @@ -27,78 +28,46 @@ namespace mindspore::kernel { int ConcatInt8CPUKernel::Init() { ConcatBaseCPUKernel::Init(); - quant_concat_parm_ = concat_param_->concat_quant_arg_; - quant_concat_parm_ = new (std::nothrow) ConcatQuantArg; auto input_num = inputs_.size(); - quant_concat_parm_->input_num_ = input_num; - quant_concat_parm_->input_sizes_ = reinterpret_cast(malloc(sizeof(int) * input_num)); - if (quant_concat_parm_->input_sizes_ == nullptr) { - MS_LOG(ERROR) << "Null pointer reference: quant_concat_parm_->input_sizes_."; - return RET_ERROR; - } - + concat_param_->input_num_ = input_num; + concat_param_->input_shapes_ = reinterpret_cast(ctx_->allocator->Malloc(sizeof(int *) * input_num)); for (size_t i = 0; i < input_num; i++) { - quant_concat_parm_->input_sizes_[i] = 1; + concat_param_->input_shapes_[i] = reinterpret_cast(inputs_.at(i)->shape().data()); } - quant_concat_parm_->input_shapes_ = reinterpret_cast(malloc(sizeof(int *) * input_num)); - if (quant_concat_parm_->input_shapes_ == nullptr) { - MS_LOG(ERROR) << "Null pointer reference: quant_concat_parm_->input_shapes_."; - return RET_ERROR; + + before_axis_size = 1; + for (int i = 0; i < axis_; i++) { + before_axis_size *= outputs_.at(kOutputIndex)->DimensionSize(i); } - for (size_t i = 0; i < input_num; i++) { - auto *input_tensor = inputs_.at(i); - MS_ASSERT(input_tensor != nullptr); - auto input_size = input_tensor->shape().size(); - MS_ASSERT(input_size != NULL); - quant_concat_parm_->input_shapes_[i] = reinterpret_cast(malloc(sizeof(int) * input_size)); - if (quant_concat_parm_->input_shapes_[i] == nullptr) { - MS_LOG(ERROR) << "Null pointer reference: quant_concat_parm_->input_shapes_[" << i << "]."; - return RET_ERROR; - } - - ::memcpy(quant_concat_parm_->input_shapes_[i], input_tensor->shape().data(), sizeof(int) * input_size); - for (size_t j = 0; j < input_size; 
j++) { - auto *input_tensor_tmp = inputs_.at(i); - auto input_shape = input_tensor_tmp->shape()[j]; - quant_concat_parm_->input_sizes_[i] *= input_shape; - } + int64_t after_axis_size = 1; + auto output_tensor = outputs_.at(kOutputIndex); + int output_dim = output_tensor->shape().size(); + concat_param_->output_shapes_ = output_tensor->shape().data(); + for (size_t i = axis_ + 1; i < output_dim; i++) { + after_axis_size *= concat_param_->output_shapes_[i]; } + concat_param_->after_axis_size = after_axis_size; - quant_concat_parm_->in_quant_args_ = reinterpret_cast(malloc(sizeof(QuantArg) * input_num)); - if (quant_concat_parm_->in_quant_args_ == nullptr) { + concat_param_->quant_arg_.in_args_ = + reinterpret_cast(ctx_->allocator->Malloc(sizeof(QuantArg) * input_num)); + if (concat_param_->quant_arg_.in_args_ == nullptr) { MS_LOG(ERROR) << "Null pointer reference: quant_concat_parm_->in_quant_args_."; return RET_ERROR; } - for (size_t i = 0; i < input_num; i++) { auto *input_tensor = inputs_.at(i); auto quant_args = input_tensor->GetQuantParams(); - MS_ASSERT(quant_args.size() == 1); - quant_concat_parm_->in_quant_args_[i].scale_ = quant_args.front().scale; - quant_concat_parm_->in_quant_args_[i].zp_ = quant_args.front().zeroPoint; + concat_param_->quant_arg_.in_args_[i].scale_ = quant_args.front().scale; + concat_param_->quant_arg_.in_args_[i].zp_ = quant_args.front().zeroPoint; } - MS_ASSERT(outputs_.size() == 1); - auto output_tensor = outputs_.at(0); - MS_ASSERT(output_tensor != nullptr); - auto output_shape = output_tensor->shape(); - MS_ASSERT(output_shape != NULL); - auto output_dim = output_shape.size(); - quant_concat_parm_->output_dim_ = output_dim; - int output_size = 1; - for (size_t i = 0; i < output_dim; i++) { - output_size *= output_shape[i]; - } - quant_concat_parm_->output_size_ = output_size; - - quant_concat_parm_->output_shape_ = new int[output_size]; - ::memcpy(quant_concat_parm_->output_shape_, output_shape.data(), sizeof(int) * output_size); 
- auto quant_args = output_tensor->GetQuantParams(); - MS_ASSERT(quant_args.size() == 1); - quant_concat_parm_->out_quant_args_.scale_ = quant_args.front().scale; - quant_concat_parm_->out_quant_args_.zp_ = quant_args.front().zeroPoint; + concat_param_->quant_arg_.out_args_.scale_ = quant_args.front().scale; + concat_param_->quant_arg_.out_args_.zp_ = quant_args.front().zeroPoint; + + concat_param_->quant_arg_.output_activation_min_ = std::numeric_limits::min(); + concat_param_->quant_arg_.output_activation_max_ = std::numeric_limits::max(); return RET_OK; } @@ -106,39 +75,40 @@ int ConcatInt8CPUKernel::Init() { int ConcatInt8CPUKernel::ReSize() { return 0; } int ConcatInt8CPUKernel::Run() { - auto input_dim = quant_concat_parm_->input_num_; - int8_t **inputs_array = reinterpret_cast(malloc(sizeof(int8_t *) * input_dim)); - for (size_t i = 0; i < input_dim; i++) { - auto input_size = quant_concat_parm_->input_sizes_[i]; - inputs_array[i] = reinterpret_cast(malloc(sizeof(int8_t) * input_size)); - auto input_type = inputs_[i]->data_type(); - if (input_type == kNumberTypeUInt8) { - uint8_t *input_tmp = reinterpret_cast(inputs_[i]->Data()); - for (size_t j = 0; j < input_size; j++) { - inputs_array[i][j] = (int8_t)(input_tmp[j] - 128); - } - for (size_t j = 0; j < input_dim; j++) { - quant_concat_parm_->in_quant_args_[j].zp_ -= 128; - } - quant_concat_parm_->out_quant_args_.zp_ -= 128; - } else { - ::memcpy(inputs_array[i], inputs_.at(i)->Data(), sizeof(int8_t) * input_size); - } + auto input_num = concat_param_->input_num_; + count_unit_ = thread_count_ > 1 ? 
UP_DIV(before_axis_size, thread_count_) : before_axis_size; + concat_param_->count_unit_ = count_unit_; + input_data_ = reinterpret_cast(ctx_->allocator->Malloc(sizeof(int8_t *) * input_num)); + if (input_data_ == nullptr) { + MS_LOG(ERROR) << "Null pointer reference: inputs_array."; + return RET_ERROR; } - int8_t *output_addr = reinterpret_cast(outputs_.at(0)->Data()); - Concat(inputs_array, output_addr, quant_concat_parm_, axis_); - auto output_type = outputs_[0]->data_type(); - if (output_type == kNumberTypeUInt8) { - auto output_size = quant_concat_parm_->output_size_; - for (size_t i = 0; i < output_size; i++) { - output_addr[i] = (uint8_t)(output_addr[i] + 128); - } + for (size_t i = 0; i < input_num; i++) { + input_data_[i] = static_cast(inputs_.at(i)->Data()); } + output_data_ = reinterpret_cast(outputs_.at(0)->Data()); - for (int i = 0; i < input_dim; i++) { - free(*(inputs_array + i)); + auto ret = LiteBackendParallelLaunch(ConcatInt8Run, this, thread_count_); + + ctx_->allocator->Free(input_data_); + ctx_->allocator->Free(concat_param_->input_shapes_); + ctx_->allocator->Free(concat_param_->quant_arg_.in_args_); + + return ret; +} + +int ConcatInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { + auto concat = reinterpret_cast(cdata); + concat->DoExecute(task_id); + return lite::RET_OK; +} + +int ConcatInt8CPUKernel::DoExecute(int task_id) { + int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_); + if (real_dst_count <= 0) { + return lite::RET_OK; } - return RET_OK; + Concat(input_data_, output_data_, concat_param_, axis_, real_dst_count, task_id); + return lite::RET_OK; } } // namespace mindspore::kernel - diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h index 192f50b46c..01846f6da9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h @@ -21,6 +21,7 @@ 
#include "src/lite_kernel.h" #include "include/context.h" #include "src/runtime/kernel/arm/base/concat_base.h" +#include "src/runtime/runtime_api.h" using mindspore::lite::Context; @@ -30,15 +31,21 @@ class ConcatInt8CPUKernel : public ConcatBaseCPUKernel { ConcatInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const Context *ctx) : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx) {} - ~ConcatInt8CPUKernel() override { delete quant_concat_parm_; } + ~ConcatInt8CPUKernel() override {} int Init() override; int ReSize() override; int Run() override; + int DoExecute(int task_id); private: - ConcatQuantArg *quant_concat_parm_; + int64_t before_axis_size; + int64_t count_unit_; + int8_t **input_data_ = nullptr; + int8_t *output_data_ = nullptr; }; + +int ConcatInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONCAT_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h index ebcfd22a65..b0ff4af359 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h @@ -39,7 +39,7 @@ class CropInt8CPUKernel : public CropBaseCPUKernel { int Init() override; int ReSize() override; int Run() override; - int DoExecute(int tId); + int DoExecute(int task_id); private: CropParameter *crop_para_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc index a9fd7c1378..8c43954f33 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc @@ -68,7 +68,6 @@ int MulInt8CPUKernel::Run() { elements_num_ = inputs_.at(0)->ElementsNum(); count_unit_ = thread_count_ > 1 ? 
UP_DIV(elements_num_, thread_count_) : elements_num_; - if (inputs_.at(0)->ElementsNum() != inputs_.at(1)->ElementsNum()) { input0_data_ = static_cast(ctx_->allocator->Malloc(outputs_.at(0)->Size())); input1_data_ = static_cast(ctx_->allocator->Malloc(outputs_.at(0)->Size())); @@ -98,11 +97,14 @@ int MulInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { return lite::RET_OK; } -int MulInt8CPUKernel::DoExecute(int tId) { - int64_t real_dst_count = MSMIN(elements_num_ - tId * count_unit_, count_unit_); - int8_t *cur_input0_data = input0_data_ + tId * count_unit_; - int8_t *cur_input1_data = input1_data_ + tId * count_unit_; - int8_t *cur_output_data = output_data_ + tId * count_unit_; +int MulInt8CPUKernel::DoExecute(int task_id) { + int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); + if (real_dst_count <= 0) { + return lite::RET_OK; + } + int8_t *cur_input0_data = input0_data_ + task_id * count_unit_; + int8_t *cur_input1_data = input1_data_ + task_id * count_unit_; + int8_t *cur_output_data = output_data_ + task_id * count_unit_; Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, para_.mul_quant_arg_); return lite::RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h index 7725591a06..1acb1b15bc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h @@ -32,7 +32,7 @@ class MulInt8CPUKernel : public LiteKernel { int Init() override; int ReSize() override; int Run() override; - int DoExecute(int tId); + int DoExecute(int task_id); private: const lite::Context *ctx_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc index 7969b89a78..ad34ff8894 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc @@ -15,6 +15,7 @@ */ #include "src/runtime/kernel/arm/int8/reshape_int8.h" +#include #include "src/runtime/kernel/arm/nnacl/int8/reshape_int8.h" #include "schema/model_generated.h" #include "include/errorcode.h" @@ -29,13 +30,17 @@ int ReshapeInt8CPUKernel::Init() { ReshapeBaseCPUKernel::Init(); auto *input_tensor = inputs_.at(kInputIndex); auto in_quant_args = input_tensor->GetQuantParams(); - in_quant_arg_.scale_ = in_quant_args.front().scale; - in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; + reshape_param_->quant_para_.in_args_.scale_ = in_quant_args.front().scale; + reshape_param_->quant_para_.in_args_.zp_ = in_quant_args.front().zeroPoint; auto *out_tensor = outputs_.at(kOutputIndex); auto out_quant_args = out_tensor->GetQuantParams(); - out_quant_arg_.scale_ = out_quant_args.front().scale; - out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; + reshape_param_->quant_para_.out_args_.scale_ = out_quant_args.front().scale; + reshape_param_->quant_para_.out_args_.zp_ = out_quant_args.front().zeroPoint; + + reshape_param_->quant_para_.output_activation_min_ = std::numeric_limits::min(); + reshape_param_->quant_para_.output_activation_max_ = std::numeric_limits::max(); + return RET_OK; } @@ -44,31 +49,32 @@ int ReshapeInt8CPUKernel::ReSize() { return 0; } int ReshapeInt8CPUKernel::Run() { MS_ASSERT(inputs_.size() == 1); MS_ASSERT(outputs_.size() == 1); - auto input_type = inputs_[kInputIndex]->data_type(); - auto input_num = inputs_[kInputIndex]->ElementsNum(); - auto output_num = outputs_.at(kOutputIndex)->ElementsNum(); - MS_ASSERT(input_num == output_num); - int8_t *input_ptr = reinterpret_cast(inputs_.at(kInputIndex)->Data()); - int8_t *output_ptr = reinterpret_cast(outputs_.at(kOutputIndex)->Data()); - if (input_type == kNumberTypeUInt8) { - auto *input_tmp = reinterpret_cast(inputs_.at(kInputIndex)->Data()); - for (size_t i = 0; i < input_num; i++) { - input_ptr[i] = (int8_t)(input_tmp[i] 
- 128); - } - in_quant_arg_.zp_ -= 128; - out_quant_arg_.zp_ -= 128; - } + input_data_ = static_cast(inputs_.at(kInputIndex)->Data()); + output_data_ = static_cast(outputs_.at(kOutputIndex)->Data()); - size_t data_size = inputs_.at(kInputIndex)->Size(); - Reshape(input_ptr, output_ptr, data_size, input_num, in_quant_arg_, out_quant_arg_); + elements_num_ = inputs_.at(kInputIndex)->ElementsNum(); + count_unit_ = thread_count_ > 1 ? UP_DIV(elements_num_, thread_count_) : elements_num_; + + auto ret = LiteBackendParallelLaunch(ReshapeInt8Run, this, thread_count_); + return ret; +} - auto output_type = outputs_[kOutputIndex]->data_type(); - if (output_type == kNumberTypeUInt8) { - for (size_t i = 0; i < output_num; i++) { - output_ptr[i] = (uint8_t)(output_ptr[i] + 128); - } +int ReshapeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { + auto reshape = reinterpret_cast(cdata); + reshape->DoExecute(task_id); + return lite::RET_OK; +} + +int ReshapeInt8CPUKernel::DoExecute(int task_id) { + int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); + if (real_dst_count <= 0) { + return lite::RET_OK; } - return RET_OK; + int8_t *cur_input0_data = input_data_ + task_id * count_unit_; + int8_t *cur_output_data = output_data_ + task_id * count_unit_; + + Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_); + return lite::RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h index cb1065a4c0..c533c42bf2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h @@ -19,9 +19,9 @@ #include #include "src/lite_kernel.h" - #include "include/context.h" #include "src/runtime/kernel/arm/base/reshape_base.h" +#include "src/runtime/runtime_api.h" using mindspore::lite::Context; @@ -36,11 +36,17 @@ class 
ReshapeInt8CPUKernel : public ReshapeBaseCPUKernel { int Init() override; int ReSize() override; int Run() override; + int DoExecute(int task_id); private: - QuantArg in_quant_arg_; - QuantArg out_quant_arg_; + int thread_count_; + int64_t elements_num_; + int64_t count_unit_; + int8_t *input_data_ = nullptr; + int8_t *output_data_ = nullptr; }; + +int ReshapeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_RESHAPE_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h index 501d595634..4e1ad3f213 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h @@ -36,7 +36,7 @@ class SplitInt8CPUKernel : public SplitBaseCPUKernel { int Init() override; int ReSize() override; int Run() override; - int Split(int tId); + int Split(int task_id); private: int8_t *input_ptr_; diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/concat_parameter.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/concat_parameter.h index 5b998a4d2d..b4d06bb645 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/concat_parameter.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/concat_parameter.h @@ -20,9 +20,14 @@ #include "src/runtime/kernel/arm/nnacl/op_base.h" struct ConcatParameter { OpParameter op_parameter_; - ConcatQuantArg *concat_quant_arg_; + ConcatQuantArg quant_arg_; int axis_; int thread_count_; + int input_num_; + const int **input_shapes_; + const int *output_shapes_; + int64_t after_axis_size; + int64_t count_unit_; }; #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_CONCAT_PARAMETER_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.cc index 804c80bf93..295113dc2c 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.cc @@ -15,50 +15,47 @@ */ #include "src/runtime/kernel/arm/nnacl/int8/concat_int8.h" +#include "src/runtime/kernel/arm/nnacl/concat_parameter.h" #include -void Concat(int8_t **inputs, int8_t *output_ptr, ConcatQuantArg *quant_concat_parm, int axis) { - float output_scale = quant_concat_parm->out_quant_args_.scale_; +void Concat(int8_t **inputs, int8_t *output, ConcatParameter *para, int axis, int64_t real_dst_count, int task_id) { + float output_scale = para->quant_arg_.out_args_.scale_; float output_inverse_scale = 1.f / output_scale; - int input_num = quant_concat_parm->input_num_; - int *output_shape = quant_concat_parm->output_shape_; - int output_dim = quant_concat_parm->output_dim_; - QuantArg *input_quant = quant_concat_parm->in_quant_args_; - int output_zp = quant_concat_parm->out_quant_args_.zp_; + int input_num = para->input_num_; + int count_unit_ = para->count_unit_; + int after_axis_size = para->after_axis_size; + const int *output_shape = para->output_shapes_; + int out_copy_size = output_shape[axis] * after_axis_size; + QuantArg *input_quant = para->quant_arg_.in_args_; + int output_zp = para->quant_arg_.out_args_.zp_; + int max_int8 = para->quant_arg_.output_activation_max_; + int min_int8 = para->quant_arg_.output_activation_min_; + int64_t start = task_id * count_unit_; + int64_t end = start + real_dst_count; - int before_axis_size = 1; - for (int i = 0; i < axis; i++) { - before_axis_size *= output_shape[i]; - } - - int after_axis_size = 1; - for (size_t i = axis + 1; i < output_dim; i++) { - after_axis_size *= output_shape[i]; - } - - for (int k = 0; k < before_axis_size; k++) { + for (int k = start; k < end; k++) { for (int i = 0; i < input_num; i++) { - int *input_shape = quant_concat_parm->input_shapes_[i]; - int copy_size = input_shape[axis] * after_axis_size; - int8_t *input_ptr = inputs[i] + k * copy_size; + 
const int *input_shape = para->input_shapes_[i]; + int in_copy_size = input_shape[axis] * after_axis_size; + int8_t *input_ptr = inputs[i] + k * in_copy_size; + int8_t *output_ptr = output + k * out_copy_size; if (input_quant[i].scale_ == output_scale && input_quant[i].zp_ == output_zp) { - memcpy(output_ptr, input_ptr, copy_size); + memcpy(output_ptr, input_ptr, in_copy_size); } else { float scale = input_quant[i].scale_ * output_inverse_scale; float bias = -input_quant[i].zp_ * scale; - for (int j = 0; j < copy_size; j++) { + for (int j = 0; j < in_copy_size; j++) { int32_t output_tmp = round(input_ptr[j] * scale + bias) + output_zp; - if (output_tmp > 127) { - output_ptr[j] = 127; - } else if (output_tmp < -128) { - output_ptr[j] = -128; + if (output_tmp > max_int8) { + output_ptr[j] = max_int8; + } else if (output_tmp < min_int8) { + output_ptr[j] = min_int8; } else { - output_ptr[j] = (int8_t)output_tmp; + output_ptr[j] = static_cast(output_tmp); } } } - output_ptr += copy_size; + output += in_copy_size; } } } - diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.h index 6b7edb43d5..0243a2fb54 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.h @@ -18,8 +18,8 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_CONCAT_INT8_H_ #include "src/runtime/kernel/arm/nnacl/op_base.h" +#include "src/runtime/kernel/arm/nnacl/concat_parameter.h" -void Concat(int8_t **inputs, int8_t *output_ptr, ConcatQuantArg *quant_concat_parm, int axis); +void Concat(int8_t **inputs, int8_t *output_ptr, ConcatParameter *para, int axis, int64_t real_dst_count, int task_id); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_CONCAT_INT8_H_ - diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/crop_int8.cc index 
e7708fcc2a..d9a6115a24 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/crop_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/crop_int8.cc @@ -40,6 +40,9 @@ void Crop1D(const int8_t *input, int8_t *output, int task_id, CropParameter *par const int out_batch = para->out_shape_[0]; const int thread_count = para->thread_count_; int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_batch, thread_count) : out_batch; + if (task_id_stride <= 0) { + return; + } float in_scale = para->quant_arg.in_args_.scale_; int32_t in_zp = para->quant_arg.in_args_.zp_; @@ -78,6 +81,9 @@ void Crop2D(const int8_t *input, int8_t *output, int task_id, CropParameter *par const int out_height = para->out_shape_[1]; const int thread_count = para->thread_count_; int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height; + if (task_id_stride <= 0) { + return; + } float in_scale = para->quant_arg.in_args_.scale_; int32_t in_zp = para->quant_arg.in_args_.zp_; @@ -120,6 +126,12 @@ void Crop3D(const int8_t *input, int8_t *output, int task_id, CropParameter *par const int out_height = para->out_shape_[1]; const int out_width = para->out_shape_[2]; + const int thread_count = para->thread_count_; + int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height; + if (task_id_stride <= 0) { + return; + } + const int in_stride_h = in_width; const int in_stride_n = in_stride_h * in_height; @@ -133,8 +145,6 @@ void Crop3D(const int8_t *input, int8_t *output, int task_id, CropParameter *par float scale = in_scale / out_scale; float bias = -in_zp * scale; - const int thread_count = para->thread_count_; - int64_t task_id_stride = thread_count > 1 ? 
UP_DIV(out_height, thread_count) : out_height; for (int n = 0; n < out_batch; n++) { for (int t = 0; t < task_id_stride; t++) { auto h = t + task_id * task_id_stride; @@ -173,6 +183,12 @@ void Crop4D(const int8_t *input, int8_t *output, int task_id, CropParameter *par const int out_width = para->out_shape_[2]; const int out_channel = para->out_shape_[3]; + const int thread_count = para->thread_count_; + int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height; + if (task_id_stride <= 0) { + return; + } + const int in_stride_w = in_channel; const int in_stride_h = in_channel * in_width; const int in_stride_n = in_stride_h * in_height; @@ -188,8 +204,6 @@ void Crop4D(const int8_t *input, int8_t *output, int task_id, CropParameter *par float scale = in_scale / out_scale; float bias = -in_zp * scale; - const int thread_count = para->thread_count_; - int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height; for (int n = 0; n < out_batch; n++) { for (int t = 0; t < task_id_stride; t++) { auto h = t + task_id * task_id_stride; diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/mul_int8.cc index 087e6b0f9d..774ce33176 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/mul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/mul_int8.cc @@ -58,6 +58,7 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int16x8_t res_s16 = vcombine_s16(sum_low, sum_high); int8x8_t res_u8_n0 = vqmovn_s16(res_s16); vst1_s8(output_data, res_u8_n0); + output_data += 8; } } #endif diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.cc b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.cc index 3993e3d3d2..d02d4486a9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.cc @@ -15,27 
+15,26 @@ */ #include "src/runtime/kernel/arm/nnacl/int8/reshape_int8.h" +#include "src/runtime/kernel/arm/nnacl/reshape_parameter.h" #include -void Reshape(int8_t *input_ptr, int8_t *output_ptr, size_t data_size, int input_num, QuantArg in_quant_arg, - QuantArg out_quant_arg) { - if (in_quant_arg.scale_ == out_quant_arg.scale_ && in_quant_arg.zp_ == out_quant_arg.zp_) { - memcpy(output_ptr, input_ptr, data_size); +void Reshape(int8_t *input_ptr, int8_t *output_ptr, int64_t real_dst_count, ReshapeQuantArg para) { + if (para.in_args_.scale_ == para.out_args_.scale_ && para.in_args_.zp_ == para.out_args_.zp_) { + memcpy(output_ptr, input_ptr, real_dst_count); } else { - float output_inverse_scale = 1.f / out_quant_arg.scale_; - float scale = in_quant_arg.scale_ * output_inverse_scale; - float bias = -in_quant_arg.zp_ * scale; - int32_t output_zp = out_quant_arg.zp_; - for (int i = 0; i < input_num; i++) { + float output_inverse_scale = 1.f / para.out_args_.scale_; + float scale = para.in_args_.scale_ * output_inverse_scale; + float bias = -para.in_args_.zp_ * scale; + int32_t output_zp = para.out_args_.zp_; + for (int i = 0; i < real_dst_count; i++) { int32_t output_tmp = round(input_ptr[i] * scale + bias) + output_zp; - if (output_tmp > 127) { - output_ptr[i] = 127; - } else if (output_tmp < -128) { - output_ptr[i] = -128; + if (output_tmp > para.output_activation_max_) { + output_ptr[i] = para.output_activation_max_; + } else if (output_tmp < para.output_activation_min_) { + output_ptr[i] = para.output_activation_min_; } else { - output_ptr[i] = (int8_t)output_tmp; + output_ptr[i] = static_cast(output_tmp); } } } } - diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.h index 129178086a..0ae26d9347 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.h @@ -17,9 +17,8 @@ #ifndef 
MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_RESHAHPE_INT8_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_RESHAHPE_INT8_H_ #include "src/runtime/kernel/arm/nnacl/op_base.h" +#include "src/runtime/kernel/arm/nnacl/reshape_parameter.h" -void Reshape(int8_t *input_ptr, int8_t *output_ptr, size_t data_size, int input_num, QuantArg in_quant_arg, - QuantArg out_quant_arg); +void Reshape(int8_t *input_ptr, int8_t *output_ptr, int64_t real_dst_count, ReshapeQuantArg para); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_RESHAHPE_INT8_H_ - diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h index 05aaad4c5c..cf9eb90745 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h @@ -40,15 +40,10 @@ struct ConvQuantArg { }; struct ConcatQuantArg { - int *input_sizes_; - int output_size_; - int **input_shapes_; - int *output_shape_; - float alpha; - size_t input_num_; - size_t output_dim_; - QuantArg *in_quant_args_; - QuantArg out_quant_args_; + QuantArg *in_args_; + QuantArg out_args_; + int output_activation_min_; + int output_activation_max_; }; struct SqueezeQuantArg { @@ -166,6 +161,13 @@ struct SoftmaxQuantArg { QuantArg out_quant_arg_; }; +struct ReshapeQuantArg { + QuantArg in_args_; + QuantArg out_args_; + int output_activation_min_; + int output_activation_max_; +}; + void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); inline void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/reshape_parameter.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/reshape_parameter.h index 0243a60f86..c9de4c342d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/reshape_parameter.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/nnacl/reshape_parameter.h
@@ -21,6 +21,7 @@
 
 struct ReshapeParameter {
   OpParameter op_parameter_;
+  ReshapeQuantArg quant_para_;  // input/output QuantArg plus the output clamp range (see ReshapeQuantArg)
   int thread_count_;
 };
 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc
new file mode 100644
index 0000000000..5d9814e268
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc
@@ -0,0 +1,247 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/nnacl/concat_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/src/lite_kernel.h"
+#include "mindspore/lite/src/ir/tensor.h"
+
+namespace mindspore {
+
+class TestConcatInt8 : public mindspore::Common {  // fixture for the int8 Concat kernel unit tests
+ public:
+  TestConcatInt8() {}
+};
+
+// Concatenates two {3, 2} int8 tensors along axis 0 on a single thread. Input and output
+// share the same quantization (scale 1.0, zero point 0), so the {6, 2} output is expected
+// to be input1 followed by input2, unchanged.
+TEST_F(TestConcatInt8, Concat1_axis0) {
+  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6};
+  std::vector<int> shape1 = {3, 2};
+  std::vector<int8_t> input2 = {7, 8, 9, 10, 11, 12};
+  std::vector<int> shape2 = {3, 2};
+
+  int8_t output[12];
+  std::vector<int> output_shape = {6, 2};
+
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 1.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  lite::tensor::Tensor *input_tensor2 = new lite::tensor::Tensor;
+  input_tensor2->SetData(input2.data());
+  input_tensor2->set_shape(shape2);
+  input_tensor2->AddQuantParam(input_quant_arg);
+  input_tensor2->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(2);
+  inputs_tensor[0] = input_tensor1;
+  inputs_tensor[1] = input_tensor2;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  ConcatParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Concat;
+  op_param.axis_ = 0;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 1;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Concat};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  PrintData("output data", output, input1.size() + input2.size());
+  CompareOutputData(output, expected_result.data(), input1.size() + input2.size(), 0.000001);
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  input_tensor2->SetData(nullptr);
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete input_tensor2;
+  delete output0_tensor;
+  delete ctx;
+}
+
+// Concatenates a {2, 3, 2} and a {2, 1, 2} int8 tensor along axis 1 with thread_num_ = 2.
+// Quantization is identical on both sides (scale 1.0, zero point 0), so the {2, 4, 2}
+// output must contain, per outer index, the rows of input1 followed by the row of input2.
+TEST_F(TestConcatInt8, Concat1_axis1_thread2) {
+  std::vector<int8_t> input1 = {10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25};
+  std::vector<int> shape1 = {2, 3, 2};
+  std::vector<int8_t> input2 = {30, 31, 32, 33};
+  std::vector<int> shape2 = {2, 1, 2};
+
+  int8_t output[16];
+  std::vector<int> output_shape = {2, 4, 2};
+
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 1.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  lite::tensor::Tensor *input_tensor2 = new lite::tensor::Tensor;
+  input_tensor2->SetData(input2.data());
+  input_tensor2->set_shape(shape2);
+  input_tensor2->AddQuantParam(input_quant_arg);
+  input_tensor2->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(2);
+  inputs_tensor[0] = input_tensor1;
+  inputs_tensor[1] = input_tensor2;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  ConcatParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Concat;
+  op_param.axis_ = 1;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 2;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Concat};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {10, 11, 12, 13, 14, 15, 30, 31, 20, 21, 22, 23, 24, 25, 32, 33};
+  PrintData("output data", output, input1.size() + input2.size());
+  CompareOutputData(output, expected_result.data(), input1.size() + input2.size(), 0.000001);
+
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  input_tensor2->SetData(nullptr);
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete input_tensor2;
+  delete output0_tensor;
+  delete ctx;
+}
+
+// Same shapes, axis and thread count as Concat1_axis1_thread2, but the output scale is 2.0
+// while the inputs use 1.0: every expected value is the input divided by 2, with halves
+// rounded up (e.g. 11 -> 6, 31 -> 16).
+TEST_F(TestConcatInt8, Concat1_axis1_thread2_quant1) {
+  std::vector<int8_t> input1 = {10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25};
+  std::vector<int> shape1 = {2, 3, 2};
+  std::vector<int8_t> input2 = {30, 31, 32, 33};
+  std::vector<int> shape2 = {2, 1, 2};
+
+  int8_t output[16];
+  std::vector<int> output_shape = {2, 4, 2};
+
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 2.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  lite::tensor::Tensor *input_tensor2 = new lite::tensor::Tensor;
+  input_tensor2->SetData(input2.data());
+  input_tensor2->set_shape(shape2);
+  input_tensor2->AddQuantParam(input_quant_arg);
+  input_tensor2->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(2);
+  inputs_tensor[0] = input_tensor1;
+  inputs_tensor[1] = input_tensor2;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  ConcatParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Concat;
+  op_param.axis_ = 1;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 2;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Concat};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {5, 6, 6, 7, 7, 8, 15, 16, 10, 11, 11, 12, 12, 13, 16, 17};
+  PrintData("output data", output, input1.size() + input2.size());
+  CompareOutputData(output, expected_result.data(), input1.size() + input2.size(), 0.000001);
+
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  input_tensor2->SetData(nullptr);
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete input_tensor2;
+  delete output0_tensor;
+  delete ctx;
+}
+
+}  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc
new file mode 100644
index 0000000000..3aed147816
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc
@@ -0,0 +1,311 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/nnacl/mul_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/src/lite_kernel.h"
+#include "mindspore/lite/src/ir/tensor.h"
+
+namespace mindspore {
+
+class TestMulInt8 : public mindspore::Common {  // fixture for the int8 Mul kernel unit tests
+ public:
+  TestMulInt8() {}
+};
+
+// Multiplies a {2, 3, 2} tensor by a {2, 1, 2} tensor (broadcast over the middle axis) on one
+// thread. All tensors use scale 1.0 / zero point 0, so the expected output is the plain
+// element-wise product.
+TEST_F(TestMulInt8, Mul_quant0) {
+  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::vector<int> shape1 = {2, 3, 2};
+  std::vector<int8_t> input2 = {1, 2, 3, 4};
+  std::vector<int> shape2 = {2, 1, 2};
+
+  int8_t output[12];
+  std::vector<int> output_shape = {2, 3, 2};
+
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 1.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  lite::tensor::Tensor *input_tensor2 = new lite::tensor::Tensor;
+  input_tensor2->SetData(input2.data());
+  input_tensor2->set_shape(shape2);
+  input_tensor2->AddQuantParam(input_quant_arg);
+  input_tensor2->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(2);
+  inputs_tensor[0] = input_tensor1;
+  inputs_tensor[1] = input_tensor2;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  MulParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Mul;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 1;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {1, 4, 3, 8, 5, 12, 21, 32, 27, 40, 33, 48};
+  PrintData("output data", output, input1.size());
+  CompareOutputData(output, expected_result.data(), input1.size(), 0.000001);
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  input_tensor2->SetData(nullptr);
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete input_tensor2;
+  delete output0_tensor;
+  delete ctx;
+}
+
+// Multiplies a {2, 3, 3} tensor by an all-ones {2, 1, 3} tensor with identity quantization, so
+// the output must equal input1. NOTE(review): despite the "thread0" suffix this test sets
+// thread_num_ = 1, exactly like Mul_quant0.
+TEST_F(TestMulInt8, Mul_quant0_thread0) {
+  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  std::vector<int> shape1 = {2, 3, 3};
+  std::vector<int8_t> input2 = {1, 1, 1, 1, 1, 1};
+  std::vector<int> shape2 = {2, 1, 3};
+
+  int8_t output[18];
+  std::vector<int> output_shape = {2, 3, 3};
+
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 1.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  lite::tensor::Tensor *input_tensor2 = new lite::tensor::Tensor;
+  input_tensor2->SetData(input2.data());
+  input_tensor2->set_shape(shape2);
+  input_tensor2->AddQuantParam(input_quant_arg);
+  input_tensor2->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(2);
+  inputs_tensor[0] = input_tensor1;
+  inputs_tensor[1] = input_tensor2;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  MulParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Mul;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 1;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  PrintData("output data", output, input1.size());
+  CompareOutputData(output, expected_result.data(), input1.size(), 0.000001);
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  input_tensor2->SetData(nullptr);
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete input_tensor2;
+  delete output0_tensor;
+  delete ctx;
+}
+
+// Same inputs as Mul_quant0 on a single thread, but the output scale is 2.0: each expected
+// value is the element-wise product divided by 2, with halves rounded up
+// (e.g. 1 * 1 -> 1, 3 * 1 -> 2, 7 * 3 -> 11).
+TEST_F(TestMulInt8, Mul_quant1) {
+  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::vector<int> shape1 = {2, 3, 2};
+  std::vector<int8_t> input2 = {1, 2, 3, 4};
+  std::vector<int> shape2 = {2, 1, 2};
+
+  int8_t output[12];
+  std::vector<int> output_shape = {2, 3, 2};
+
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 2.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  lite::tensor::Tensor *input_tensor2 = new lite::tensor::Tensor;
+  input_tensor2->SetData(input2.data());
+  input_tensor2->set_shape(shape2);
+  input_tensor2->AddQuantParam(input_quant_arg);
+  input_tensor2->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(2);
+  inputs_tensor[0] = input_tensor1;
+  inputs_tensor[1] = input_tensor2;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  MulParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Mul;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 1;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {1, 2, 2, 4, 3, 6, 11, 16, 14, 20, 17, 24};
+  PrintData("output data", output, input1.size());
+  CompareOutputData(output, expected_result.data(), input1.size(), 0.000001);
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  input_tensor2->SetData(nullptr);
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete input_tensor2;
+  delete output0_tensor;
+  delete ctx;
+}
+
+// Same data and quantization as Mul_quant1 (output scale 2.0), but executed with
+// thread_num_ = 2 so the work is split across two threads; the expected output is
+// identical to the single-threaded case.
+TEST_F(TestMulInt8, Mul_quant1_thread1) {
+  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::vector<int> shape1 = {2, 3, 2};
+  std::vector<int8_t> input2 = {1, 2, 3, 4};
+  std::vector<int> shape2 = {2, 1, 2};
+
+  int8_t output[12];
+  std::vector<int> output_shape = {2, 3, 2};
+
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 2.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  lite::tensor::Tensor *input_tensor2 = new lite::tensor::Tensor;
+  input_tensor2->SetData(input2.data());
+  input_tensor2->set_shape(shape2);
+  input_tensor2->AddQuantParam(input_quant_arg);
+  input_tensor2->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(2);
+  inputs_tensor[0] = input_tensor1;
+  inputs_tensor[1] = input_tensor2;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  MulParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Mul;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 2;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {1, 2, 2, 4, 3, 6, 11, 16, 14, 20, 17, 24};
+  PrintData("output data", output, input1.size());
+  CompareOutputData(output, expected_result.data(), input1.size(), 0.000001);
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  input_tensor2->SetData(nullptr);
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete input_tensor2;
+  delete output0_tensor;
+  delete ctx;
+}
+}  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc
new file mode 100644
index 0000000000..e6921779b8
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc
@@ -0,0 +1,150 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/nnacl/reshape_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/src/lite_kernel.h"
+#include "mindspore/lite/src/ir/tensor.h"
+
+namespace mindspore {
+
+class TestReshapeInt8 : public mindspore::Common {  // fixture for the int8 Reshape kernel unit tests
+ public:
+  TestReshapeInt8() {}
+};
+
+// Reshapes a {2, 3, 2} int8 tensor to {2, 6} on one thread with identical input/output
+// quantization (scale 1.0, zero point 0); the data must come out unchanged.
+TEST_F(TestReshapeInt8, reshape_quant0) {
+  std::vector<int8_t> input1 = {10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25};
+  std::vector<int> shape1 = {2, 3, 2};
+
+  int8_t output[12];
+  std::vector<int> output_shape = {2, 6};
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 1.0;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(1);
+  inputs_tensor[0] = input_tensor1;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  ReshapeParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Reshape;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 1;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reshape};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25};
+  PrintData("output data", output, input1.size());
+  PrintData("output data shape", output_tensor_shape.data(), output_tensor_shape.size());
+  CompareOutputData(output, expected_result.data(), input1.size(), 0.000001);
+
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete output0_tensor;
+  delete ctx;
+}
+
+// Reshapes with thread_num_ = 2 and output scale 2.0 / zero point 1 (input: 1.0 / 0); each
+// expected value is input / 2 with halves rounded up, plus 1 (e.g. 10 -> 6, 11 -> 7).
+TEST_F(TestReshapeInt8, reshape_quant1_thread2) {
+  std::vector<int8_t> input1 = {10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25};
+  std::vector<int> shape1 = {2, 3, 2};
+
+  int8_t output[12];
+  std::vector<int> output_shape = {2, 6};
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 1.0;
+  input_quant_arg.zeroPoint = 0;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 2.0;
+  output_quant_arg.zeroPoint = 1;
+
+  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
+  TypeId tid_int8 = kNumberTypeInt8;
+  input_tensor1->SetData(input1.data());
+  input_tensor1->set_shape(shape1);
+  input_tensor1->AddQuantParam(input_quant_arg);
+  input_tensor1->set_data_type(tid_int8);
+
+  std::vector<lite::tensor::Tensor *> inputs_tensor(1);
+  inputs_tensor[0] = input_tensor1;
+
+  std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+  lite::tensor::Tensor *output0_tensor = new lite::tensor::Tensor;
+  output0_tensor->SetData(output);
+  output0_tensor->set_shape(output_shape);
+  output0_tensor->AddQuantParam(output_quant_arg);
+  output0_tensor->set_data_type(tid_int8);
+  outputs_tensor[0] = output0_tensor;
+
+  ReshapeParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Reshape;
+  lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 2;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reshape};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor->shape();
+  ASSERT_EQ(output_tensor_shape, output_shape);
+  kernel->Run();
+
+  std::vector<int8_t> expected_result = {6, 7, 7, 8, 8, 9, 11, 12, 12, 13, 13, 14};
+  PrintData("output data", output, input1.size());
+  PrintData("output data shape", output_tensor_shape.data(), output_tensor_shape.size());
+  CompareOutputData(output, expected_result.data(), input1.size(), 0.000001);
+
+  input_tensor1->SetData(nullptr);  // the tensors only borrow the test-owned buffers
+  output0_tensor->SetData(nullptr);
+  delete input_tensor1;
+  delete output0_tensor;
+  delete ctx;
+}
+
+}  // namespace mindspore