| @@ -90,6 +90,7 @@ class ArithmeticCPUKernel : public LiteKernel { | |||
| case PrimitiveType_FloorMod: | |||
| arithmetic_run_ = ElementFloorMod; | |||
| arithmetic_broadcast_run_ = BroadcastFloorMod; | |||
| break; | |||
| case PrimitiveType_Equal: | |||
| arithmetic_run_ = ElementEqual; | |||
| arithmetic_broadcast_run_ = BroadcastEqual; | |||
| @@ -17,7 +17,7 @@ | |||
| #include <vector> | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/kernel/arm/opclib/arg_min_max.h" | |||
| #include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::RET_OK; | |||
| @@ -31,12 +31,45 @@ int ArgMinMaxInt8CPUKernel::Init() { | |||
| } | |||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter); | |||
| param->data_type_ = kNumberTypeInt8; | |||
| auto *input_tensor = inputs_.at(kInputIndex); | |||
| auto in_quant_args = input_tensor->GetQuantParams(); | |||
| in_quant_arg_.scale_ = in_quant_args.front().scale; | |||
| in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; | |||
| auto *out_tensor = outputs_.at(kOutputIndex); | |||
| auto out_quant_args = out_tensor->GetQuantParams(); | |||
| out_quant_arg_.scale_ = out_quant_args.front().scale; | |||
| out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; | |||
| return RET_OK; | |||
| } | |||
| int ArgMinMaxInt8CPUKernel::Run() { | |||
| auto ret = ArgMinMaxBaseCPUKernel::Run(); | |||
| FreeTmpMemory(); | |||
| return ret; | |||
| auto input = inputs_.at(0); | |||
| const int8_t *input_data = reinterpret_cast<const int8_t *>(inputs_.at(0)->Data()); | |||
| int8_t *output_data = reinterpret_cast<int8_t *>(outputs_.at(0)->Data()); | |||
| auto in_shape = input->shape().data(); | |||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter); | |||
| if (param->topk_ == 1) { | |||
| ArgMinMaxQuant(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); | |||
| return RET_OK; | |||
| } | |||
| switch (param->axis_) { | |||
| case 0: | |||
| ArgMinMaxDim0(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); | |||
| break; | |||
| case 1: | |||
| ArgMinMaxDim1(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); | |||
| break; | |||
| case 2: | |||
| ArgMinMaxDim2(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); | |||
| break; | |||
| case 3: | |||
| ArgMinMaxDim3(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_); | |||
| break; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -18,6 +18,7 @@ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/base/arg_min_max_base.h" | |||
| #include "src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| namespace mindspore::kernel { | |||
| class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel { | |||
| @@ -31,6 +32,9 @@ class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel { | |||
| int Init() override; | |||
| int ReSize() override { return 0; } | |||
| int Run() override; | |||
| private: | |||
| QuantArg in_quant_arg_; | |||
| QuantArg out_quant_arg_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,194 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/arithmetic_int8.h" | |||
| #include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h" | |||
| #include "src/runtime/kernel/arm/opclib/arithmetic_common.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_PARAM_INVALID; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Equal; | |||
| using mindspore::schema::PrimitiveType_NotEqual; | |||
| using mindspore::schema::PrimitiveType_LessEqual; | |||
| using mindspore::schema::PrimitiveType_Greater; | |||
| using mindspore::schema::PrimitiveType_GreaterEqual; | |||
| using mindspore::schema::PrimitiveType_Less; | |||
| namespace mindspore::kernel { | |||
| namespace { | |||
| int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto arithmetic_kernel = reinterpret_cast<ArithmeticInt8CPUKernel *>(cdata); | |||
| auto error_code = arithmetic_kernel->DoArithmetic(thread_id); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticsRun error thread_id[" << thread_id << "] error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace | |||
| ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() { | |||
| auto param = reinterpret_cast<ArithmeticParameter *>(opParameter); | |||
| if (!param->broadcasting_) { | |||
| return; | |||
| } | |||
| if (context_->allocator != nullptr) { | |||
| if (tile_data0_ != nullptr) { | |||
| context_->allocator->Free(tile_data0_); | |||
| } | |||
| if (tile_data1_ != nullptr) { | |||
| context_->allocator->Free(tile_data1_); | |||
| } | |||
| } else { | |||
| if (tile_data0_ != nullptr) { | |||
| free(tile_data0_); | |||
| } | |||
| if (tile_data1_ != nullptr) { | |||
| free(tile_data1_); | |||
| } | |||
| } | |||
| tile_data0_ = nullptr; | |||
| tile_data1_ = nullptr; | |||
| } | |||
| int ArithmeticInt8CPUKernel::Init() { | |||
| switch (opParameter->type_) { | |||
| case PrimitiveType_Equal: | |||
| arithmetic_run_ = ElementEqual; | |||
| break; | |||
| case PrimitiveType_NotEqual: | |||
| arithmetic_run_ = ElementNotEqual; | |||
| break; | |||
| case PrimitiveType_Less: | |||
| arithmetic_run_ = ElementEqual; | |||
| break; | |||
| case PrimitiveType_LessEqual: | |||
| arithmetic_run_ = ElementNotEqual; | |||
| break; | |||
| case PrimitiveType_Greater: | |||
| arithmetic_run_ = ElementGreater; | |||
| break; | |||
| case PrimitiveType_GreaterEqual: | |||
| arithmetic_run_ = ElementGreaterEqual; | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "Error Operator type " << opParameter->type_; | |||
| arithmetic_run_ = nullptr; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| auto data_size = outputs_[0]->Size(); | |||
| auto param = reinterpret_cast<ArithmeticParameter *>(opParameter); | |||
| if (param->broadcasting_) { | |||
| if (context_->allocator != nullptr) { | |||
| tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size)); | |||
| tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size)); | |||
| } else { | |||
| tile_data0_ = reinterpret_cast<int8_t *>(malloc(data_size)); | |||
| tile_data1_ = reinterpret_cast<int8_t *>(malloc(data_size)); | |||
| } | |||
| } else { | |||
| tile_data0_ = nullptr; | |||
| tile_data1_ = nullptr; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ArithmeticInt8CPUKernel::ReSize() { return RET_OK; } | |||
| int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) { | |||
| auto input0_data = reinterpret_cast<int8_t *>(inputs_[0]->Data()); | |||
| auto input1_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data()); | |||
| auto output_data = reinterpret_cast<int8_t *>(outputs_[0]->Data()); | |||
| auto element_num = outputs_[0]->ElementsNum(); | |||
| auto param = reinterpret_cast<ArithmeticParameter *>(opParameter); | |||
| if (param->broadcasting_ && arithmetic_run_ != nullptr) { | |||
| MS_ASSERT(thread_count_ != 0); | |||
| int stride = UP_DIV(element_num, thread_count_); | |||
| int count = MSMIN(stride, element_num - stride * thread_id); | |||
| int error_code = arithmetic_run_(tile_data0_ + stride * thread_id, tile_data1_ + stride * thread_id, | |||
| output_data + stride * thread_id, count); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Arithmetic run fail! ret: " << error_code; | |||
| return RET_ERROR; | |||
| } | |||
| } else if (arithmetic_run_ != nullptr) { | |||
| int error_code = arithmetic_run_(input0_data, input1_data1, output_data, element_num); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Arithmetic run fail!ret: " << error_code; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "arithmetic_run function is nullptr!"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ArithmeticInt8CPUKernel::Run() { | |||
| auto param = reinterpret_cast<ArithmeticParameter *>(opParameter); | |||
| if (param->broadcasting_) { | |||
| auto input_data0 = reinterpret_cast<int8_t *>(inputs_[0]->Data()); | |||
| auto input_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data()); | |||
| TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param); | |||
| } | |||
| int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| if (parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input parameter is null!"; | |||
| return nullptr; | |||
| } | |||
| auto kernel = new (std::nothrow) ArithmeticInt8CPUKernel(parameter, inputs, outputs, ctx); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "Create ArithmeticInt8CPUKernel failed, name: " << parameter->name_; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_)); | |||
| delete kernel; | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Equal, CpuArithmeticInt8KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_NotEqual, CpuArithmeticInt8KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Less, CpuArithmeticInt8KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_LessEqual, CpuArithmeticInt8KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Greater, CpuArithmeticInt8KernelCreator) | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_GreaterEqual, CpuArithmeticInt8KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "schema/model_generated.h" | |||
| namespace mindspore::kernel { | |||
| class ArithmeticInt8CPUKernel : public LiteKernel { | |||
| typedef int (*ArithmeticRunInt8)(int8_t *input0, int8_t *input1, int8_t *output, int element_size); | |||
| public: | |||
| ArithmeticInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_), context_(ctx) {} | |||
| ~ArithmeticInt8CPUKernel(); | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int DoArithmetic(int thread_id); | |||
| private: | |||
| int thread_count_; | |||
| int8_t *tile_data0_; | |||
| int8_t *tile_data1_; | |||
| const lite::Context *context_; | |||
| ArithmeticRunInt8 arithmetic_run_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ | |||
| @@ -18,13 +18,27 @@ | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/kernel/arm/opclib/batch_to_space.h" | |||
| #include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| int BatchToSpaceInt8CPUKernel::Init() { | |||
| return BatchToSpaceBaseCPUKernel::Init(); | |||
| auto ret = BatchToSpaceBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| auto *input_tensor = inputs_.at(kInputIndex); | |||
| auto in_quant_args = input_tensor->GetQuantParams(); | |||
| in_quant_arg_.scale_ = in_quant_args.front().scale; | |||
| in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; | |||
| auto *out_tensor = outputs_.at(kOutputIndex); | |||
| auto out_quant_args = out_tensor->GetQuantParams(); | |||
| out_quant_arg_.scale_ = out_quant_args.front().scale; | |||
| out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; | |||
| return RET_OK; | |||
| } | |||
| int BatchToSpaceInt8CPUKernel::Run() { | |||
| @@ -36,12 +50,22 @@ int BatchToSpaceInt8CPUKernel::Run() { | |||
| auto out_shape = output->shape(); | |||
| BatchToSpaceParameter *param = reinterpret_cast<BatchToSpaceParameter *>(this->opParameter); | |||
| if (IsNoCrop()) { | |||
| BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | |||
| sizeof(int8_t)); | |||
| if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { | |||
| if (IsNoCrop()) { | |||
| BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | |||
| sizeof(int8_t)); | |||
| } else { | |||
| BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_, | |||
| sizeof(int8_t)); | |||
| } | |||
| } else { | |||
| BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_, | |||
| sizeof(int8_t)); | |||
| if (IsNoCrop()) { | |||
| BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | |||
| &in_quant_arg_, &out_quant_arg_); | |||
| } else { | |||
| BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_, | |||
| &in_quant_arg_, &out_quant_arg_); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| @@ -31,6 +31,9 @@ class BatchToSpaceInt8CPUKernel : public BatchToSpaceBaseCPUKernel { | |||
| int Init() override; | |||
| int ReSize() override { return 0; } | |||
| int Run() override; | |||
| private: | |||
| QuantArg in_quant_arg_; | |||
| QuantArg out_quant_arg_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -18,6 +18,7 @@ | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/kernel/arm/opclib/depth_to_space.h" | |||
| #include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::RET_OK; | |||
| @@ -31,6 +32,16 @@ int DepthToSpaceInt8CPUKernel::Init() { | |||
| } | |||
| DepthToSpaceParameter *param = reinterpret_cast<DepthToSpaceParameter *>(opParameter); | |||
| param->data_type_size_ = sizeof(int8_t); | |||
| auto *input_tensor = inputs_.at(kInputIndex); | |||
| auto in_quant_args = input_tensor->GetQuantParams(); | |||
| in_quant_arg_.scale_ = in_quant_args.front().scale; | |||
| in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; | |||
| auto *out_tensor = outputs_.at(kOutputIndex); | |||
| auto out_quant_args = out_tensor->GetQuantParams(); | |||
| out_quant_arg_.scale_ = out_quant_args.front().scale; | |||
| out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; | |||
| return RET_OK; | |||
| } | |||
| @@ -41,14 +52,11 @@ int DepthToSpaceInt8CPUKernel::Run() { | |||
| int8_t *output_data = reinterpret_cast<int8_t *>(output->Data()); | |||
| auto in_shape = input->shape(); | |||
| DepthToSpaceParameter *param = reinterpret_cast<DepthToSpaceParameter *>(opParameter); | |||
| if (input->GetFormat() == schema::Format_NHWC) { | |||
| if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { | |||
| DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param); | |||
| return RET_OK; | |||
| } else { | |||
| MS_LOG(ERROR) << "Depth_to_space only support NHWC now!"; | |||
| return RET_ERROR; | |||
| DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -31,6 +31,9 @@ class DepthToSpaceInt8CPUKernel : public DepthToSpaceBaseCPUKernel { | |||
| int Init() override; | |||
| int ReSize() override { return 0; } | |||
| int Run() override; | |||
| private: | |||
| QuantArg in_quant_arg_; | |||
| QuantArg out_quant_arg_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -15,10 +15,8 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/opclib/arg_min_max.h" | |||
| #include "src/runtime/kernel/arm/opclib/fp32/arg_min_max.h" | |||
| #include "src/runtime/kernel/arm/opclib/int8/arg_min_max.h" | |||
| #define FLOAT_DATA_TYPE 43 | |||
| #define INT8_DATA_TYPE 32 | |||
| void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count, | |||
| int *after_axis_count) { | |||
| @@ -51,16 +49,6 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax | |||
| } | |||
| break; | |||
| } | |||
| case INT8_DATA_TYPE: { | |||
| if (param->get_max_) { | |||
| ArgMax(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), param, pre_axis_count, | |||
| axis_count, after_axis_count); | |||
| } else { | |||
| ArgMin(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), param, pre_axis_count, | |||
| axis_count, after_axis_count); | |||
| } | |||
| break; | |||
| } | |||
| default: | |||
| break; | |||
| } | |||
| @@ -100,40 +88,6 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinMaxTopknInt8(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->get_max_) { | |||
| switch (param->axis_) { | |||
| case 0: | |||
| ArgMaxDim0(input, output, in_shape, param); | |||
| break; | |||
| case 1: | |||
| ArgMaxDim1(input, output, in_shape, param); | |||
| break; | |||
| case 2: | |||
| ArgMaxDim2(input, output, in_shape, param); | |||
| break; | |||
| case 3: | |||
| ArgMaxDim3(input, output, in_shape, param); | |||
| break; | |||
| } | |||
| } else { | |||
| switch (param->axis_) { | |||
| case 0: | |||
| ArgMinDim0(input, output, in_shape, param); | |||
| break; | |||
| case 1: | |||
| ArgMinDim1(input, output, in_shape, param); | |||
| break; | |||
| case 2: | |||
| ArgMinDim2(input, output, in_shape, param); | |||
| break; | |||
| case 3: | |||
| ArgMinDim3(input, output, in_shape, param); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->topk_ == 1) { | |||
| ArgMinMaxTopk1(input, output, in_shape, param); | |||
| @@ -145,10 +99,6 @@ void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxPa | |||
| ArgMinMaxTopknFp32(reinterpret_cast<const float *>(input), reinterpret_cast<float *>(output), in_shape, param); | |||
| return; | |||
| } | |||
| case INT8_DATA_TYPE: { | |||
| ArgMinMaxTopknInt8(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), in_shape, param); | |||
| return; | |||
| } | |||
| default: | |||
| break; | |||
| } | |||
| @@ -18,6 +18,7 @@ | |||
| #include "src/runtime/kernel/arm/opclib/op_base.h" | |||
| #include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h" | |||
| #include "src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_ARG_MIN_MAX_H_ | |||
| @@ -15,19 +15,7 @@ | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_ | |||
| #include "src/runtime/kernel/arm/opclib/op_base.h" | |||
| struct DepthToSpaceParameter { | |||
| OpParameter op_parameter_; | |||
| int32_t block_size_; | |||
| int32_t in_stride_dim0_; | |||
| int32_t in_stride_dim1_; | |||
| int32_t in_stride_dim2_; | |||
| int32_t out_stride_dim0_; | |||
| int32_t out_stride_dim1_; | |||
| int32_t out_stride_dim2_; | |||
| uint8_t data_type_size_; | |||
| }; | |||
| #include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h" | |||
| void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_ | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_ | |||
| #include "src/runtime/kernel/arm/opclib/op_base.h" | |||
| struct DepthToSpaceParameter { | |||
| OpParameter op_parameter_; | |||
| int32_t block_size_; | |||
| int32_t in_stride_dim0_; | |||
| int32_t in_stride_dim1_; | |||
| int32_t in_stride_dim2_; | |||
| int32_t out_stride_dim0_; | |||
| int32_t out_stride_dim1_; | |||
| int32_t out_stride_dim2_; | |||
| uint8_t data_type_size_; | |||
| }; | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_ | |||
| @@ -1,488 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/opclib/int8/arg_min_max.h" | |||
| #define INT8_MAX_VALUE 127 | |||
| int ArgCompareAscInt8(const void *a, const void *b) { | |||
| return reinterpret_cast<const ArgElement *>(a)->data_.i8_data_ | |||
| - reinterpret_cast<const ArgElement *>(b)->data_.i8_data_; | |||
| } | |||
| int ArgCompareDescInt8(const void *a, const void *b) { | |||
| return reinterpret_cast<const ArgElement *>(b)->data_.i8_data_ | |||
| - reinterpret_cast<const ArgElement *>(a)->data_.i8_data_; | |||
| } | |||
| void ArgMaxDim0OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| param->arg_elements_[j].index_ = j; | |||
| param->arg_elements_[j].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int j = 0; j < param->topk_; ++j) { | |||
| size_t out_offset = j * param->out_strides_[0] + i; | |||
| output[out_offset] = param->arg_elements_[j].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim0OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| param->arg_elements_[j].index_ = j; | |||
| param->arg_elements_[j].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int j = 0; j < param->topk_; ++j) { | |||
| size_t out_offset = j * param->out_strides_[0] + i; | |||
| output[out_offset] = param->arg_elements_[j].index_; | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim0OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| param->arg_elements_[j].index_ = j; | |||
| param->arg_elements_[j].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int j = 0; j < param->topk_; ++j) { | |||
| size_t out_offset = j * param->out_strides_[0] + i; | |||
| output[out_offset] = param->arg_elements_[j].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim0OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| param->arg_elements_[j].index_ = j; | |||
| param->arg_elements_[j].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int j = 0; j < param->topk_; ++j) { | |||
| size_t out_offset = j * param->out_strides_[0] + i; | |||
| output[out_offset] = param->arg_elements_[j].index_; | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim1OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < param->in_strides_[1]; ++j) { | |||
| for (int k = 0; k < in_shape1; ++k) { | |||
| size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; | |||
| param->arg_elements_[k].index_ = k; | |||
| param->arg_elements_[k].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int k = 0; k < param->topk_; ++k) { | |||
| size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; | |||
| output[out_offset] = param->arg_elements_[k].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim1OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < param->in_strides_[1]; ++j) { | |||
| for (int k = 0; k < in_shape1; ++k) { | |||
| size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; | |||
| param->arg_elements_[k].index_ = k; | |||
| param->arg_elements_[k].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int k = 0; k < param->topk_; ++k) { | |||
| size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; | |||
| output[out_offset] = param->arg_elements_[k].index_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim1OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < param->in_strides_[1]; ++j) { | |||
| for (int k = 0; k < in_shape1; ++k) { | |||
| size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; | |||
| param->arg_elements_[k].index_ = k; | |||
| param->arg_elements_[k].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int k = 0; k < param->topk_; ++k) { | |||
| size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; | |||
| output[out_offset] = param->arg_elements_[k].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim1OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < param->in_strides_[1]; ++j) { | |||
| for (int k = 0; k < in_shape1; ++k) { | |||
| size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; | |||
| param->arg_elements_[k].index_ = k; | |||
| param->arg_elements_[k].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int k = 0; k < param->topk_; ++k) { | |||
| size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; | |||
| output[out_offset] = param->arg_elements_[k].index_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim2OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < param->in_strides_[2]; ++k) { | |||
| for (int l = 0; l < in_shape2; ++l) { | |||
| size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; | |||
| output[out_offset] = param->arg_elements_[l].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim2OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < param->in_strides_[2]; ++k) { | |||
| for (int l = 0; l < in_shape2; ++l) { | |||
| size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; | |||
| output[out_offset] = param->arg_elements_[l].index_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim2OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < param->in_strides_[2]; ++k) { | |||
| for (int l = 0; l < in_shape2; ++l) { | |||
| size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; | |||
| output[out_offset] = param->arg_elements_[l].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim2OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < param->in_strides_[2]; ++k) { | |||
| for (int l = 0; l < in_shape2; ++l) { | |||
| size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; | |||
| output[out_offset] = param->arg_elements_[l].index_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim3OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < in_shape2; ++k) { | |||
| size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; | |||
| size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; | |||
| for (int l = 0; l < in_shape3; ++l) { | |||
| size_t offset = l + in_dim2_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim2_offset + l; | |||
| output[out_offset] = param->arg_elements_[l].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim3OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < in_shape2; ++k) { | |||
| size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; | |||
| size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; | |||
| for (int l = 0; l < in_shape3; ++l) { | |||
| size_t offset = l + in_dim2_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim2_offset + l; | |||
| output[out_offset] = param->arg_elements_[l].index_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim3OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < in_shape2; ++k) { | |||
| size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; | |||
| size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; | |||
| for (int l = 0; l < in_shape3; ++l) { | |||
| size_t offset = l + in_dim2_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim2_offset + l; | |||
| output[out_offset] = param->arg_elements_[l].data_.f_data_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinDim3OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < in_shape2; ++k) { | |||
| size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; | |||
| size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; | |||
| for (int l = 0; l < in_shape3; ++l) { | |||
| size_t offset = l + in_dim2_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset]; | |||
| } | |||
| qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim2_offset + l; | |||
| output[out_offset] = param->arg_elements_[l].index_; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim0OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMaxDim0OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim0OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMinDim0OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim1OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMaxDim1OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim1OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMinDim1OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim2OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMaxDim2OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim2OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMinDim2OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim3OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMaxDim3OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim3OutValue(input, output, in_shape, param); | |||
| } else { | |||
| ArgMinDim3OutIndex(input, output, in_shape, param); | |||
| } | |||
| } | |||
| void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count) { | |||
| bool out_value = param->out_value_; | |||
| for (int i = 0; i < pre_axis_count; ++i) { | |||
| size_t output_offset = i * after_axis_count; | |||
| size_t input_offset = output_offset * axis_count; | |||
| for (int j = 0; j < after_axis_count; ++j) { | |||
| int8_t value = -INT8_MAX_VALUE; | |||
| int8_t index = 0; | |||
| for (int k = 0; k < axis_count; ++k) { | |||
| int8_t value_tmp = input[input_offset + k * after_axis_count + j]; | |||
| if (value_tmp > value) { | |||
| value = value_tmp; | |||
| index = k; | |||
| } | |||
| } | |||
| output[output_offset + j] = out_value ? value : index; | |||
| } | |||
| } | |||
| } | |||
| void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count) { | |||
| bool out_value = param->out_value_; | |||
| for (int i = 0; i < pre_axis_count; ++i) { | |||
| size_t output_offset = i * after_axis_count; | |||
| size_t input_offset = output_offset * axis_count; | |||
| for (int j = 0; j < after_axis_count; ++j) { | |||
| int8_t value = INT8_MAX_VALUE; | |||
| int8_t index = 0; | |||
| for (int k = 0; k < axis_count; ++k) { | |||
| int8_t value_tmp = input[input_offset + k * after_axis_count + j]; | |||
| if (value_tmp < value) { | |||
| value = value_tmp; | |||
| index = k; | |||
| } | |||
| } | |||
| output[output_offset + j] = out_value ? value : index; | |||
| } | |||
| } | |||
| } | |||
| #undef INT8_MAX_VALUE | |||
| @@ -1,33 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_ | |||
| #include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h" | |||
| void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count); | |||
| void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count); | |||
| void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_ | |||
| @@ -0,0 +1,221 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h" | |||
| #include <float.h> | |||
// Splits a shape around `axis` into three extents:
//   *pre_axis_count   - product of shape[0 .. axis-1] (1 when axis is 0),
//   *axis_count       - shape[axis],
//   *after_axis_count - product of shape[axis+1 .. dims_number-1] (1 when axis is the last dimension).
// No range check is performed on axis.
void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
                   int *after_axis_count) {
  int leading = 1;
  for (int dim = 0; dim < axis; ++dim) {
    leading *= shape[dim];
  }
  int trailing = 1;
  for (int dim = axis + 1; dim < dims_number; ++dim) {
    trailing *= shape[dim];
  }
  *pre_axis_count = leading;
  *axis_count = shape[axis];
  *after_axis_count = trailing;
}
| void ArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| bool out_value = param->out_value_; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| for (int i = 0; i < pre_axis_count; ++i) { | |||
| size_t output_offset = i * after_axis_count; | |||
| size_t input_offset = output_offset * axis_count; | |||
| for (int j = 0; j < after_axis_count; ++j) { | |||
| float value = -FLT_MAX; | |||
| if (!param->get_max_) { | |||
| value = FLT_MAX; | |||
| } | |||
| float index = 0.0f; | |||
| for (int k = 0; k < axis_count; ++k) { | |||
| float value_tmp = input[input_offset + k * after_axis_count + j] * in_quant_arg->scale_ + bias; | |||
| if (param->get_max_) { | |||
| if (value_tmp > value) { | |||
| value = value_tmp; | |||
| index = k; | |||
| } | |||
| } else { | |||
| if (value_tmp < value) { | |||
| value = value_tmp; | |||
| index = k; | |||
| } | |||
| } | |||
| } | |||
| float real_out = out_value ? value : index; | |||
| output[output_offset + j] = real_out * output_inverse_scale + output_zp; | |||
| } | |||
| } | |||
| } | |||
| void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| int pre_axis_count = 1; | |||
| int axis_count = 1; | |||
| int after_axis_count = 1; | |||
| CalcParameter(in_shape, param->dims_size_, param->axis_, &pre_axis_count, &axis_count, &after_axis_count); | |||
| ArgMinMaxQuant(input, output, param, pre_axis_count, axis_count, after_axis_count, in_quant_arg, out_quant_arg); | |||
| return; | |||
| } | |||
| int ArgCompareAscInt8(const void *a, const void *b) { | |||
| return reinterpret_cast<const ArgElement *>(a)->data_.f_data_ | |||
| - reinterpret_cast<const ArgElement *>(b)->data_.f_data_; | |||
| } | |||
| int ArgCompareDescInt8(const void *a, const void *b) { | |||
| return reinterpret_cast<const ArgElement *>(b)->data_.f_data_ | |||
| - reinterpret_cast<const ArgElement *>(a)->data_.f_data_; | |||
| } | |||
// Requantizes a real-valued result to int8: real_out * output_inverse_scale + output_zp.
//
// The result is saturated to [-128, 127] before narrowing. Converting a float whose truncated value does
// not fit in int8_t is undefined behaviour, so the unchecked conversion could yield arbitrary values for
// large results or small output scales. In-range results are truncated toward zero, exactly as before.
// A NaN result saturates to -128.
int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_zp) {
  const float kInt8Lowest = -128.0f;
  const float kInt8Highest = 127.0f;
  const float quantized = real_out * output_inverse_scale + output_zp;
  if (!(quantized > kInt8Lowest)) {  // negated form also catches NaN
    return static_cast<int8_t>(kInt8Lowest);
  }
  if (quantized > kInt8Highest) {
    return static_cast<int8_t>(kInt8Highest);
  }
  return static_cast<int8_t>(quantized);
}
| void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| bool out_value = param->out_value_; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| param->arg_elements_[j].index_ = j; | |||
| param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; | |||
| } | |||
| if (param->get_max_) { | |||
| qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8); | |||
| } else { | |||
| qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8); | |||
| } | |||
| for (int j = 0; j < param->topk_; ++j) { | |||
| size_t out_offset = j * param->out_strides_[0] + i; | |||
| float real_out = out_value ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_; | |||
| output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); | |||
| } | |||
| } | |||
| } | |||
| void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| bool out_value = param->out_value_; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < param->in_strides_[1]; ++j) { | |||
| for (int k = 0; k < in_shape1; ++k) { | |||
| size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; | |||
| param->arg_elements_[k].index_ = k; | |||
| param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; | |||
| } | |||
| if (param->get_max_) { | |||
| qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8); | |||
| } else { | |||
| qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8); | |||
| } | |||
| for (int k = 0; k < param->topk_; ++k) { | |||
| size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; | |||
| float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_; | |||
| output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| bool out_value = param->out_value_; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < param->in_strides_[2]; ++k) { | |||
| for (int l = 0; l < in_shape2; ++l) { | |||
| size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; | |||
| } | |||
| if (param->get_max_) { | |||
| qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8); | |||
| } else { | |||
| qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); | |||
| } | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; | |||
| float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; | |||
| output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| bool out_value = param->out_value_; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| size_t out_dim0_offset = i * param->out_strides_[0]; | |||
| for (int j = 0; j < in_shape1; ++j) { | |||
| size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; | |||
| size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; | |||
| for (int k = 0; k < in_shape2; ++k) { | |||
| size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; | |||
| size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; | |||
| for (int l = 0; l < in_shape3; ++l) { | |||
| size_t offset = l + in_dim2_offset; | |||
| param->arg_elements_[l].index_ = l; | |||
| param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; | |||
| } | |||
| if (param->get_max_) { | |||
| qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8); | |||
| } else { | |||
| qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); | |||
| } | |||
| for (int l = 0; l < param->topk_; ++l) { | |||
| size_t out_offset = out_dim2_offset + l; | |||
| float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; | |||
| output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_ | |||
| #include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h" | |||
| #include "src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant, QuantArg *out_quant); | |||
| void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant, QuantArg *out_quant); | |||
| void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant, QuantArg *out_quant); | |||
| void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant, QuantArg *out_quant); | |||
| void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | |||
| QuantArg *in_quant, QuantArg *out_quant); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_ | |||
| @@ -0,0 +1,63 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h" | |||
| #ifdef ENABLE_NEON | |||
| #include <arm_neon.h> | |||
| #endif | |||
| #include "src/runtime/kernel/arm/opclib/errorcode.h" | |||
| int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||
| for (int index = 0; index < element_size; ++index) { | |||
| output[index] = (int8_t)(input0[index] != input1[index]); | |||
| } | |||
| return OPCLIB_OK; | |||
| } | |||
| int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||
| for (int index = 0; index < element_size; ++index) { | |||
| output[index] = (int8_t)(input0[index] == input1[index]); | |||
| } | |||
| return OPCLIB_OK; | |||
| } | |||
| int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||
| for (int index = 0; index < element_size; ++index) { | |||
| output[index] = (int8_t)(input0[index] < input1[index]); | |||
| } | |||
| return OPCLIB_OK; | |||
| } | |||
| int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||
| for (int index = 0; index < element_size; ++index) { | |||
| output[index] = (int8_t)(input0[index] <= input1[index]); | |||
| } | |||
| return OPCLIB_OK; | |||
| } | |||
| int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||
| for (int index = 0; index < element_size; ++index) { | |||
| output[index] = (int8_t)(input0[index] > input1[index]); | |||
| } | |||
| return OPCLIB_OK; | |||
| } | |||
| int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||
| for (int index = 0; index < element_size; ++index) { | |||
| output[index] = (int8_t)(input0[index] >= input1[index]); | |||
| } | |||
| return OPCLIB_OK; | |||
| } | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_ | |||
| #include "src/runtime/kernel/arm/opclib/op_base.h" | |||
// Element-wise int8 comparison kernels.
// Every function takes two int8 inputs, an int8 output buffer and the number of elements to process.
// The visible definitions (ElementGreater / ElementGreaterEqual) store 1 or 0 per element and return
// OPCLIB_OK; the remaining ones are presumably analogous -- confirm against their definitions.

// input0[i] != input1[i]
int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

// input0[i] == input1[i]
int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

// input0[i] < input1[i]
int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

// input0[i] <= input1[i]
int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

// input0[i] > input1[i]
int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

// input0[i] >= input1[i]
int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_ | |||
| @@ -0,0 +1,111 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h" | |||
| #include "src/runtime/kernel/arm/opclib/arithmetic_common.h" | |||
| void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| int block_h = block[0]; | |||
| int block_w = block[1]; | |||
| int in_h = in_shape[1]; | |||
| int in_w = in_shape[2]; | |||
| int in_c = in_shape[3]; | |||
| size_t stride_h = block_w * out_n; | |||
| size_t output_offset = 0; | |||
| size_t in_stride_h = in_w * in_c; | |||
| size_t in_stride_n = in_stride_h * in_h; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float scale = in_quant_arg->scale_ * output_inverse_scale; | |||
| float bias = -in_quant_arg->zp_ * scale; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| for (int n = 0; n < out_n; ++n) { | |||
| for (int h = 0; h < in_h; ++h) { | |||
| size_t h_offset = h * in_stride_h; | |||
| for (int bh = 0; bh < block_h; ++bh) { | |||
| for (int w = 0; w < in_w; ++w) { | |||
| size_t w_offset = w * in_c; | |||
| for (int bw = 0; bw < block_w; ++bw) { | |||
| size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; | |||
| for (int c = 0; c < in_c; ++c) { | |||
| int32_t output_tmp = round(input[in_offset + c] * scale + bias) + output_zp; | |||
| output_tmp = output_tmp > 127 ? 127 : output_tmp; | |||
| output_tmp = output_tmp < -128 ? -128 : output_tmp; | |||
| output[output_offset++] = output_tmp; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, | |||
| const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| int block_h = block[0]; | |||
| int block_w = block[1]; | |||
| int in_n = in_shape[0]; | |||
| int in_h = in_shape[1]; | |||
| int in_w = in_shape[2]; | |||
| int in_c = in_shape[3]; | |||
| int h_start = crops[0] / block_h; | |||
| int h_valid_begin = crops[0]; | |||
| int h_end = MSMIN((in_h * block_h - crops[1]) / block_h + 1, in_h); | |||
| int h_valid_end = in_h * block_h - crops[1] - 1; | |||
| int w_start = crops[2] / block_w; | |||
| int w_valid_begin = crops[2]; | |||
| int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w); | |||
| int w_valid_end = in_w * block_w - crops[3] - 1; | |||
| size_t stride_h = block_w * out_n; | |||
| size_t output_offset = 0; | |||
| size_t in_stride_h = in_w * in_c; | |||
| size_t in_stride_n = in_stride_h * in_h; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float scale = in_quant_arg->scale_ * output_inverse_scale; | |||
| float bias = -in_quant_arg->zp_ * scale; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| for (int n = 0; n < out_n; ++n) { | |||
| for (int h = h_start; h < h_end; ++h) { | |||
| size_t h_offset = h * in_stride_h; | |||
| for (int bh = 0; bh < block_h; ++bh) { | |||
| size_t h_index = h * block_h + bh; | |||
| if (h_index < h_valid_begin || h_index > h_valid_end) { | |||
| continue; | |||
| } | |||
| for (int w = w_start; w < w_end; ++w) { | |||
| size_t w_offset = w * in_c; | |||
| for (int bw = 0; bw < block_w; ++bw) { | |||
| size_t w_index = w * block_w + bw; | |||
| if (w_index < w_valid_begin || w_index > w_valid_end) { | |||
| continue; | |||
| } | |||
| size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; | |||
| for (int c = 0; c < in_c; ++c) { | |||
| int32_t output_tmp = round(input[in_offset + c] * scale + bias) + output_zp; | |||
| output_tmp = output_tmp > 127 ? 127 : output_tmp; | |||
| output_tmp = output_tmp < -128 ? -128 : output_tmp; | |||
| output[output_offset++] = output_tmp; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -0,0 +1,25 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_ | |||
| #include "src/runtime/kernel/arm/opclib/op_base.h" | |||
| #include "src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg); | |||
| void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block, | |||
| const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_ | |||
| @@ -0,0 +1,51 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h" | |||
| #include <string.h> | |||
| void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| int32_t block_size = param->block_size_; | |||
| int32_t in_shape_dim2 = in_shape[2]; | |||
| int32_t in_shape_dim1 = in_shape[1]; | |||
| size_t copy_size = block_size * param->out_stride_dim2_; | |||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||
| float scale = in_quant_arg->scale_ * output_inverse_scale; | |||
| float bias = -in_quant_arg->zp_ * scale; | |||
| int32_t output_zp = out_quant_arg->zp_; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_offset_n = i * param->in_stride_dim0_; | |||
| size_t out_offset_n = i * param->out_stride_dim0_; | |||
| for (int j = 0; j < in_shape_dim1; ++j) { | |||
| size_t in_offset_h = in_offset_n + j * param->in_stride_dim1_; | |||
| size_t out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_; | |||
| for (int k = 0; k < in_shape_dim2; ++k) { | |||
| size_t in_offset_w = in_offset_h + k * param->in_stride_dim2_; | |||
| size_t out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_; | |||
| for (int l = 0; l < block_size; ++l) { | |||
| size_t out_offset = out_offset_w + l * param->out_stride_dim1_; | |||
| size_t in_offset = in_offset_w + l * block_size * param->out_stride_dim2_; | |||
| for (int m = 0; m < copy_size; ++m) { | |||
| int32_t output_tmp = round(input[in_offset + m] * scale + bias) + output_zp; | |||
| output_tmp = output_tmp > 127 ? 127 : output_tmp; | |||
| output_tmp = output_tmp < -128 ? -128 : output_tmp; | |||
| output[out_offset + m] = output_tmp; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -0,0 +1,24 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_ | |||
| #include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h" | |||
| #include "src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_ | |||
| @@ -0,0 +1,328 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/arg_min_max.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h" | |||
| namespace mindspore { | |||
| class TestArgMinMaxTestFp32 : public mindspore::Common { | |||
| public: | |||
| TestArgMinMaxTestFp32() = default; | |||
| }; | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest1) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {2, 2, 0, 2, 0}; | |||
| std::vector<int> shape = {3, 5}; | |||
| float out[5]; | |||
| ArgMinMaxParameter param; | |||
| param.topk_ = 1; | |||
| param.out_value_ = false; | |||
| param.axis_ = 0; | |||
| param.data_type_ = 43; | |||
| param.dims_size_ = 2; | |||
| param.get_max_ = true; | |||
| ArgMinMax(in.data(), out, shape.data(), ¶m); | |||
| for (size_t i = 0; i < except_out.size(); ++i) { | |||
| std::cout << out[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.000001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest2) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {30, 45, 30, 50, 90}; | |||
| std::vector<int> shape = {3, 5}; | |||
| float out[5]; | |||
| ArgMinMaxParameter param; | |||
| param.topk_ = 1; | |||
| param.out_value_ = true; | |||
| param.axis_ = 0; | |||
| param.data_type_ = 43; | |||
| param.dims_size_ = 2; | |||
| param.get_max_ = true; | |||
| ArgMinMax(in.data(), out, shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.000001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMinTest2) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {10, 11, 15, 1, 30}; | |||
| std::vector<int> shape = {3, 5}; | |||
| float out[5]; | |||
| ArgMinMaxParameter param; | |||
| param.topk_ = 1; | |||
| param.out_value_ = true; | |||
| param.axis_ = 0; | |||
| param.data_type_ = 43; | |||
| param.dims_size_ = 2; | |||
| param.get_max_ = false; | |||
| ArgMinMax(in.data(), out, shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.000001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_data) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 2; | |||
| std::vector<int> in_shape = {1, 1, 3, 5}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = true; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {1, 1, 2, 5}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[10]; | |||
| ArgMaxDim2(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_index) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 2; | |||
| std::vector<int> in_shape = {1, 1, 3, 5}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = false; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {1, 1, 2, 5}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[10]; | |||
| ArgMaxDim2(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_data) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {90, 40, | |||
| 50, 20, | |||
| 50, 45}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 3; | |||
| std::vector<int> in_shape = {1, 1, 3, 5}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = true; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {1, 1, 3, 2}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[6]; | |||
| ArgMaxDim3(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_index) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {4, 3, | |||
| 4, 0, | |||
| 3, 1}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 3; | |||
| std::vector<int> in_shape = {1, 1, 3, 5}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = false; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {1, 1, 3, 2}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[6]; | |||
| ArgMaxDim3(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_index) { | |||
| std::vector<float> in = {100, 2, 300, | |||
| 4, 50, 6, | |||
| 11, 12, 13, | |||
| 34, 35, 36, | |||
| 9, 6, 17, | |||
| 10, 20, 30, | |||
| 10, 20, 30, | |||
| 40, 5, 60, | |||
| 7, 80, 90, | |||
| 10, 11, 120, | |||
| 18, 5, 16, | |||
| 9, 22, 23}; | |||
| std::vector<float> except_out = {0, 1, 0, | |||
| 1, 0, 1, | |||
| 1, 2, 2, | |||
| 2, 1, 2, | |||
| 2, 1, 1, | |||
| 0, 2, 1, | |||
| 0, 0, 0, | |||
| 1, 1, 0}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 1; | |||
| std::vector<int> in_shape = {2, 3, 2, 3}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = false; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {2, 2, 2, 3}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[24]; | |||
| ArgMaxDim1(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_data) { | |||
| std::vector<float> in = {100, 2, 300, | |||
| 4, 50, 6, | |||
| 11, 12, 13, | |||
| 34, 35, 36, | |||
| 9, 6, 17, | |||
| 10, 20, 30, | |||
| 10, 20, 30, | |||
| 40, 5, 60, | |||
| 7, 80, 90, | |||
| 10, 11, 120, | |||
| 18, 5, 16, | |||
| 9, 22, 23}; | |||
| std::vector<float> except_out = {100, 12, 300, | |||
| 34, 50, 36, | |||
| 11, 6, 17, | |||
| 10, 35, 30, | |||
| 18, 80, 90, | |||
| 40, 22, 120, | |||
| 10, 20, 30, | |||
| 10, 11, 60}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 1; | |||
| std::vector<int> in_shape = {2, 3, 2, 3}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = true; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {2, 2, 2, 3}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[24]; | |||
| ArgMaxDim1(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_index) { | |||
| std::vector<float> in = {100, 2, | |||
| 4, 50, | |||
| 11, 12, | |||
| 34, 35, | |||
| 10, 20, | |||
| 40, 5, | |||
| 7, 80, | |||
| 10, 11, | |||
| 55, 25, | |||
| 5, 15, | |||
| 18, 8, | |||
| 15, 16}; | |||
| std::vector<float> except_out = {0, 2, | |||
| 1, 0, | |||
| 2, 1, | |||
| 0, 0, | |||
| 2, 1, | |||
| 2, 2, | |||
| 0, 0, | |||
| 2, 2}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 1; | |||
| std::vector<int> in_shape = {3, 2, 2, 2}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = false; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {2, 2, 2, 2}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[16]; | |||
| ArgMaxDim0(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_data) { | |||
| std::vector<float> in = {100, 2, | |||
| 4, 50, | |||
| 11, 12, | |||
| 34, 35, | |||
| 10, 20, | |||
| 40, 5, | |||
| 7, 80, | |||
| 10, 11, | |||
| 55, 25, | |||
| 5, 15, | |||
| 18, 8, | |||
| 15, 16}; | |||
| std::vector<float> except_out = {100, 25, | |||
| 40, 50, | |||
| 18, 80, | |||
| 34, 35, | |||
| 55, 20, | |||
| 5, 15, | |||
| 11, 12, | |||
| 15, 16}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 1; | |||
| std::vector<int> in_shape = {3, 2, 2, 2}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = true; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {2, 2, 2, 2}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[16]; | |||
| ArgMaxDim0(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| TEST_F(TestArgMinMaxTestFp32, ArgMinTest1_axis3_out_data) { | |||
| std::vector<float> in = {10, 20, 30, 40, 90, | |||
| 20, 11, 15, 1, 50, | |||
| 30, 45, 25, 50, 30}; | |||
| std::vector<float> except_out = {10, 20, | |||
| 1, 11, | |||
| 25, 30}; | |||
| ArgMinMaxParameter param; | |||
| param.axis_ = 3; | |||
| std::vector<int> in_shape = {1, 1, 3, 5}; | |||
| param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement))); | |||
| param.out_value_ = true; | |||
| param.topk_ = 2; | |||
| std::vector<int> out_shape = {1, 1, 3, 2}; | |||
| ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size()); | |||
| ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size()); | |||
| float out[6]; | |||
| ArgMinDim3(in.data(), out, in_shape.data(), ¶m); | |||
| CompareOutputData(out, except_out.data(), except_out.size(), 0.00001); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,197 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/batch_to_space.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h" | |||
| namespace mindspore { | |||
| class BatchToSpaceTestFp32 : public mindspore::Common { | |||
| public: | |||
| BatchToSpaceTestFp32() = default; | |||
| }; | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest1) { | |||
| float input[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160}; | |||
| constexpr int kOutSize = 12; | |||
| float expect_out[kOutSize] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {4, 1, 1, 3}; | |||
| int out_n = 1; | |||
| int block[2] = {2, 2}; | |||
| BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); | |||
| for (int i = 0; i < kOutSize; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_1) { | |||
| float input[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160}; | |||
| constexpr int kOutSize = 3; | |||
| float expect_out[kOutSize] = {5, 50, 150}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {4, 1, 1, 3}; | |||
| int out_n = 1; | |||
| int block[2] = {2, 2}; | |||
| int crops[4] = {1, 0, 0, 1}; | |||
| BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); | |||
| for (int i = 0; i < kOutSize; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest2) { | |||
| float input[32] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, | |||
| 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160}; | |||
| constexpr int kOutSize = 32; | |||
| float expect_out[kOutSize] = {1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 6, 60, 7, 70, 8, 80, | |||
| 9, 90, 10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150, 16, 160}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {4, 2, 2, 2}; | |||
| int out_n = 1; | |||
| int block[2] = {2, 2}; | |||
| BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); | |||
| for (int i = 0; i < kOutSize; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_2) { | |||
| float input[32] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, | |||
| 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160}; | |||
| constexpr int kOutSize = 12; | |||
| float expect_out[kOutSize] = {6, 60, 7, 70, 8, 80, | |||
| 10, 100, 11, 110, 12, 120}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {4, 2, 2, 2}; | |||
| int out_n = 1; | |||
| int block[2] = {2, 2}; | |||
| int crops[4] = {1, 1, 1, 0}; | |||
| BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); | |||
| for (int i = 0; i < kOutSize; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest3) { | |||
| float input[64] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, | |||
| 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, | |||
| 21, 10, 23, 30, 29, 90, 211, 110, 22, 20, 24, 40, 210, 100, 212, 120, | |||
| 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160}; | |||
| constexpr int kOutSize = 64; | |||
| float expect_out[kOutSize] = {1, 10, 5, 50, 3, 30, 7, 70, 21, 10, 25, 50, 23, 30, 27, 70, | |||
| 9, 90, 13, 130, 11, 110, 15, 150, 29, 90, 213, 130, 211, 110, 215, 150, | |||
| 2, 20, 6, 60, 4, 40, 8, 80, 22, 20, 26, 60, 24, 40, 28, 80, | |||
| 10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {8, 2, 2, 2}; | |||
| int out_n = 2; | |||
| int block[2] = {2, 2}; | |||
| BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); | |||
| for (int i = 0; i < kOutSize && i < 32; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_3) { | |||
| float input[64] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, | |||
| 5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, | |||
| 21, 10, 23, 30, 29, 90, 211, 110, 22, 20, 24, 40, 210, 100, 212, 120, | |||
| 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160}; | |||
| constexpr int kOutSize = 16; | |||
| float expect_out[kOutSize] = {9, 90, 13, 130, 29, 90, 213, 130, | |||
| 10, 100, 14, 140, 210, 100, 214, 140}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {8, 2, 2, 2}; | |||
| int out_n = 2; | |||
| int block[2] = {2, 2}; | |||
| int crops[4] = {2, 0, 0, 2}; | |||
| BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); | |||
| for (int i = 0; i < kOutSize && i < 32; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest4) { | |||
| float input[96] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, 5, 50, 7, 70, | |||
| 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, 21, 10, 23, 30, 29, 90, 211, 110, | |||
| 22, 20, 24, 40, 210, 100, 212, 120, 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, | |||
| 214, 140, 216, 160, 31, 10, 33, 30, 39, 90, 311, 110, 32, 20, 34, 40, 310, 100, 312, 120, | |||
| 35, 50, 37, 70, 313, 130, 315, 150, 36, 60, 38, 80, 314, 140, 316, 160}; | |||
| constexpr int kOutSize = 96; | |||
| float expect_out[kOutSize] = { | |||
| 1, 10, 5, 50, 3, 30, 7, 70, 21, 10, 25, 50, 23, 30, 27, 70, 31, 10, 35, 50, 33, 30, 37, 70, | |||
| 9, 90, 13, 130, 11, 110, 15, 150, 29, 90, 213, 130, 211, 110, 215, 150, 39, 90, 313, 130, 311, 110, 315, 150, | |||
| 2, 20, 6, 60, 4, 40, 8, 80, 22, 20, 26, 60, 24, 40, 28, 80, 32, 20, 36, 60, 34, 40, 38, 80, | |||
| 10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160, 310, 100, 314, 140, 312, 120, 316, 160}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {12, 2, 2, 2}; | |||
| int out_n = 2; | |||
| int block[2] = {3, 2}; | |||
| BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float)); | |||
| for (int i = 0; i < kOutSize && i < 32; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_4) { | |||
| float input[96] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, 5, 50, 7, 70, | |||
| 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, 21, 10, 23, 30, 29, 90, 211, 110, | |||
| 22, 20, 24, 40, 210, 100, 212, 120, 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, | |||
| 214, 140, 216, 160, 31, 10, 33, 30, 39, 90, 311, 110, 32, 20, 34, 40, 310, 100, 312, 120, | |||
| 35, 50, 37, 70, 313, 130, 315, 150, 36, 60, 38, 80, 314, 140, 316, 160}; | |||
| constexpr int kOutSize = 24; | |||
| float expect_out[kOutSize] = { | |||
| 25, 50, 23, 30, 35, 50, 33, 30, | |||
| 13, 130, 11, 110, 26, 60, 24, 40, 36, 60, 34, 40, 14, 140, 12, 120}; | |||
| float output[kOutSize]; | |||
| int in_shape[4] = {12, 2, 2, 2}; | |||
| int out_n = 2; | |||
| int block[2] = {3, 2}; | |||
| int crops[4] = {1, 2, 1, 1}; | |||
| BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float)); | |||
| for (int i = 0; i < kOutSize && i < 32; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << "\n"; | |||
| CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,234 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h" | |||
| namespace mindspore { | |||
| // GoogleTest fixture shared by the fp32 crop tests; it adds nothing on top of Common. | |||
| class CropTestFp32 : public Common { | |||
|  public: | |||
|   CropTestFp32() = default; | |||
| }; | |||
| TEST_F(CropTestFp32, CropTest1) { | |||
|   // Crop4D, single thread: {2, 2, 2, 2} -> {2, 1, 1, 1} with axis_ = 1 and | |||
|   // offsets {1, 1, 1}; the expected result is {8, 16}. | |||
|   constexpr int kOutputCount = 2; | |||
|   float src[16] = {1, 2,  3,  4,  5,  6,  7,  8, | |||
|                    9, 10, 11, 12, 13, 14, 15, 16}; | |||
|   float expected[kOutputCount] = {8, 16}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {2, 2, 2, 2}; | |||
|   int dst_shape[4] = {2, 1, 1, 1}; | |||
|   CropParameter param; | |||
|   param.op_parameter_.thread_num_ = 1; | |||
|   param.thread_id_ = 0; | |||
|   param.axis_ = 1; | |||
|   for (int dim = 0; dim < 3; ++dim) { | |||
|     param.offset_[dim] = 1; | |||
|   } | |||
|   Crop4D(src, dst, src_shape, dst_shape, &param); | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest2) { | |||
|   // Crop4D, single thread: {2, 2, 2, 2} -> {1, 1, 2, 2} with axis_ = 0 and | |||
|   // offsets {1, 1, 0, 0}; the expected result is {13, 14, 15, 16}. | |||
|   constexpr int kOutputCount = 4; | |||
|   float src[16] = {1, 2,  3,  4,  5,  6,  7,  8, | |||
|                    9, 10, 11, 12, 13, 14, 15, 16}; | |||
|   float expected[kOutputCount] = {13, 14, 15, 16}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {2, 2, 2, 2}; | |||
|   int dst_shape[4] = {1, 1, 2, 2}; | |||
|   CropParameter param; | |||
|   param.op_parameter_.thread_num_ = 1; | |||
|   param.thread_id_ = 0; | |||
|   param.axis_ = 0; | |||
|   param.offset_[0] = 1; | |||
|   param.offset_[1] = 1; | |||
|   param.offset_[2] = 0; | |||
|   param.offset_[3] = 0; | |||
|   Crop4D(src, dst, src_shape, dst_shape, &param); | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest3) { | |||
|   // Crop4D, single thread: {2, 2, 2, 2} -> {2, 2, 2, 1} with axis_ = 3 and | |||
|   // offset_[0] = 1; the expected result is every second input value. | |||
|   constexpr int kOutputCount = 8; | |||
|   float src[16] = {1, 2,  3,  4,  5,  6,  7,  8, | |||
|                    9, 10, 11, 12, 13, 14, 15, 16}; | |||
|   float expected[kOutputCount] = {2, 4, 6, 8, 10, 12, 14, 16}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {2, 2, 2, 2}; | |||
|   int dst_shape[4] = {2, 2, 2, 1}; | |||
|   CropParameter param; | |||
|   param.op_parameter_.thread_num_ = 1; | |||
|   param.thread_id_ = 0; | |||
|   param.axis_ = 3; | |||
|   param.offset_[0] = 1; | |||
|   Crop4D(src, dst, src_shape, dst_shape, &param); | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest4) { | |||
|   // Same data as CropTest3, but thread_num_ = 2: Crop4D is invoked once per | |||
|   // thread id (0, then 1) on the same output buffer before comparing. | |||
|   constexpr int kOutputCount = 8; | |||
|   constexpr int kThreadCount = 2; | |||
|   float src[16] = {1, 2,  3,  4,  5,  6,  7,  8, | |||
|                    9, 10, 11, 12, 13, 14, 15, 16}; | |||
|   float expected[kOutputCount] = {2, 4, 6, 8, 10, 12, 14, 16}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {2, 2, 2, 2}; | |||
|   int dst_shape[4] = {2, 2, 2, 1}; | |||
|   CropParameter param; | |||
|   param.axis_ = 3; | |||
|   param.offset_[0] = 1; | |||
|   param.op_parameter_.thread_num_ = kThreadCount; | |||
|   for (int tid = 0; tid < kThreadCount; ++tid) { | |||
|     param.thread_id_ = tid; | |||
|     Crop4D(src, dst, src_shape, dst_shape, &param); | |||
|   } | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest5) { | |||
|   // Crop4DNoParallel: {2, 2, 2, 2} -> {2, 1, 1, 1} with axis_ = 1 and | |||
|   // offsets {1, 1, 1}; the expected result is {8, 16}. | |||
|   constexpr int kOutputCount = 2; | |||
|   float src[16] = {1, 2,  3,  4,  5,  6,  7,  8, | |||
|                    9, 10, 11, 12, 13, 14, 15, 16}; | |||
|   float expected[kOutputCount] = {8, 16}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {2, 2, 2, 2}; | |||
|   int dst_shape[4] = {2, 1, 1, 1}; | |||
|   CropParameter param; | |||
|   param.axis_ = 1; | |||
|   for (int dim = 0; dim < 3; ++dim) { | |||
|     param.offset_[dim] = 1; | |||
|   } | |||
|   Crop4DNoParallel(src, dst, src_shape, dst_shape, &param); | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest6) { | |||
|   // Crop4DNoParallel: {2, 2, 2, 2} -> {1, 1, 2, 2} with axis_ = 0 and | |||
|   // offsets {1, 1, 0, 0}; the expected result is {13, 14, 15, 16}. | |||
|   constexpr int kOutputCount = 4; | |||
|   float src[16] = {1, 2,  3,  4,  5,  6,  7,  8, | |||
|                    9, 10, 11, 12, 13, 14, 15, 16}; | |||
|   float expected[kOutputCount] = {13, 14, 15, 16}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {2, 2, 2, 2}; | |||
|   int dst_shape[4] = {1, 1, 2, 2}; | |||
|   CropParameter param; | |||
|   param.axis_ = 0; | |||
|   param.offset_[0] = 1; | |||
|   param.offset_[1] = 1; | |||
|   param.offset_[2] = 0; | |||
|   param.offset_[3] = 0; | |||
|   Crop4DNoParallel(src, dst, src_shape, dst_shape, &param); | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest7) { | |||
|   // Crop4DNoParallel: {2, 2, 2, 2} -> {2, 2, 2, 1} with axis_ = 3 and | |||
|   // offset_[0] = 1; the expected result is every second input value. | |||
|   constexpr int kOutputCount = 8; | |||
|   float src[16] = {1, 2,  3,  4,  5,  6,  7,  8, | |||
|                    9, 10, 11, 12, 13, 14, 15, 16}; | |||
|   float expected[kOutputCount] = {2, 4, 6, 8, 10, 12, 14, 16}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {2, 2, 2, 2}; | |||
|   int dst_shape[4] = {2, 2, 2, 1}; | |||
|   CropParameter param; | |||
|   param.axis_ = 3; | |||
|   param.offset_[0] = 1; | |||
|   Crop4DNoParallel(src, dst, src_shape, dst_shape, &param); | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest8) { | |||
|   // Crop4D with thread_num_ = 2: {1, 3, 3, 3} -> {1, 1, 2, 2}, axis_ = 1 and | |||
|   // offsets {1, 1, 1}. Crop4D is invoked for thread ids 0 and 1 in turn. | |||
|   constexpr int kOutputCount = 4; | |||
|   constexpr int kThreadCount = 2; | |||
|   float src[27] = {1,  2,  3,  4,  5,  6,  7,  8,  9, | |||
|                    11, 12, 13, 14, 15, 16, 17, 18, 19, | |||
|                    21, 22, 23, 24, 25, 26, 27, 28, 29}; | |||
|   float expected[kOutputCount] = {15, 16, 18, 19}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {1, 3, 3, 3}; | |||
|   int dst_shape[4] = {1, 1, 2, 2}; | |||
|   CropParameter param; | |||
|   param.axis_ = 1; | |||
|   for (int dim = 0; dim < 3; ++dim) { | |||
|     param.offset_[dim] = 1; | |||
|   } | |||
|   param.op_parameter_.thread_num_ = kThreadCount; | |||
|   for (int tid = 0; tid < kThreadCount; ++tid) { | |||
|     param.thread_id_ = tid; | |||
|     Crop4D(src, dst, src_shape, dst_shape, &param); | |||
|   } | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| TEST_F(CropTestFp32, CropTest9) { | |||
|   // Crop4D with thread_num_ = 2: {1, 4, 4, 4} -> {1, 2, 2, 2}, axis_ = 1 and | |||
|   // offsets {1, 1, 1}. Crop4D is invoked for thread ids 0 and 1 in turn. | |||
|   constexpr int kOutputCount = 8; | |||
|   constexpr int kThreadCount = 2; | |||
|   float src[64] = {1,  2,   3,   4,   5,   6,   7,   8, | |||
|                    9,  10,  11,  12,  13,  14,  15,  16, | |||
|                    11, 12,  13,  14,  15,  16,  17,  18, | |||
|                    19, 110, 111, 112, 113, 114, 115, 116, | |||
|                    21, 22,  23,  24,  25,  26,  27,  28, | |||
|                    29, 210, 211, 212, 213, 214, 215, 216, | |||
|                    31, 32,  33,  34,  35,  36,  37,  38, | |||
|                    39, 310, 311, 312, 313, 314, 315, 316}; | |||
|   float expected[kOutputCount] = {16, 17, 110, 111, 26, 27, 210, 211}; | |||
|   float dst[kOutputCount]; | |||
|   int src_shape[4] = {1, 4, 4, 4}; | |||
|   int dst_shape[4] = {1, 2, 2, 2}; | |||
|   CropParameter param; | |||
|   param.axis_ = 1; | |||
|   for (int dim = 0; dim < 3; ++dim) { | |||
|     param.offset_[dim] = 1; | |||
|   } | |||
|   param.op_parameter_.thread_num_ = kThreadCount; | |||
|   for (int tid = 0; tid < kThreadCount; ++tid) { | |||
|     param.thread_id_ = tid; | |||
|     Crop4D(src, dst, src_shape, dst_shape, &param); | |||
|   } | |||
|   for (float value : dst) { | |||
|     std::cout << value << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(dst, expected, kOutputCount, 0.000001); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,85 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h" | |||
| namespace mindspore { | |||
| // GoogleTest fixture shared by the fp32 depth-to-space tests; it adds nothing on top of Common. | |||
| class DepthToSpaceTestFp32 : public Common { | |||
|  public: | |||
|   DepthToSpaceTestFp32() = default; | |||
| }; | |||
| TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest2) { | |||
|   // Runs DepthToSpaceForNHWC with block_size_ = 2 on a {1, 2, 2, 4} float input | |||
|   // and checks the 16 values written for the {1, 4, 4, 1} output shape. | |||
|   constexpr int kOutSize = 16; | |||
|   float input[16] = {1, 2, 10, 20, 5, 6, 3, 8, 18, 10, 11, 55, 3, 4, 15, 25}; | |||
|   float expect_out[kOutSize] = {1, 2, 5, 6, 10, 20, 3, 8, 18, 10, 3, 4, 11, 55, 15, 25}; | |||
|   float output[kOutSize]; | |||
|   int in_shape[4] = {1, 2, 2, 4}; | |||
|   int out_shape[4] = {1, 4, 4, 1}; | |||
|   // Strides come from ComputeStrides; only the first three of each are copied | |||
|   // into the parameter struct. | |||
|   int in_strides[4]; | |||
|   ComputeStrides(in_shape, in_strides, 4); | |||
|   int out_strides[4]; | |||
|   ComputeStrides(out_shape, out_strides, 4); | |||
|   DepthToSpaceParameter param; | |||
|   param.block_size_ = 2; | |||
|   param.in_stride_dim0_ = in_strides[0]; | |||
|   param.in_stride_dim1_ = in_strides[1]; | |||
|   param.in_stride_dim2_ = in_strides[2]; | |||
|   param.out_stride_dim0_ = out_strides[0]; | |||
|   param.out_stride_dim1_ = out_strides[1]; | |||
|   param.out_stride_dim2_ = out_strides[2]; | |||
|   param.data_type_size_ = sizeof(float); | |||
|   // The last argument is the address of param; the previous text here was a | |||
|   // mis-decoded character sequence, not valid C++. | |||
|   DepthToSpaceForNHWC(static_cast<const void *>(input), output, in_shape, &param); | |||
|   for (int i = 0; i < kOutSize; ++i) { | |||
|     std::cout << output[i] << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest3) { | |||
|   // Runs DepthToSpaceForNHWC with block_size_ = 2 on a {1, 1, 1, 8} float input; | |||
|   // for the {1, 2, 2, 2} output shape the expected values equal the input order. | |||
|   constexpr int kOutSize = 8; | |||
|   float input[8] = {1, 2, 3, 4, 5, 6, 7, 8}; | |||
|   float expect_out[kOutSize] = {1, 2, 3, 4, 5, 6, 7, 8}; | |||
|   float output[kOutSize]; | |||
|   int in_shape[4] = {1, 1, 1, 8}; | |||
|   int out_shape[4] = {1, 2, 2, 2}; | |||
|   // Strides come from ComputeStrides; only the first three of each are copied | |||
|   // into the parameter struct. | |||
|   int in_strides[4]; | |||
|   ComputeStrides(in_shape, in_strides, 4); | |||
|   int out_strides[4]; | |||
|   ComputeStrides(out_shape, out_strides, 4); | |||
|   DepthToSpaceParameter param; | |||
|   param.block_size_ = 2; | |||
|   param.in_stride_dim0_ = in_strides[0]; | |||
|   param.in_stride_dim1_ = in_strides[1]; | |||
|   param.in_stride_dim2_ = in_strides[2]; | |||
|   param.out_stride_dim0_ = out_strides[0]; | |||
|   param.out_stride_dim1_ = out_strides[1]; | |||
|   param.out_stride_dim2_ = out_strides[2]; | |||
|   param.data_type_size_ = sizeof(float); | |||
|   // The last argument is the address of param; the previous text here was a | |||
|   // mis-decoded character sequence, not valid C++. | |||
|   DepthToSpaceForNHWC(static_cast<const void *>(input), output, in_shape, &param); | |||
|   for (int i = 0; i < kOutSize; ++i) { | |||
|     std::cout << output[i] << " "; | |||
|   } | |||
|   std::cout << "\n"; | |||
|   CompareOutputData(output, expect_out, kOutSize, 0.000001); | |||
| } | |||
| } // namespace mindspore | |||