Merge pull request !3889 from zhanyuan/mastertags/v0.7.0-beta
| @@ -33,29 +33,30 @@ int MatMul::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor | |||
| auto output = outputs_.front(); | |||
| MS_ASSERT(output != nullptr); | |||
| std::vector<int> x_shape = input0->shape(); | |||
| std::vector<int> w_shape = input1->shape(); | |||
| if (x_shape.size() < 2 || w_shape.size() < 2) { | |||
| std::vector<int> a_shape = input0->shape(); | |||
| std::vector<int> b_shape = input1->shape(); | |||
| if (a_shape.size() < 3 || b_shape.size() < 3) { | |||
| MS_LOG(ERROR) << "inputs shape is invalid"; | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| for (int i = 0; i < a_shape.size() - 2; ++i) { | |||
| if (a_shape[i] != b_shape[i]) { | |||
| MS_LOG(ERROR) << "Op MatMul's dimensions must be equal"; | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| } | |||
| auto matmul_prim = this->primitive->value_as_MatMul(); | |||
| if (matmul_prim->transposeA()) { | |||
| int tmp = x_shape.back(); | |||
| x_shape[x_shape.size() - 1] = x_shape[x_shape.size() - 2]; | |||
| x_shape[x_shape.size() - 2] = tmp; | |||
| std::swap(a_shape[a_shape.size() - 1], a_shape[a_shape.size() - 2]); | |||
| } | |||
| if (matmul_prim->transposeB()) { | |||
| int tmp = w_shape.back(); | |||
| w_shape[w_shape.size() - 1] = w_shape[w_shape.size() - 2]; | |||
| w_shape[w_shape.size() - 2] = tmp; | |||
| std::swap(b_shape[b_shape.size() - 1], b_shape[b_shape.size() - 2]); | |||
| } | |||
| auto y_shape_size = std::max(x_shape.size(), w_shape.size()); | |||
| std::vector<int> y_shape(y_shape_size); | |||
| y_shape = x_shape; | |||
| y_shape[y_shape_size - 1] = w_shape[w_shape.size() - 1]; | |||
| output->set_shape(y_shape); | |||
| std::vector<int> c_shape(a_shape); | |||
| c_shape[c_shape.size() - 1] = b_shape[b_shape.size() - 1]; | |||
| output->set_shape(c_shape); | |||
| output->set_data_type(input0->data_type()); | |||
| output->SetFormat(input0->GetFormat()); | |||
| @@ -139,6 +139,8 @@ Primitive *Primitive::CreatePrimitive(schema::Primitive *primitive) { | |||
| return new lite::SpaceToBatch(const_cast<schema::Primitive *>(primitive)); | |||
| case schema::PrimitiveType_QuantDTypeCast: | |||
| return new lite::QuantDTypeCast(const_cast<schema::Primitive *>(primitive)); | |||
| case schema::PrimitiveType_MatMul: | |||
| return new lite::MatMul(const_cast<schema::Primitive *>(primitive)); | |||
| default: | |||
| break; | |||
| } | |||
| @@ -0,0 +1,72 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/base/matmul_base.h" | |||
| #include "src/runtime/kernel/arm/fp32/matmul.h" | |||
| #include "src/runtime/kernel/arm/int8/matmul_int8.h" | |||
| #include "src/kernel_factory.h" | |||
| #include "include/errorcode.h" | |||
| #include "include/context.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_MatMul; | |||
| namespace mindspore::kernel { | |||
| kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter, | |||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Concat); | |||
| auto input_tensor = inputs.at(kInputIndex); | |||
| auto data_type = input_tensor->data_type(); | |||
| kernel::LiteKernel *kernel = nullptr; | |||
| switch (data_type) { | |||
| case kNumberTypeInt8: | |||
| case kNumberTypeUInt8: { | |||
| kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx); | |||
| if (!kernel) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| } | |||
| break; | |||
| } | |||
| case kNumberTypeFloat32: { | |||
| kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx); | |||
| if (!kernel) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| } | |||
| break; | |||
| } | |||
| default: | |||
| break; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulKernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,49 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "include/context.h" | |||
| #include "src/runtime/kernel/arm/opclib/matmul.h" | |||
| using mindspore::lite::Context; | |||
| namespace mindspore::kernel { | |||
| class MatmulBaseCPUKernel : public LiteKernel { | |||
| public: | |||
| MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->threadNum) { | |||
| params_ = reinterpret_cast<MatMulParameter *>(opParameter); | |||
| } | |||
| ~MatmulBaseCPUKernel() = default; | |||
| int Init() override { return 0; } | |||
| int ReSize() override { return 0; } | |||
| int Run() override { return 0; } | |||
| protected: | |||
| MatMulParameter *params_; | |||
| int thread_count_; | |||
| int thread_stride_; | |||
| const Context *ctx_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_ | |||
| @@ -15,44 +15,102 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32/matmul.h" | |||
| #include <vector> | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/kernel/arm/opclib/fp32/matmul.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_MEMORY_FAILED; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_MatMul; | |||
| namespace mindspore::kernel { | |||
| MatmulCPUKernel::~MatmulCPUKernel() { | |||
| ctx_->allocator->Free(a_c8_ptr_); | |||
| ctx_->allocator->Free(b_r8_ptr_); | |||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||
| } | |||
| int MatmulCPUKernel::ReSize() { return RET_OK; } | |||
| int MatmulCPUKernel::Run() { return RET_OK; } | |||
| int MatmulCPUKernel::Init() { | |||
| int batch = 1; | |||
| auto x_shape = inputs_[0]->shape(); | |||
| auto o_shape = outputs_[0]->shape(); | |||
| for (int i = 0; i < x_shape.size() - 2; ++i) { | |||
| batch *= x_shape[i]; | |||
| } | |||
| params_->batch = batch; | |||
| params_->row_ = o_shape[o_shape.size() - 2]; | |||
| params_->col_ = o_shape[o_shape.size() - 1]; | |||
| params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1]; | |||
| params_->row_8_ = UP_ROUND(params_->row_, 8); | |||
| params_->col_8_ = UP_ROUND(params_->col_, 8); | |||
| thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8)); | |||
| thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_); | |||
| int MatmulCPUKernel::Init() { return RET_OK; } | |||
| a_c8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(float))); | |||
| if (!a_c8_ptr_) { | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(float)); | |||
| b_r8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(float))); | |||
| if (!b_r8_ptr_) { | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(float)); | |||
| c_r8x8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(float))); | |||
| if (!c_r8x8_ptr_) { | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuMatmulFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc) { | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_MatMul); | |||
| auto *kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new MatmulCPUKernel fail!"; | |||
| return nullptr; | |||
| int MatmulCPUKernel::RunImpl(int task_id) { | |||
| int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_); | |||
| if (cur_oc <= 0) { | |||
| return RET_OK; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| return nullptr; | |||
| auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_; | |||
| auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_; | |||
| MatMul(a_c8_ptr_, cur_b, cur_c, NULL, ActType_No, params_->deep_, params_->row_8_, cur_oc * 8); | |||
| return RET_OK; | |||
| } | |||
| int MatmulFloatRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto op = reinterpret_cast<MatmulCPUKernel *>(cdata); | |||
| auto error_code = op->RunImpl(task_id); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "MatmulFp32Run error task_id[" << task_id << "] error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return kernel; | |||
| return RET_OK; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulFp32KernelCreator) | |||
| int MatmulCPUKernel::Run() { | |||
| auto a_ptr = reinterpret_cast<float *>(inputs_[0]->Data()); | |||
| auto b_ptr = reinterpret_cast<float *>(inputs_[1]->Data()); | |||
| auto c_ptr = reinterpret_cast<float *>(outputs_[0]->Data()); | |||
| auto a_stride = params_->row_ * params_->deep_; | |||
| auto b_stride = params_->deep_ * params_->col_; | |||
| auto c_stride = params_->row_ * params_->col_; | |||
| for (int i = 0; i < params_->batch; ++i) { | |||
| auto cur_a_ptr = a_ptr + i * a_stride; | |||
| auto cur_b_ptr = b_ptr + i * b_stride; | |||
| auto cur_c_ptr = c_ptr + i * c_stride; | |||
| if (params_->a_transpose_) { | |||
| RowMajor2Row8Major(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_); | |||
| } else { | |||
| RowMajor2Col8Major(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_); | |||
| } | |||
| if (params_->b_transpose_) { | |||
| RowMajor2Col8Major(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_); | |||
| } else { | |||
| RowMajor2Row8Major(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_); | |||
| } | |||
| LiteBackendParallelLaunch(MatmulFloatRun, this, thread_count_); | |||
| Row8x8Major2RowMajor(c_r8x8_ptr_, cur_c_ptr, params_->row_, params_->col_); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -19,27 +19,26 @@ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/runtime/kernel/arm/opclib/matmul.h" | |||
| #include "src/runtime/kernel/arm/base/matmul_base.h" | |||
| namespace mindspore::kernel { | |||
| class MatmulCPUKernel : public LiteKernel { | |||
| class MatmulCPUKernel : public MatmulBaseCPUKernel { | |||
| public: | |||
| explicit MatmulCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs) { | |||
| matmul_param_ = reinterpret_cast<MatMulParameter *>(parameter); | |||
| } | |||
| ~MatmulCPUKernel() override = default; | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| ~MatmulCPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| private: | |||
| MatMulParameter *matmul_param_; | |||
| float *a_c8_ptr_; | |||
| float *b_r8_ptr_; | |||
| float *c_r8x8_ptr_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_MATMUL_H_ | |||
| @@ -42,7 +42,7 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel { | |||
| int RunImpl(int task_id); | |||
| private: | |||
| FcQuantArg quant_params_; | |||
| MatmulQuantArg quant_params_; | |||
| int8_t *a_c8_ptr_; | |||
| int8_t *b_r8_ptr_; | |||
| int *c_r8x8_ptr_; | |||
| @@ -0,0 +1,142 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/matmul_int8.h" | |||
| #include "src/runtime/kernel/arm/opclib/int8/matmul.h" | |||
| #include "src/runtime/kernel/arm/opclib/common_func.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::RET_MEMORY_FAILED; | |||
| using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| MatmulInt8CPUKernel::~MatmulInt8CPUKernel() { | |||
| ctx_->allocator->Free(a_c8_ptr_); | |||
| ctx_->allocator->Free(b_r8_ptr_); | |||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||
| } | |||
| int MatmulInt8CPUKernel::Init() { | |||
| int batch = 1; | |||
| auto x_shape = inputs_[0]->shape(); | |||
| auto o_shape = outputs_[0]->shape(); | |||
| for (int i = 0; i < x_shape.size() - 2; ++i) { | |||
| batch *= x_shape[i]; | |||
| } | |||
| params_->batch = batch; | |||
| params_->row_ = o_shape[o_shape.size() - 2]; | |||
| params_->col_ = o_shape[o_shape.size() - 1]; | |||
| params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1]; | |||
| params_->row_8_ = UP_ROUND(params_->row_, 8); | |||
| params_->col_8_ = UP_ROUND(params_->col_, 8); | |||
| thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8)); | |||
| thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_); | |||
| a_c8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(int8_t))); | |||
| if (!a_c8_ptr_) { | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(int8_t)); | |||
| b_r8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(int8_t))); | |||
| if (!b_r8_ptr_) { | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(int8_t)); | |||
| c_r8x8_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(int))); | |||
| if (!c_r8x8_ptr_) { | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(int)); | |||
| auto input_tensor = inputs_[0]; | |||
| auto params = input_tensor->GetQuantParams(); | |||
| MS_ASSERT(params.size() == 1); | |||
| quant_params_.input.zp_ = params.front().zeroPoint; | |||
| quant_params_.input.scale_ = params.front().scale; | |||
| auto weight_tensor = inputs_[1]; | |||
| params = weight_tensor->GetQuantParams(); | |||
| MS_ASSERT(params.size() == 1); | |||
| quant_params_.weight.zp_ = params.front().zeroPoint; | |||
| quant_params_.weight.scale_ = params.front().scale; | |||
| auto output_tensor = outputs_[0]; | |||
| params = output_tensor->GetQuantParams(); | |||
| MS_ASSERT(params.size() == 1); | |||
| quant_params_.output.zp_ = params.front().zeroPoint; | |||
| quant_params_.output.scale_ = params.front().scale; | |||
| double real_multiplier = quant_params_.input.scale_ * quant_params_.weight.scale_ / quant_params_.output.scale_; | |||
| QuantizeRoundParameter(real_multiplier, &quant_params_.quant_multiplier, &quant_params_.left_shift, | |||
| &quant_params_.right_shift); | |||
| return RET_OK; | |||
| } | |||
| int MatmulInt8CPUKernel::ReSize() { return RET_OK; } | |||
| int MatmulInt8CPUKernel::RunImpl(int task_id) { | |||
| int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_); | |||
| if (cur_oc <= 0) { | |||
| return RET_OK; | |||
| } | |||
| auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_; | |||
| auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_; | |||
| MatMulInt8(a_c8_ptr_, cur_b, cur_c, params_->row_8_, cur_oc * 8, params_->deep_, quant_params_.input.zp_, | |||
| quant_params_.weight.zp_); | |||
| return RET_OK; | |||
| } | |||
| int MatmulInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto op = reinterpret_cast<MatmulInt8CPUKernel *>(cdata); | |||
| auto ret = op->RunImpl(task_id); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "MatmulInt8Run error task_id[" << task_id << "] error_code[" << ret << "]"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int MatmulInt8CPUKernel::Run() { | |||
| auto a_ptr = reinterpret_cast<int8_t *>(inputs_[0]->Data()); | |||
| auto b_ptr = reinterpret_cast<int8_t *>(inputs_[1]->Data()); | |||
| auto c_ptr = reinterpret_cast<int8_t *>(outputs_[0]->Data()); | |||
| auto a_stride = params_->row_ * params_->deep_; | |||
| auto b_stride = params_->deep_ * params_->col_; | |||
| auto c_stride = params_->row_ * params_->col_; | |||
| for (int i = 0; i < params_->batch; ++i) { | |||
| auto cur_a_ptr = a_ptr + i * a_stride; | |||
| auto cur_b_ptr = b_ptr + i * b_stride; | |||
| auto cur_c_ptr = c_ptr + i * c_stride; | |||
| if (params_->a_transpose_) { | |||
| RowMajor2Row8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_); | |||
| } else { | |||
| RowMajor2Col8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_); | |||
| } | |||
| if (params_->b_transpose_) { | |||
| RowMajor2Col8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_); | |||
| } else { | |||
| RowMajor2Row8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_); | |||
| } | |||
| LiteBackendParallelLaunch(MatmulInt8Run, this, thread_count_); | |||
| auto &q = quant_params_; | |||
| SimplePostFuncInt8(c_r8x8_ptr_, cur_c_ptr, params_->col_, params_->row_, params_->row_8_, q.quant_multiplier, | |||
| q.left_shift, q.right_shift, q.output.zp_); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_ | |||
| #include <vector> | |||
| #include "include/context.h" | |||
| #include "src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| #include "src/runtime/kernel/arm/base/matmul_base.h" | |||
| using mindspore::lite::Context; | |||
| namespace mindspore::kernel { | |||
| class MatmulInt8CPUKernel : public MatmulBaseCPUKernel { | |||
| public: | |||
| MatmulInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx) | |||
| : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||
| ~MatmulInt8CPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| private: | |||
| MatmulQuantArg quant_params_; | |||
| int8_t *a_c8_ptr_; | |||
| int8_t *b_r8_ptr_; | |||
| int *c_r8x8_ptr_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_ | |||
| @@ -236,3 +236,20 @@ void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane | |||
| } | |||
| return; | |||
| } | |||
| void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier, | |||
| int32_t left_shift, int32_t right_shift, int32_t zp) { | |||
| /* (int32_t)row8x8-major * multiplier => (int8_t)row-major */ | |||
| for (int r = 0; r < plane; r++) { | |||
| for (int c = 0; c < oc; c++) { | |||
| int c8div = c / 8, c8mod = c % 8; | |||
| int src_index = c8div * plane8 * 8 + r * 8 + c8mod; | |||
| int dst_index = r * oc + c; | |||
| int32_t value = in[src_index]; | |||
| value = MultiplyByQuantizedMultiplier(value, multiplier, left_shift, right_shift) + zp; | |||
| value = MSMIN(CHAR_MAX, value); | |||
| value = MSMAX(CHAR_MIN, value); | |||
| out[dst_index] = (int8_t)value; | |||
| } | |||
| } | |||
| } | |||
| @@ -33,6 +33,8 @@ void ReluFp32(float *data, int ele_num); | |||
| void Relu6Fp32(float *data, int ele_num); | |||
| void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane, int plane8, int32_t multiplier, | |||
| int32_t left_shift, int32_t right_shift, int32_t zp, int8_t mini, int8_t maxi); | |||
| void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier, | |||
| int32_t left_shift, int32_t right_shift, int32_t zp); | |||
| void IndirectGemmFp32_8x8(float *output, const float *input, const float *weight, const float *bias, size_t step, | |||
| size_t ic4, size_t output_channel, size_t offset, size_t mode, size_t writeC4, size_t relu, | |||
| size_t relu6); | |||
| @@ -65,9 +65,7 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, ActT | |||
| size_t bi = c8div * deep * 8 + d * 8 + c8mod; | |||
| value = value + a[ai] * b[bi]; | |||
| } | |||
| if (bias != nullptr) { | |||
| value += bias[col]; | |||
| } | |||
| if (bias != nullptr) value += bias[col]; | |||
| if (act_type == ActType_Relu6) value = MSMIN(6.0f, value); | |||
| if (act_type != ActType_No) value = MSMAX(0.0f, value); | |||
| c[ci] = value; | |||
| @@ -18,6 +18,17 @@ | |||
| #include <limits.h> | |||
| #include "src/runtime/kernel/arm/opclib/quantization/fixed_point.h" | |||
// Repacks a row-major row x col int8 matrix into blocks of 8 columns: element (r, c) is stored
// at (c / 8) * 8 * row + r * 8 + (c % 8) in dst_ptr. Positions that no source element maps to
// are left untouched.
void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
  const int block_stride = 8 * row;
  for (int r = 0; r < row; ++r) {
    for (int c = 0; c < col; ++c) {
      int8_t *block = dst_ptr + (c / 8) * block_stride;
      block[r * 8 + (c % 8)] = src_ptr[r * col + c];
    }
  }
}
| void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) { | |||
| for (int r = 0; r < row; r++) { | |||
| int rd8 = r / 8; | |||
| @@ -26,7 +37,6 @@ void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) | |||
| dst_ptr[rd8 * col * 8 + c * 8 + rm8] = src_ptr[r * col + c]; | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep, | |||
| @@ -46,5 +56,4 @@ void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, co | |||
| c[ci] = value; | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| @@ -22,7 +22,7 @@ | |||
| void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep, | |||
| const int32_t a_zp, const int32_t b_zp); | |||
| void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col); | |||
| void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col); | |||
| #endif // MINDSPORE_LITE_SRC_BACKEND_ARM_OPCLIB_INT8_MATMUL_H_ | |||
| @@ -29,6 +29,7 @@ struct MatMulParameter { | |||
| int col_8_; | |||
| int deep_; | |||
| bool has_bias_; | |||
| int batch; | |||
| bool a_transpose_; /* false : row-major */ | |||
| bool b_transpose_; /* true : col-major */ | |||
| ActType act_type_; | |||
| @@ -22,6 +22,7 @@ | |||
| #include <stdlib.h> | |||
| #include <limits.h> | |||
| #include <limits> | |||
| #include "src/runtime/kernel/arm/opclib/op_base.h" | |||
| struct QuantArg { | |||
| double scale_; | |||
| @@ -49,7 +50,7 @@ struct ConcatQuantArg { | |||
| QuantArg out_quant_args_; | |||
| }; | |||
| struct FcQuantArg { | |||
| struct MatmulQuantArg { | |||
| QuantArg input; | |||
| QuantArg weight; | |||
| QuantArg output; | |||
| @@ -137,4 +138,22 @@ inline void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32 | |||
| *mini = min; | |||
| *maxi = max; | |||
| } | |||
| // quantize from float to int8 | |||
// quantize from float to int8
//
// Each element becomes round(input / scale + zero_point), saturated to the int8_t range.
// The clamp is applied to the rounded floating-point value before any narrowing, so large
// magnitudes saturate instead of wrapping, and the bounds come from int8_t itself rather than
// from plain char, whose limits differ on targets where char is unsigned.
// A NaN intermediate (e.g. 0 / 0 with scale == 0) is mapped to the lower bound.
inline void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
  const double lower = std::numeric_limits<int8_t>::min();
  const double upper = std::numeric_limits<int8_t>::max();
  for (int i = 0; i < length; ++i) {
    double rounded = round(input_data[i] / scale + zero_point);
    if (!(rounded > lower)) {
      // Also taken for NaN, which compares false against everything.
      rounded = lower;
    } else if (rounded > upper) {
      rounded = upper;
    }
    output_data[i] = static_cast<int8_t>(rounded);
  }
}
| // dequantize from int8 to float | |||
// dequantize from int8 to float
//
// Writes scale * (input - zero_point) for each of the first `length` elements.
inline void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
  int idx = 0;
  while (idx < length) {
    const int shifted = input_data[idx] - zero_point;
    output_data[idx] = scale * shifted;
    ++idx;
  }
}
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_QUANTIZATION_QUANTIZE_H_ | |||
| @@ -0,0 +1,169 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/lite_kernel.h" | |||
| namespace mindspore { | |||
| class TestMatMulFp32 : public mindspore::Common { | |||
| public: | |||
| TestMatMulFp32() {} | |||
| }; | |||
| int MMTestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_, | |||
| float *a_ptr, float *b_ptr, std::vector<int> a_shape, std::vector<int> b_shape, | |||
| std::vector<int> c_shape) { | |||
| auto in_t = | |||
| new lite::tensor::Tensor(kNumberTypeFloat, a_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1)); | |||
| in_t->MallocData(); | |||
| memcpy(in_t->Data(), a_ptr, sizeof(float) * in_t->ElementsNum()); | |||
| inputs_->push_back(in_t); | |||
| auto weight_t = | |||
| new lite::tensor::Tensor(kNumberTypeFloat, b_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1)); | |||
| weight_t->MallocData(); | |||
| memcpy(weight_t->Data(), b_ptr, sizeof(float) * weight_t->ElementsNum()); | |||
| inputs_->push_back(weight_t); | |||
| auto out_t = | |||
| new lite::tensor::Tensor(kNumberTypeFloat, c_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1)); | |||
| out_t->MallocData(); | |||
| outputs_->push_back(out_t); | |||
| return out_t->ElementsNum(); | |||
| } | |||
| TEST_F(TestMatMulFp32, simple) { | |||
| std::vector<lite::tensor::Tensor *> inputs_; | |||
| std::vector<lite::tensor::Tensor *> outputs_; | |||
| auto matmul_param = new MatMulParameter(); | |||
| matmul_param->a_transpose_ = false; | |||
| matmul_param->b_transpose_ = false; | |||
| matmul_param->has_bias_ = false; | |||
| float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793, -6.1471, -1.7680453, -6.5721383, | |||
| 17.87506, -5.1192183, 10.742863, 1.4536934, 19.693445, 19.45783, 5.063163, 0.5234792}; | |||
| float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671, -0.012470592, -0.0053063435, | |||
| 0.006050155, 0.008656233, 0.012911413, -0.0028635843, -0.00034080597, -0.0010622552, | |||
| -0.012254699, -0.01312836, 0.0025241964, -0.004706142, 0.002451482, -0.009558459, | |||
| 0.004481974, 0.0033251503, -0.011705584, -0.001720293, -0.0039410214, -0.0073637343}; | |||
| std::vector<int> a_shape = {1, 2, 8}; | |||
| std::vector<int> b_shape = {1, 8, 3}; | |||
| std::vector<int> c_shape = {1, 2, 3}; | |||
| int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); | |||
| auto ctx = new lite::Context; | |||
| ctx->threadNum = 2; | |||
| auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx); | |||
| mm->Init(); | |||
| mm->Run(); | |||
| float correct[] = {-0.1256939023733139, -0.07744802534580231, 0.07410638779401779, | |||
| -0.3049793541431427, -0.027687929570674896, -0.18109679222106934}; | |||
| CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001); | |||
| delete matmul_param; | |||
| delete mm; | |||
| for (auto t : inputs_) delete t; | |||
| for (auto t : outputs_) delete t; | |||
| } | |||
| TEST_F(TestMatMulFp32, simple_transb) { | |||
| std::vector<lite::tensor::Tensor *> inputs_; | |||
| std::vector<lite::tensor::Tensor *> outputs_; | |||
| auto matmul_param = new MatMulParameter(); | |||
| matmul_param->a_transpose_ = false; | |||
| matmul_param->b_transpose_ = true; | |||
| matmul_param->has_bias_ = false; | |||
| float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793, -6.1471, -1.7680453, -6.5721383, | |||
| 17.87506, -5.1192183, 10.742863, 1.4536934, 19.693445, 19.45783, 5.063163, 0.5234792}; | |||
| float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671, -0.012470592, -0.0053063435, | |||
| 0.006050155, 0.008656233, 0.012911413, -0.0028635843, -0.00034080597, -0.0010622552, | |||
| -0.012254699, -0.01312836, 0.0025241964, -0.004706142, 0.002451482, -0.009558459, | |||
| 0.004481974, 0.0033251503, -0.011705584, -0.001720293, -0.0039410214, -0.0073637343}; | |||
| std::vector<int> a_shape = {1, 2, 8}; | |||
| std::vector<int> b_shape = {1, 3, 8}; | |||
| std::vector<int> c_shape = {1, 2, 3}; | |||
| int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); | |||
| auto ctx = new lite::Context; | |||
| ctx->threadNum = 2; | |||
| auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx); | |||
| mm->Init(); | |||
| mm->Run(); | |||
| float correct[] = {0.00533547, 0.002545945, 0.062974121, -0.445441471, -0.246223617, -0.142070031}; | |||
| CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001); | |||
| delete matmul_param; | |||
| delete mm; | |||
| for (auto t : inputs_) delete t; | |||
| for (auto t : outputs_) delete t; | |||
| } | |||
| TEST_F(TestMatMulFp32, batch) { | |||
| std::vector<lite::tensor::Tensor *> inputs_; | |||
| std::vector<lite::tensor::Tensor *> outputs_; | |||
| auto matmul_param = new MatMulParameter(); | |||
| matmul_param->a_transpose_ = false; | |||
| matmul_param->b_transpose_ = true; | |||
| matmul_param->has_bias_ = false; | |||
| float a[] = {-4.946672525326248, 11.154420027909701, -7.831129637356922, 17.309845099949953, -10.46177877610444, | |||
| 2.5412751480833897, 2.700113860276929, -12.616715572097341, -15.513316568881574, -9.513294738065516, | |||
| 17.931148376418896, -10.83801964632579, -14.023733862948017, -14.50805001403956, 0.7952221556310306, | |||
| 6.619720423569035, -19.277904230909357, -13.450479287024839, 19.914652156692625, 16.542571697048878, | |||
| -2.9715041389268926, 4.949555349889412, -1.9408110276290103, -15.062828261031868, 0.20012569643335, | |||
| 8.260383531209776, 3.1092344458607357, 16.742272486091487, 17.31277252415167, -16.60303202099434, | |||
| -8.980314693173042, -11.735087989358268, -14.918976184088514, -11.347592686892733, 11.808756029220604, | |||
| -18.76179414554809, 7.579758962360987, 3.13240880962163, 6.528181981442103, -16.802624652419794, | |||
| -14.323146919914901, -16.197579076296144, 9.738053920125779, -12.245780062949866, 8.817905278096319, | |||
| 0.5261391331275007, -18.26152522535471, -2.400461208771226}; | |||
| float b[] = { | |||
| -0.895183867395529, -0.8146900207660068, -0.27931593219652817, 0.783554361201179, -0.05080215007779798, | |||
| -0.9879631271568501, 0.07710949009001333, -0.9562579726211344, 0.29505553318356825, -0.26651960351085124, | |||
| -0.12755456259718279, -0.8221417897250098, -0.5094334041431876, -0.9117373380256013, 0.991501784215064, | |||
| 0.20131976450979394, 0.07889260559412059, -0.8138407752750305, -0.047622075866657454, -0.2778043115153188, | |||
| -0.6269973420163957, -0.44345812666611617, -0.8571568605933642, 0.020192166011526735, 0.4860054298402434, | |||
| 0.41525925469513614, -0.40270506445219967, -0.8716538067535347, 0.5276448387223114, 0.6064500154192936, | |||
| -0.9553204135772526, 0.3253219646257437, -0.7237956595774822, 0.3271284879679077, -0.534543967339336, | |||
| -0.4076498484281894, 0.01574797075171963, -0.37322004720586244, 0.16425071396119928, -0.5328652244800547, | |||
| 0.7389336170615435, -0.6552069958923377, -0.042305872596973604, -0.6714941466767734, -0.9281411415119043, | |||
| -0.7748558258281224, -0.6209799945964443, 0.02526428593887675, -0.44984776800225856, 0.6281401952319337, | |||
| 0.9907258228680276, 0.6288646615999687, -0.82076880150175, 0.3065944740797497, -0.29201038744043584, | |||
| -0.025685501802048982, -0.07273175145419652, 0.9370449239208709, -0.8233807408078093, -0.4195634619023012, | |||
| 0.9799555630257346, -0.23461882935715228, -0.8884793313829993, -0.4760267734754635, -0.2874539543614072, | |||
| -0.8795685985480997, -0.08099698251915255, -0.1626521023321741, -0.9337167240793414, 0.40924842916829207, | |||
| -0.7375713045221615, -0.0065659291539015285}; | |||
| std::vector<int> a_shape = {3, 2, 8}; | |||
| std::vector<int> b_shape = {3, 3, 8}; | |||
| std::vector<int> c_shape = {3, 2, 3}; | |||
| int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); | |||
| auto ctx = new lite::Context; | |||
| ctx->threadNum = 1; | |||
| auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx); | |||
| mm->Init(); | |||
| mm->Run(); | |||
| float correct[] = {21.38518524169922, -14.514888763427734, -11.040614128112793, 16.91403579711914, | |||
| 27.07421112060547, 23.35394287109375, -39.006141662597656, -2.021998405456543, | |||
| -17.63555145263672, -8.490625381469727, 5.317771911621094, -14.561882019042969, | |||
| -7.251564025878906, -2.508212089538574, 5.86458683013916, -3.466249465942383, | |||
| 8.869029998779297, 25.034008026123047}; | |||
| float *output = reinterpret_cast<float *>(outputs_[0]->Data()); | |||
| for (int i = 0; i < 18; ++i) printf("%f ", output[i]); | |||
| CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001); | |||
| delete matmul_param; | |||
| delete mm; | |||
| for (auto t : inputs_) delete t; | |||
| for (auto t : outputs_) delete t; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -13,13 +13,11 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| #include "mindspore/lite/src/lite_kernel.h" | |||
| @@ -30,21 +28,6 @@ class TestFcInt8 : public mindspore::Common { | |||
| TestFcInt8() {} | |||
| }; | |||
// Quantizes `length` floats to int8 using q = round(zero_point + x / scale), saturated to the
// int8_t range [-128, 127].
//
// input_data:  source values; only read, hence const-qualified.
// length:      number of elements to convert; nothing is written when it is <= 0.
// scale:       quantization step; each input is divided by it.
// zero_point:  offset added before rounding.
// output_data: destination buffer holding at least `length` int8_t values.
void Quantize(const float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
  const float lowest = std::numeric_limits<int8_t>::min();
  const float highest = std::numeric_limits<int8_t>::max();
  for (int i = 0; i < length; ++i) {
    const float rounded = std::round(zero_point + (input_data[i] / scale));
    // Saturate in the float domain first so the narrowing cast below is always in range.
    const float saturated = std::max<float>(lowest, std::min<float>(highest, rounded));
    output_data[i] = static_cast<int8_t>(saturated);
  }
}
// Converts `length` int8 values back to float using x = scale * (q - zero_point).
//
// input_data:  quantized source values; only read, hence const-qualified.
// length:      number of elements to convert; nothing is written when it is <= 0.
// scale:       quantization step used as the multiplier.
// zero_point:  offset subtracted from each quantized value before scaling.
// output_data: destination buffer holding at least `length` floats.
void Dequantize(const int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
  for (int i = 0; i < length; ++i) {
    // The subtraction happens in int, so it cannot wrap around the int8 range.
    const int centered = input_data[i] - zero_point;
    output_data[i] = scale * centered;
  }
}
| int FcInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_, | |||
| MatMulParameter *matmal_param, float **correct, double *scale, int *zeropoint) { | |||
| float input_max = 20; | |||
| @@ -0,0 +1,126 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| #include "mindspore/lite/src/lite_kernel.h" | |||
| namespace mindspore { | |||
| class TestMatmulInt8 : public mindspore::Common { | |||
| public: | |||
| TestMatmulInt8() {} | |||
| }; | |||
| int MMInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_, | |||
| MatMulParameter *matmal_param, float **correct, double *scale, int *zeropoint) { | |||
| float input_max = 20; | |||
| float input_min = -20; | |||
| float weight_max = 1; | |||
| float weight_min = -1; | |||
| float output_max = 30; | |||
| float output_min = -30; | |||
| double input_scale = | |||
| (input_max - input_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min()); | |||
| int input_zp = std::numeric_limits<int8_t>::max() - input_max / input_scale; | |||
| double weight_scale = | |||
| (weight_max - weight_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min()); | |||
| int weight_zp = std::numeric_limits<int8_t>::max() - weight_max / weight_scale; | |||
| double output_scale = | |||
| (output_max - output_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min()); | |||
| int output_zp = std::numeric_limits<int8_t>::max() - output_max / output_scale; | |||
| *scale = output_scale; | |||
| *zeropoint = output_zp; | |||
| auto in_t = | |||
| new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1)); | |||
| in_t->MallocData(); | |||
| float in[] = {6.583835634764597, 11.337275140963907, -4.125256949459629, 10.994337291530833, | |||
| 19.086065139532636, 3.620842999158455, 13.167624585590346, -18.326739299407755, | |||
| 14.877693740734841, -17.092677920571653, 19.24147072807235, -15.14805323833401, | |||
| -18.075654829688737, -0.9164404591894204, -3.836646280336332, -10.870298671273918}; | |||
| Quantize(in, in_t->ElementsNum(), input_scale, input_zp, reinterpret_cast<int8_t *>(in_t->Data())); | |||
| auto in_quant_arg = new mindspore::lite::tensor::QuantArg(); | |||
| in_quant_arg->zeroPoint = input_zp; | |||
| in_quant_arg->scale = input_scale; | |||
| in_t->AddQuantParam(*in_quant_arg); | |||
| inputs_->push_back(in_t); | |||
| auto weight_t = | |||
| new lite::tensor::Tensor(kNumberTypeInt8, {1, 3, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1)); | |||
| weight_t->MallocData(); | |||
| float weight[] = {0.3651070698591563, -0.5856943921727129, -0.7472032663840145, 0.9489992871641959, | |||
| -0.8179490270358738, -0.873058811259344, 0.39876672713807215, -0.1816769383004213, | |||
| -0.13584645926733696, -0.7614673836659709, -0.2535825872616164, -0.05265760030895916, | |||
| 0.28558728305658754, 0.15404213943520118, -0.1634824450738006, -0.5068199082730189, | |||
| -0.026961256849111326, -0.1508441942453307, 0.9375335677537737, 0.3304690744194263, | |||
| -0.5091563780251127, 0.029887336278646925, -0.39540496207319276, 0.46094065001445084}; | |||
| Quantize(weight, weight_t->ElementsNum(), weight_scale, weight_zp, reinterpret_cast<int8_t *>(weight_t->Data())); | |||
| auto weight_quant_arg = new mindspore::lite::tensor::QuantArg(); | |||
| weight_quant_arg->zeroPoint = weight_zp; | |||
| weight_quant_arg->scale = weight_scale; | |||
| weight_t->AddQuantParam(*weight_quant_arg); | |||
| inputs_->push_back(weight_t); | |||
| auto out_t = | |||
| new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 3}, schema::Format_NHWC, static_cast<schema::NodeType>(1)); | |||
| out_t->MallocData(); | |||
| auto output_quant_arg = new mindspore::lite::tensor::QuantArg(); | |||
| output_quant_arg->zeroPoint = output_zp; | |||
| output_quant_arg->scale = output_scale; | |||
| out_t->AddQuantParam(*output_quant_arg); | |||
| outputs_->push_back(out_t); | |||
| *correct = reinterpret_cast<float *>(malloc(out_t->ElementsNum() * sizeof(float))); | |||
| float nchw_co[] = {-0.912632942, 4.08398056, -25.385608673, 2.720281124, 7.745952606, 20.893184662}; | |||
| memcpy(*correct, nchw_co, out_t->ElementsNum() * sizeof(float)); | |||
| matmal_param->b_transpose_ = true; | |||
| matmal_param->a_transpose_ = false; | |||
| matmal_param->has_bias_ = false; | |||
| return out_t->ElementsNum(); | |||
| } | |||
| TEST_F(TestMatmulInt8, mmint8) { | |||
| std::vector<lite::tensor::Tensor *> inputs_; | |||
| std::vector<lite::tensor::Tensor *> outputs_; | |||
| auto matmul_param = new MatMulParameter(); | |||
| float *correct; | |||
| double output_scale; | |||
| int output_zp; | |||
| int total_size = MMInt8TestInit(&inputs_, &outputs_, matmul_param, &correct, &output_scale, &output_zp); | |||
| auto ctx = new lite::Context; | |||
| ctx->threadNum = 2; | |||
| kernel::MatmulInt8CPUKernel *mm = | |||
| new kernel::MatmulInt8CPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx); | |||
| mm->Init(); | |||
| mm->Run(); | |||
| float fout[6] = {0}; | |||
| Dequantize(reinterpret_cast<int8_t *>(outputs_[0]->Data()), outputs_[0]->ElementsNum(), output_scale, output_zp, | |||
| fout); | |||
| CompareOutputData(fout, correct, 6, 0.3); | |||
| delete matmul_param; | |||
| delete mm; | |||
| for (auto t : inputs_) delete t; | |||
| for (auto t : outputs_) delete t; | |||
| free(correct); | |||
| } | |||
| } // namespace mindspore | |||