@@ -0,0 +1,156 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "micro/coder/opcoders/nnacl/int8/add_int8_coder.h"
#include <algorithm>
#include <type_traits>
#include "nnacl/int8/quantize.h"
#include "micro/coder/log.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
#include "micro/coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Add;

namespace mindspore::lite::micro {
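// Prepare: cache the two input tensors, then derive the quantization
// parameters (Init) and the shape/broadcast bookkeeping (ReSize) before any
// code is generated.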
int AddInt8Coder::Prepare(CoderContext *const context) {
  input0 = input_tensors().at(0);
  input1 = input_tensors().at(1);
  MS_CHECK_PTR(input0);
  MS_CHECK_PTR(input1);

  MS_CHECK_RET_CODE(Init(), "Init failed");
  MS_CHECK_RET_CODE(ReSize(), "ReSize failed");

  return RET_OK;
}
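// DoCode: emit the C source that performs the addition at runtime. The kernel
// is dispatched through the thread pool, using the broadcast variant when the
// input shapes differ.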
int AddInt8Coder::DoCode(CoderContext *const context) {
  Collect(context, {"wrapper/int8/conv1x1_init_int8.h"}, {"add_int8_wrapper.c", "add_int8.c", "thread_pool.c"});

  nnacl::NNaclInt8Serializer code;

  code.CodeStruct("para", para_);
  code.CodeStruct("arith_para", *arith_para_);
  code.CodeBaseStruct("AddArgs", "args", "para", "arith_para", in_size_, out_size_, thread_num_s_, elements_num_,
                      support_opt_add_, input0, input1, output_tensor_);

  if (arith_para_->broadcasting_) {
    code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "AddBroadcastRun", "&args", thread_num_s_);
  } else {
    code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "AddRun", "&args", thread_num_s_);
  }

  return RET_OK;
}
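// Init: precompute the fixed-point parameters for quantized addition. Both
// inputs are rescaled onto a common scale (twice the larger input scale) with
// an extra 20-bit left shift for precision, and the sum is rescaled to the
// output scale; this mirrors the standard TFLite int8 Add recipe.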
int AddInt8Coder::Init() {
  para_.in0_args_.zp_ = input0->quant_params().front().zeroPoint * -1;
  para_.in1_args_.zp_ = input1->quant_params().front().zeroPoint * -1;
  para_.out_zp_ = output_tensor_->quant_params().front().zeroPoint;

  const double in0_scale = input0->quant_params().front().scale;
  const double in1_scale = input1->quant_params().front().scale;
  const double out_scale = output_tensor_->quant_params().front().scale;

  para_.left_shift_ = 20;
  const double twice_max_input_scale = 2 * std::max(in0_scale, in1_scale);
  const double in0_multiplier = in0_scale / twice_max_input_scale;
  const double in1_multiplier = in1_scale / twice_max_input_scale;
  const double out_multiplier = twice_max_input_scale / ((1 << para_.left_shift_) * out_scale);
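  // Each real-valued multiplier is decomposed into an int32 fixed-point
  // multiplier plus a shift; the sign of that shift is then split into
  // separate non-negative left- and right-shift counts for the kernel.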
  QuantizeMultiplierSmallerThanOne(in0_multiplier, &para_.in0_args_.multiplier_, &para_.in0_args_.left_shift_);
  QuantizeMultiplierSmallerThanOne(in1_multiplier, &para_.in1_args_.multiplier_, &para_.in1_args_.left_shift_);
  QuantizeMultiplierSmallerThanOne(out_multiplier, &para_.out_multiplier_, &para_.out_left_shift_);

  para_.in0_args_.right_shift_ = -para_.in0_args_.left_shift_ > 0 ? 0 : para_.in0_args_.left_shift_;
  para_.in1_args_.right_shift_ = -para_.in1_args_.left_shift_ > 0 ? 0 : para_.in1_args_.left_shift_;
  para_.out_right_shift_ = -para_.out_left_shift_ > 0 ? 0 : para_.out_left_shift_;

  para_.in0_args_.left_shift_ = -para_.in0_args_.left_shift_ > 0 ? -para_.in0_args_.left_shift_ : 0;
  para_.in1_args_.left_shift_ = -para_.in1_args_.left_shift_ > 0 ? -para_.in1_args_.left_shift_ : 0;
  para_.out_left_shift_ = -para_.out_left_shift_ > 0 ? -para_.out_left_shift_ : 0;
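  // Clamp bounds implementing the fused activation (ReLU / ReLU6) in the
  // quantized output domain.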
  auto act = arith_para_->activation_type_;
  CalculateActivationRangeQuantized(act == ActType_Relu, act == ActType_Relu6, para_.out_zp_,
                                    static_cast<float>(out_scale), &para_.min_, &para_.max_);
  return RET_OK;
}
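// ReSize: derive element counts and broadcast geometry from the current
// tensor shapes. When either operand is a scalar, the optimized element-wise
// path is taken and broadcasting is disabled.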
int AddInt8Coder::ReSize() {
  support_opt_add_ = (input0->ElementsNum() == 1) || (input1->ElementsNum() == 1);
  if (support_opt_add_) {
    arith_para_->broadcasting_ = false;
  }

  elements_num_ = output_tensor_->ElementsNum();

  arith_para_->in_elements_num0_ = input_tensors_[0]->ElementsNum();
  arith_para_->in_elements_num1_ = input_tensors_[1]->ElementsNum();
  arith_para_->out_elements_num_ = output_tensors_[0]->ElementsNum();
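  // A dimension of -1 marks an uninitialized shape in the parameter struct;
  // in that case copy the actual tensor shape in. Note that memcpy_s takes
  // the destination capacity in bytes, hence the sizeof(int) factor.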
  for (size_t i = 0; i < input_tensors_.at(0)->shape().size(); i++) {
    if (arith_para_->in_shape0_[i] == -1) {
      MS_CHECK_RET_CODE(
        memcpy_s(arith_para_->in_shape0_, std::extent<decltype(arith_para_->in_shape0_)>::value * sizeof(int),
                 input0->shape().data(), input0->shape().size() * sizeof(int)),
        "memcpy failed");
      break;
    }
  }
  for (size_t i = 0; i < input_tensors_.at(1)->shape().size(); i++) {
    if (arith_para_->in_shape1_[i] == -1) {
      MS_CHECK_RET_CODE(
        memcpy_s(arith_para_->in_shape1_, std::extent<decltype(arith_para_->in_shape1_)>::value * sizeof(int),
                 input1->shape().data(), input1->shape().size() * sizeof(int)),
        "memcpy failed");
      break;
    }
  }
  for (size_t i = 0; i < output_tensor_->shape().size(); i++) {
    if (arith_para_->out_shape_[i] == -1) {
      MS_CHECK_RET_CODE(
        memcpy_s(arith_para_->out_shape_, std::extent<decltype(arith_para_->out_shape_)>::value * sizeof(int),
                 output_tensor_->shape().data(), output_tensor_->shape().size() * sizeof(int)),
        "memcpy failed");
      break;
    }
  }
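  // For broadcasting, scan from the last dimension to find the innermost axis
  // at which the two input shapes differ. Dimensions after that axis form a
  // contiguous inner block of in_size_ elements; the remaining outer
  // dimensions give out_size_ blocks.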
  if (arith_para_->broadcasting_) {
    size_t break_pos_ = 0;
    // ndim_ is unsigned, so iterate with a signed index to avoid wrap-around
    // when decrementing past zero.
    for (int i = static_cast<int>(arith_para_->ndim_) - 1; i >= 0; --i) {
      if (arith_para_->in_shape0_[i] != arith_para_->in_shape1_[i]) {
        break_pos_ = i;
        break;
      }
    }
    in_size_ = 1;
    out_size_ = 1;
    for (size_t i = 0; i < arith_para_->ndim_; i++) {
      if (i > break_pos_) {
        in_size_ *= arith_para_->out_shape_[i];
      } else {
        out_size_ *= arith_para_->out_shape_[i];
      }
    }

    ComputeStrides(arith_para_->in_shape0_, arith_para_->in_strides0_, arith_para_->ndim_);
    ComputeStrides(arith_para_->in_shape1_, arith_para_->in_strides1_, arith_para_->ndim_);
    ComputeStrides(arith_para_->out_shape_, arith_para_->out_strides_, arith_para_->ndim_);
  }
  return RET_OK;
}
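// Register this coder for the int8 Add primitive on all supported targets.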
REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Add, CPUOpCoderCreator<AddInt8Coder>)
}  // namespace mindspore::lite::micro