| @@ -20,26 +20,27 @@ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/common_func.h" | |||
| #include "nnacl/nnacl_utils.h" | |||
| #define ARITHMETIC_SUPPORT_DIMS_NUM 10 | |||
| typedef struct ArithmeticParameter { | |||
| OpParameter op_parameter_; | |||
| bool broadcasting_; | |||
| size_t ndim_; | |||
| int activation_type_; | |||
| int in_shape0_[10]; | |||
| int in_shape0_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int in_elements_num0_; | |||
| int in_shape1_[10]; | |||
| int in_shape1_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int in_elements_num1_; | |||
| int out_shape_[10]; | |||
| int out_shape_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int out_elements_num_; | |||
| int in_strides0_[10]; | |||
| int in_strides1_[10]; | |||
| int out_strides_[10]; | |||
| int in_strides0_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int in_strides1_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int out_strides_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int multiples0_[10]; | |||
| int multiples1_[10]; | |||
| int multiples0_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int multiples1_[ARITHMETIC_SUPPORT_DIMS_NUM]; | |||
| int eltwise_mode_; // eltwise need | |||
| } ArithmeticParameter; | |||
| @@ -56,8 +56,11 @@ int AddNFp16CPUKernel::AddNParallelRun(int thread_id, float lhs_scale, float rhs | |||
| int AddNFp16CPUKernel::Run() { | |||
| elements_num_ = out_tensors_[0]->ElementsNum(); | |||
| auto input0_data = reinterpret_cast<float16_t *>(in_tensors_[0]->MutableData()); | |||
| CHECK_NULL_RETURN(input0_data); | |||
| auto input1_data = reinterpret_cast<float16_t *>(in_tensors_[1]->MutableData()); | |||
| CHECK_NULL_RETURN(input1_data); | |||
| auto out_data = reinterpret_cast<float16_t *>(out_tensors_[0]->MutableData()); | |||
| CHECK_NULL_RETURN(out_data); | |||
| if (static_cast<int>(elements_num_) < op_parameter_->thread_num_) { | |||
| if (in_tensors_[0]->shape() == in_tensors_[1]->shape()) { | |||
| ElementAddFp16(input0_data, input1_data, out_data, elements_num_); | |||
| @@ -71,6 +74,7 @@ int AddNFp16CPUKernel::Run() { | |||
| } | |||
| for (size_t i = 2; i < in_tensors_.size(); ++i) { | |||
| CHECK_NULL_RETURN(in_tensors_[i]->data()); | |||
| if (in_tensors_[i]->shape() == out_tensors_[0]->shape()) { | |||
| ElementAddFp16(reinterpret_cast<float16_t *>(in_tensors_[i]->data()), out_data, out_data, elements_num_); | |||
| } else { | |||
| @@ -95,6 +99,7 @@ int AddNFp16CPUKernel::Run() { | |||
| } | |||
| for (size_t i = 2; i < in_tensors_.size(); ++i) { | |||
| in1_addr_ = reinterpret_cast<float16_t *>(in_tensors_[i]->MutableData()); | |||
| CHECK_NULL_RETURN(in1_addr_); | |||
| in2_addr_ = out_data; | |||
| ret = ParallelLaunch(this->ms_context_, AddNLaunch, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| @@ -55,7 +55,7 @@ int ArithmeticCPUKernel::ReSize() { | |||
| CalcMultiplesAndStrides(param_); | |||
| if (param_->broadcasting_) { | |||
| outside_ = 1; | |||
| for (int i = static_cast<int>(param_->ndim_) - 1; i >= 0; --i) { | |||
| for (int i = static_cast<int>(param_->ndim_) - 1; i >= 0 && i < ARITHMETIC_SUPPORT_DIMS_NUM; --i) { | |||
| if (param_->in_shape0_[i] != param_->in_shape1_[i]) { | |||
| break_pos_ = i; | |||
| break; | |||
| @@ -139,6 +139,7 @@ int ArithmeticCPUKernel::ConstTensorBroadCast() { | |||
| if (input0_ptr_ == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| CHECK_NULL_RETURN(in_tensors_[0]->data()); | |||
| TileConstTensor(in_tensors_[0]->data(), input0_ptr_, param_->ndim_, param_->in_shape0_, param_->in_strides0_, | |||
| param_->out_strides_, param_->multiples0_); | |||
| input0_broadcast_ = true; | |||
| @@ -155,6 +156,7 @@ int ArithmeticCPUKernel::ConstTensorBroadCast() { | |||
| FreeConstTileBuff(); | |||
| return RET_ERROR; | |||
| } | |||
| CHECK_NULL_RETURN(in_tensors_[1]->data()); | |||
| TileConstTensor(in_tensors_[1]->data(), input1_ptr_, param_->ndim_, param_->in_shape1_, param_->in_strides1_, | |||
| param_->out_strides_, param_->multiples1_); | |||
| input1_broadcast_ = true; | |||
| @@ -393,6 +395,7 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) { | |||
| if (count <= 0) { | |||
| return RET_OK; | |||
| } | |||
| CHECK_LESS_RETURN(ARITHMETIC_SUPPORT_DIMS_NUM, param_->ndim_); | |||
| int offset = stride * task_id * data_type_len_; | |||
| /* run opt function, one of input is scalar */ | |||
| if (IsScalarClac()) { // 2 32 240 240, 1 1 1 1 | |||
| @@ -442,11 +445,14 @@ int ArithmeticCPUKernel::Run() { | |||
| } | |||
| if (!input0_broadcast_) { | |||
| input0_ptr_ = in_tensors_[0]->data(); | |||
| CHECK_NULL_RETURN(input0_ptr_); | |||
| } | |||
| if (!input1_broadcast_) { | |||
| input1_ptr_ = in_tensors_[1]->data(); | |||
| CHECK_NULL_RETURN(input1_ptr_); | |||
| } | |||
| output_ptr_ = out_tensors_[0]->data(); | |||
| CHECK_NULL_RETURN(output_ptr_); | |||
| return ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_); | |||
| } | |||
| @@ -26,7 +26,7 @@ using mindspore::schema::PrimitiveType_Unique; | |||
| namespace mindspore::kernel { | |||
| int UniqueCPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), 1); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 2); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), C2NUM); | |||
| return RET_OK; | |||
| } | |||
| @@ -34,11 +34,11 @@ int UniqueCPUKernel::ReSize() { return RET_OK; } | |||
| int UniqueCPUKernel::Run() { | |||
| auto input = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| MS_ASSERT(input); | |||
| CHECK_NULL_RETURN(input); | |||
| auto output0 = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| MS_ASSERT(output0); | |||
| CHECK_NULL_RETURN(output0); | |||
| auto output1 = reinterpret_cast<int *>(out_tensors_.at(1)->MutableData()); | |||
| MS_ASSERT(output1); | |||
| CHECK_NULL_RETURN(output1); | |||
| int output0_len = 0; | |||
| Unique(input, in_tensors_.at(0)->ElementsNum(), output0, &output0_len, output1); | |||
| @@ -52,6 +52,8 @@ int ArithmeticsInt8Launch(void *cdata, int task_id, float lhs_scale, float rhs_s | |||
| } // namespace | |||
| int ArithmeticInt8CPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 1); | |||
| switch (op_parameter_->type_) { | |||
| case PrimitiveType_Equal: | |||
| arithmetic_run_ = ElementEqualInt8; | |||
| @@ -105,8 +107,11 @@ int ArithmeticInt8CPUKernel::ReSize() { return RET_OK; } | |||
| int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) { | |||
| auto input0_data = reinterpret_cast<int8_t *>(in_tensors_[0]->MutableData()); | |||
| auto input1_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData()); | |||
| CHECK_NULL_RETURN(input0_data); | |||
| auto input1_data = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData()); | |||
| CHECK_NULL_RETURN(input1_data); | |||
| auto output_data = reinterpret_cast<uint8_t *>(out_tensors_[0]->MutableData()); | |||
| CHECK_NULL_RETURN(output_data); | |||
| auto element_num = out_tensors_[0]->ElementsNum(); | |||
| auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | |||
| int error_code; | |||
| @@ -125,7 +130,7 @@ int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) { | |||
| return error_code; | |||
| } | |||
| } else if (arithmetic_run_ != nullptr) { | |||
| error_code = arithmetic_run_(input0_data, input1_data1, output_data, element_num, &quant_args_); | |||
| error_code = arithmetic_run_(input0_data, input1_data, output_data, element_num, &quant_args_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Arithmetic run fail!ret: " << error_code; | |||
| return error_code; | |||
| @@ -141,7 +146,9 @@ int ArithmeticInt8CPUKernel::Run() { | |||
| auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | |||
| if (param->broadcasting_) { | |||
| auto input_data0 = reinterpret_cast<int8_t *>(in_tensors_[0]->MutableData()); | |||
| CHECK_NULL_RETURN(input_data0); | |||
| auto input_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData()); | |||
| CHECK_NULL_RETURN(input_data1); | |||
| tile_data0_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_[0]->Size())); | |||
| tile_data1_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_[0]->Size())); | |||
| if (tile_data0_ == nullptr || tile_data1_ == nullptr) { | |||
| @@ -39,6 +39,8 @@ DepthToSpaceInt8CPUKernel::~DepthToSpaceInt8CPUKernel() { | |||
| } | |||
| int DepthToSpaceInt8CPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), 1); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 1); | |||
| param_->data_type_size_ = sizeof(int8_t); | |||
| in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); | |||
| @@ -48,6 +50,7 @@ int DepthToSpaceInt8CPUKernel::Prepare() { | |||
| } | |||
| auto *input_tensor = in_tensors_.at(kInputIndex); | |||
| auto in_quant_args = input_tensor->quant_params(); | |||
| CHECK_LESS_RETURN(in_quant_args.size(), 1); | |||
| in_quant_arg_->scale_ = in_quant_args.front().scale; | |||
| in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; | |||
| @@ -58,6 +61,7 @@ int DepthToSpaceInt8CPUKernel::Prepare() { | |||
| } | |||
| auto *out_tensor = out_tensors_.at(kOutputIndex); | |||
| auto out_quant_args = out_tensor->quant_params(); | |||
| CHECK_LESS_RETURN(out_quant_args.size(), 1); | |||
| out_quant_arg_->scale_ = out_quant_args.front().scale; | |||
| out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; | |||
| if (!InferShapeDone()) { | |||
| @@ -70,7 +74,9 @@ int DepthToSpaceInt8CPUKernel::Run() { | |||
| auto input = in_tensors_[0]; | |||
| auto output = out_tensors_[0]; | |||
| const int8_t *input_data = reinterpret_cast<const int8_t *>(input->data()); | |||
| CHECK_NULL_RETURN(input_data); | |||
| int8_t *output_data = reinterpret_cast<int8_t *>(output->data()); | |||
| CHECK_NULL_RETURN(output_data); | |||
| auto in_shape = input->shape(); | |||
| if (std::abs(in_quant_arg_->scale_ - out_quant_arg_->scale_) < FLT_EPSILON && | |||
| in_quant_arg_->zp_ == out_quant_arg_->zp_) { | |||
| @@ -26,6 +26,7 @@ namespace mindspore::kernel { | |||
| int FullconnectionInt8CPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 1); | |||
| CHECK_NULL_RETURN(param_); | |||
| param_->batch = 1; | |||
| param_->a_transpose_ = false; | |||
| param_->b_transpose_ = true; | |||
| @@ -45,6 +46,7 @@ int FullconnectionInt8CPUKernel::Prepare() { | |||
| } | |||
| int FullconnectionInt8CPUKernel::ReSize() { | |||
| CHECK_NULL_RETURN(param_); | |||
| int row = 1; | |||
| for (size_t i = 0; i < out_tensors_.at(0)->shape().size() - 1; ++i) { | |||
| row *= (out_tensors_.at(0)->shape()).at(i); | |||
| @@ -31,9 +31,12 @@ using mindspore::schema::PrimitiveType_Gather; | |||
| namespace mindspore::kernel { | |||
| int GatherInt8CPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 1); | |||
| axis_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_; | |||
| auto in_quant_args = in_tensors_.at(0)->quant_params(); | |||
| CHECK_LESS_RETURN(in_quant_args.size(), 1); | |||
| auto out_quant_args = out_tensors_.at(0)->quant_params(); | |||
| CHECK_LESS_RETURN(out_quant_args.size(), 1); | |||
| param_.alpha_ = in_quant_args.front().scale / out_quant_args.front().scale; | |||
| param_.zp_in_ = in_quant_args.front().zeroPoint; | |||
| param_.zp_out_ = out_quant_args.front().zeroPoint; | |||
| @@ -36,10 +36,12 @@ int PowerInt8CPUKernel::Prepare() { | |||
| MSLITE_CHECK_PTR(output); | |||
| auto in_quant_args = input->quant_params(); | |||
| CHECK_LESS_RETURN(in_quant_args.size(), 1); | |||
| param_->quant_arg_.in_args_.scale_ = in_quant_args.front().scale; | |||
| param_->quant_arg_.in_args_.zp_ = in_quant_args.front().zeroPoint; | |||
| auto out_quant_args = output->quant_params(); | |||
| CHECK_LESS_RETURN(out_quant_args.size(), 1); | |||
| param_->quant_arg_.out_args_.scale_ = out_quant_args.front().scale; | |||
| param_->quant_arg_.out_args_.zp_ = out_quant_args.front().zeroPoint; | |||
| @@ -69,6 +71,7 @@ int PowerInt8CPUKernel::DoPower(int task_id) { | |||
| if (in_tensors_.size() == 2) { | |||
| auto exp_tensor = in_tensors_.at(1); | |||
| auto exp_quant_args = exp_tensor->quant_params(); | |||
| CHECK_LESS_RETURN(exp_quant_args.size(), 1); | |||
| param_->quant_arg_.exp_args_.scale_ = exp_quant_args.front().scale; | |||
| param_->quant_arg_.exp_args_.zp_ = exp_quant_args.front().zeroPoint; | |||
| exp_ptr = reinterpret_cast<int8_t *>(exp_tensor->MutableData()); | |||
| @@ -27,6 +27,8 @@ namespace { | |||
| constexpr size_t kMaxShapeSize = 20; | |||
| } // namespace | |||
| int TransposeInt8CPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 1); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| @@ -54,7 +56,7 @@ int TransposeInt8CPUKernel::ReSize() { | |||
| // get perm data | |||
| auto perm_tensor = in_tensors_.at(1); | |||
| int *perm_data = reinterpret_cast<int *>(perm_tensor->data()); | |||
| MS_ASSERT(perm_data != nullptr); | |||
| CHECK_NULL_RETURN(perm_data); | |||
| transpose_param_->num_axes_ = perm_tensor->ElementsNum(); | |||
| for (int i = 0; i < transpose_param_->num_axes_; ++i) { | |||
| transpose_param_->perm_[i] = perm_data[i]; | |||
| @@ -70,11 +72,11 @@ int TransposeInt8CPUKernel::ReSize() { | |||
| } | |||
| int TransposeInt8CPUKernel::DoTranspose(int task_id) { | |||
| MS_ASSERT(in_ptr_); | |||
| MS_ASSERT(out_ptr_); | |||
| MS_ASSERT(in_shape_); | |||
| MS_ASSERT(out_shape_); | |||
| MS_ASSERT(transpose_param_); | |||
| CHECK_NULL_RETURN(in_ptr_); | |||
| CHECK_NULL_RETURN(out_ptr_); | |||
| CHECK_NULL_RETURN(in_shape_); | |||
| CHECK_NULL_RETURN(out_shape_); | |||
| CHECK_NULL_RETURN(transpose_param_); | |||
| TransposeDimsInt8(in_ptr_, out_ptr_, out_shape_, transpose_param_, task_id, op_parameter_->thread_num_); | |||
| return RET_OK; | |||
| } | |||
| @@ -106,7 +108,9 @@ int TransposeInt8CPUKernel::Run() { | |||
| auto out_dims = out_tensor->shape(); | |||
| in_ptr_ = reinterpret_cast<int8_t *>(in_tensor->data()); | |||
| CHECK_NULL_RETURN(in_ptr_); | |||
| out_ptr_ = reinterpret_cast<int8_t *>(out_tensor->data()); | |||
| CHECK_NULL_RETURN(out_ptr_); | |||
| GetNHNCTransposeFunc(in_tensor, out_tensor, transpose_param_); | |||
| if (NHNCTransposeFunc_ != nullptr) { | |||
| NHNCTransposeFunc_(in_ptr_, out_ptr_, nhnc_param_[0], nhnc_param_[1], nhnc_param_[2]); | |||