From: @zhaozhenlong
Reviewed-by: @zhanghaibo5, @ddwsky
Signed-off-by: @zhanghaibo5
@@ -19,13 +19,13 @@
 #include "nnacl/op_base.h"
 #include "nnacl/quantization/quantize.h"
+#define SPLIT_STRIDES_SIZE 32
 typedef struct SplitParameter {
   OpParameter op_parameter_;
   SplitQuantArg quant_arg_;
   int num_split_;
   int *split_sizes_;
-  int strides_[32];
+  int strides_[SPLIT_STRIDES_SIZE];
   int split_dim_;
   int n_dims_;
   int split_count_;
@@ -38,11 +38,16 @@ int PoolingBaseCPUKernel::SetQuantParam() {
   pooling_quant_arg_[0] = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
   if (pooling_quant_arg_[0] == nullptr) {
     MS_LOG(ERROR) << "malloc pooling_quant_arg[0] failed.";
+    free(pooling_quant_arg_);
+    pooling_quant_arg_ = nullptr;
     return RET_MEMORY_FAILED;
   }
   pooling_quant_arg_[1] = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
   if (pooling_quant_arg_[1] == nullptr) {
     MS_LOG(ERROR) << "malloc pooling_quant_arg[1] failed.";
+    free(*pooling_quant_arg_);
+    free(pooling_quant_arg_);
+    pooling_quant_arg_ = nullptr;
     return RET_MEMORY_FAILED;
   }
   auto *input_tensor = in_tensors_.at(kInputIndex);
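
Note: pooling_quant_arg_ itself is malloc'd just before this hunk, so an early return without the added free calls would leak it, and in the second branch would also leak pooling_quant_arg_[0]. The same cleanup pattern in a minimal standalone sketch (all names hypothetical, not from the patch):

    #include <cstdlib>

    // On any failed allocation, release everything allocated so far,
    // in reverse order, so the error path leaks nothing.
    int **MakeTable(size_t n) {
      int **table = static_cast<int **>(std::malloc(n * sizeof(int *)));
      if (table == nullptr) return nullptr;
      for (size_t i = 0; i < n; ++i) {
        table[i] = static_cast<int *>(std::malloc(sizeof(int)));
        if (table[i] == nullptr) {
          while (i > 0) std::free(table[--i]);  // elements allocated so far
          std::free(table);                     // then the table itself
          return nullptr;
        }
      }
      return table;
    }

    int main() {
      int **t = MakeTable(4);
      if (t != nullptr) {
        for (size_t i = 0; i < 4; ++i) std::free(t[i]);
        std::free(t);
      }
      return 0;
    }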
@@ -79,11 +79,14 @@ int PriorBoxCPUKernel::GeneratePriorBox() {
     if (!exist) {
       different_aspect_ratios.emplace_back(ratio);
       if (prior_box_param_->flip) {
+        MS_ASSERT(fabs(ratio) > 1e-5);
         different_aspect_ratios.emplace_back(1.0f / ratio);
       }
     }
   }
+  MS_ASSERT(fmap_w);
+  MS_ASSERT(fmap_h);
   for (int i = 0; i < fmap_h; i++) {
     float cy = i + prior_box_param_->offset;
     for (int j = 0; j < fmap_w; j++) {
@@ -39,8 +39,11 @@ int QuantDTypeCastCPUKernel::Init() {
     return RET_PARAM_INVALID;
   }
   auto in_tensor = in_tensors_.front();
+  MS_ASSERT(in_tensor);
   auto out_tensor = out_tensors_.front();
+  MS_ASSERT(out_tensor);
   auto param = reinterpret_cast<QuantDTypeCastParameter *>(op_parameter_);
+  MS_ASSERT(param);
   if (param->srcT == kNumberTypeFloat32 && param->dstT == kNumberTypeInt8) {
     if (in_tensor->data_type() != kNumberTypeFloat32 || out_tensor->data_type() != kNumberTypeInt8) {
       MS_LOG(ERROR) << "param data type and tensor data type do not match.";
@@ -177,7 +180,11 @@ int QuantDTypeCastCPUKernel::Run() {
       out_tensors_[0]->data_type() == TypeId::kNumberTypeInt8) {
     int8_ptr_ = reinterpret_cast<int8_t *>(in_tensors_[0]->data_c());
     int8_out_ptr_ = reinterpret_cast<int8_t *>(out_tensors_[0]->data_c());
-    float32_ptr_ = new float[in_tensors_[0]->ElementsNum()];
+    float32_ptr_ = new (std::nothrow) float[in_tensors_[0]->ElementsNum()];
+    if (float32_ptr_ == nullptr) {
+      MS_LOG(ERROR) << "new float[] failed";
+      return RET_ERROR;
+    }
   } else if (in_tensors_[0]->data_type() == TypeId::kNumberTypeUInt8 &&
              out_tensors_[0]->data_type() == TypeId::kNumberTypeFloat32) {
     uint8_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_[0]->data_c());
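
Note: plain new reports failure by throwing std::bad_alloc, so a nullptr check after it never fires; the nothrow form is what makes the added check meaningful. A minimal sketch of the idiom, assuming <new> is included in the file:

    #include <cstdio>
    #include <new>  // std::nothrow

    int main() {
      // new (std::nothrow) returns nullptr on failure instead of throwing,
      // so the caller can branch on the result.
      float *buf = new (std::nothrow) float[1024];
      if (buf == nullptr) {
        std::fprintf(stderr, "allocation failed\n");
        return 1;
      }
      delete[] buf;  // array new pairs with array delete
      return 0;
    }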
@@ -37,16 +37,20 @@ int SplitBaseCPUKernel::ReSize() {
   auto in_tensor = in_tensors_.front();
   auto input_shape = in_tensor->shape();
+  MS_ASSERT(param);
+  MS_ASSERT(input_shape.size() >= 2 && input_shape.size() <= SPLIT_STRIDES_SIZE);
   param->strides_[input_shape.size() - 1] = 1;
   for (int i = input_shape.size() - 2; i >= 0; i--) {
     param->strides_[i] = param->strides_[i + 1] * input_shape[i + 1];
   }
+  MS_ASSERT(static_cast<size_t>(param->split_dim_) < input_shape.size());
   param->split_count_ =
     param->strides_[0] * input_shape[0] / (input_shape[param->split_dim_] * param->strides_[param->split_dim_]);
   param->n_dims_ = input_shape.size();
   if (param->split_sizes_[0] == 0) {
+    MS_ASSERT(param->num_split_ > 0 && static_cast<int>(param->num_split_) < input_shape.size());
     if (input_shape[param->split_dim_] % param->num_split_ != 0) {
       MS_LOG(ERROR) << "Default split size is not usable.";
       return RET_ERROR;
@@ -57,6 +61,7 @@ int SplitBaseCPUKernel::ReSize() {
     }
   }
+  MS_ASSERT(param->num_split_ >= 1 && param->num_split_ <= SPLIT_STRIDES_SIZE);
   if (param->split_sizes_[param->num_split_ - 1] == -1) {
     int split_shape_end = input_shape[param->split_dim_];
     for (int i = 0; i < param->num_split_ - 1; i++) {
@@ -67,6 +72,7 @@ int SplitBaseCPUKernel::ReSize() {
   num_unit_ = param->split_count_ * param->num_split_;
   thread_n_num_ = MSMIN(thread_count_, num_unit_);
+  MS_ASSERT(thread_n_num_);
   thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
   return RET_OK;
 }
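
For context on what strides_ holds: the loop guarded by the new asserts computes ordinary row-major strides, where the last axis has stride 1 and each earlier axis multiplies in the extent of the axis after it. A self-contained sketch (hypothetical names):

    #include <cstdio>
    #include <vector>

    // Row-major strides: strides[last] = 1, strides[i] = strides[i+1] * shape[i+1].
    // The linear offset of index (i0, i1, ...) is the sum over k of ik * strides[k].
    std::vector<int> RowMajorStrides(const std::vector<int> &shape) {
      std::vector<int> strides(shape.size(), 1);
      for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * shape[i + 1];
      }
      return strides;
    }

    int main() {
      auto s = RowMajorStrides({2, 3, 4});
      std::printf("%d %d %d\n", s[0], s[1], s[2]);  // prints: 12 4 1
      return 0;
    }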
@@ -45,7 +45,8 @@ ARITHMETIC_COMPARE_FUNC_INFO_FP16 arithmetic_cp_fun_table_fp16[] = {
   ElementOptGreaterEqualFp16}};
 ArithmeticCompareFuncFp16 GetArithmeticCompareFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_cp_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_cp_fun_table_fp16) / sizeof(ARITHMETIC_COMPARE_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_cp_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_cp_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_cp_fun_table_fp16[i].func_;
@@ -55,7 +56,8 @@ ArithmeticCompareFuncFp16 GetArithmeticCompareFun(int primitive_type, int activa
 }
 ArithmeticCompareOptFuncFp16 GetOptimizedArithmeticCompareFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_cp_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_cp_fun_table_fp16) / sizeof(ARITHMETIC_COMPARE_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_cp_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_cp_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_cp_fun_table_fp16[i].opt_func_;
@@ -72,7 +72,8 @@ ARITHMETIC_FUNC_INFO_FP16 arithmetic_fun_table_fp16[] = {
   {PrimitiveType_Minimum, schema::ActivationType_NO_ACTIVATION, ElementMinimumFp16, ElementOptMinimumFp16}};
 ArithmeticFuncFp16 GetArithmeticFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_fun_table_fp16) / sizeof(ARITHMETIC_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_fun_table_fp16[i].func_;
@@ -82,7 +83,8 @@ ArithmeticFuncFp16 GetArithmeticFun(int primitive_type, int activation_type) {
 }
 ArithmeticOptFuncFp16 GetOptimizedArithmeticFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_fun_table_fp16) / sizeof(ARITHMETIC_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_fun_table_fp16[i].opt_func_;
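
The bug fixed in all four loops above is the same classic one: sizeof(array) is the array's size in bytes, not its element count, so the old bound walked far past the end of the table. A minimal illustration of the difference:

    #include <cstddef>
    #include <cstdio>

    struct Entry { int type; int value; };
    static Entry table[] = {{1, 10}, {2, 20}, {3, 30}};

    int main() {
      // sizeof(table) is the byte size of the whole array; dividing by the
      // element size yields the element count the loop actually needs.
      size_t bytes = sizeof(table);
      size_t count = sizeof(table) / sizeof(table[0]);
      std::printf("bytes=%zu count=%zu\n", bytes, count);  // e.g. bytes=24 count=3
      for (size_t i = 0; i < count; i++) {  // looping to `bytes` would overrun
        std::printf("%d -> %d\n", table[i].type, table[i].value);
      }
      return 0;
    }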
@@ -71,7 +71,7 @@ int ConcatFp16CPUKernel::MallocTmpBuffer() {
 void ConcatFp16CPUKernel::FreeTmpBuffer() {
   for (size_t i = 0; i < fp16_inputs_.size(); i++) {
     auto &in_tensor = in_tensors_.at(i);
-    auto in_ptr = fp16_inputs_.at(i);
+    auto &in_ptr = fp16_inputs_.at(i);
     if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
       if (in_ptr != nullptr) {
         context_->allocator->Free(in_ptr);
@@ -33,7 +33,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
 namespace mindspore::kernel {
 ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() {
   if (packed_weight_ != nullptr) {
-    delete packed_weight_;
+    free(packed_weight_);
     packed_weight_ = nullptr;
   }
 }
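
Assuming packed_weight_ is allocated with malloc in this kernel's init path (which the switch to free suggests), releasing it with delete was undefined behavior: every allocation must be released by its matching deallocator. A minimal illustration:

    #include <cstdlib>

    int main() {
      // Each allocation has exactly one matching release:
      // malloc/free, scalar new/delete, array new[]/delete[].
      // Mixing them, e.g. delete on malloc'd memory, is undefined behavior.
      int *a = static_cast<int *>(std::malloc(4 * sizeof(int)));
      std::free(a);  // malloc pairs with free

      int *b = new int(7);
      delete b;      // scalar new pairs with scalar delete

      int *c = new int[4];
      delete[] c;    // array new pairs with array delete
      return 0;
    }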
@@ -68,6 +68,7 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
     auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
+    MS_ASSERT(ori_bias);
     for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
       bias_fp16[i] = (float16_t)ori_bias[i];
     }
@@ -55,6 +55,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
     packed_output_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_output_size * sizeof(float16_t)));
     if (packed_output_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
+      context_->allocator->Free(packed_input_);
       return RET_ERROR;
     }
   }
@@ -86,6 +87,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
     auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
+    MS_ASSERT(ori_bias);
     for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
       bias_fp16[i] = (float16_t)ori_bias[i];
     }
@@ -161,8 +163,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
                  conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
   } else {
     packed_input_ = execute_input_;
-  }
-  if (!need_align_) {
     packed_output_ = execute_output_;
   }
@@ -342,6 +342,7 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() {
   auto fp16_bias_data = reinterpret_cast<float16_t *>(bias_data_);
   if (in_tensors_.size() == kInputSize2) {
     auto src_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->MutableData());
+    MS_ASSERT(src_bias);
     for (int i = 0; i < conv_param_->output_channel_; ++i) {
       fp16_bias_data[i] = (float16_t)src_bias[i];
     }
@@ -122,6 +122,10 @@ int FullconnectionFP16CPUKernel::ReSize() {
   if (out_tensors_[0]->data_type() == kNumberTypeFloat32) {
     output_fp16_ =
       reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(fc_param_->row_ * fc_param_->col_ * sizeof(float16_t)));
+    if (output_fp16_ == nullptr) {
+      FreeTmpBuffer();
+      return RET_MEMORY_FAILED;
+    }
   }
   return RET_OK;
 }  // namespace mindspore::kernel
@@ -27,8 +27,10 @@ using mindspore::schema::PrimitiveType_FusedBatchNorm;
 namespace mindspore::kernel {
 int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
+  MS_ASSERT(param);
   if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
+    MS_ASSERT(in_tensors_.size() == 5);
+    MS_ASSERT(out_tensors_.size() == 1);
     auto input = in_tensors_.at(0);
     auto scale = in_tensors_.at(1);
     auto offset = in_tensors_.at(2);
@@ -50,6 +52,7 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
       context_->allocator->Free(mean_fp16);
       context_->allocator->Free(variance_fp16);
       context_->allocator->Free(output_fp16);
+      return RET_ERROR;
     }
     Float32ToFloat16(reinterpret_cast<float *>(input->MutableData()), reinterpret_cast<float16_t *>(input_fp16),
                      input->ElementsNum());
@@ -155,6 +155,8 @@ int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) {
   if (in_tensors_.front()->data_type() == kNumberTypeFloat16) {
     float16_t *src_ptr = reinterpret_cast<float16_t *>(ori_in_data_) + group_id * sub_in_channel;
     float16_t *dst_ptr = reinterpret_cast<float16_t *>(sub_in_data);
+    MS_ASSERT(src_ptr);
+    MS_ASSERT(dst_ptr);
     for (int i = 0; i < in_plane; ++i) {
       memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(float16_t));
       src_ptr += ori_in_channel;
@@ -163,6 +165,8 @@ int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) {
   } else {
     float *src_ptr = reinterpret_cast<float *>(ori_in_data_) + group_id * sub_in_channel;
     float *dst_ptr = reinterpret_cast<float *>(sub_in_data);
+    MS_ASSERT(src_ptr);
+    MS_ASSERT(dst_ptr);
     for (int i = 0; i < in_plane; ++i) {
       memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(float));
       src_ptr += ori_in_channel;
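
For context, the loops the asserts were added to perform a strided slice copy: each group's sub_in_channel-wide block is pulled out of the interleaved ori_in_channel-wide rows, one pixel at a time. A standalone sketch with hypothetical names:

    #include <cstring>

    // Copy one group's channel slice out of an interleaved buffer:
    // src rows are ori_channel wide, dst rows are sub_channel wide.
    void CopyChannelSlice(const float *src, float *dst, int plane,
                          int ori_channel, int sub_channel, int offset) {
      const float *src_ptr = src + offset;  // this group's first channel
      for (int i = 0; i < plane; ++i) {
        std::memcpy(dst, src_ptr, sub_channel * sizeof(float));
        src_ptr += ori_channel;  // next pixel in the original tensor
        dst += sub_channel;      // packed tightly in the sub tensor
      }
    }

    int main() {
      const int plane = 2, ori = 4, sub = 2;
      float src[plane * ori] = {0, 1, 2, 3, 4, 5, 6, 7};
      float dst[plane * sub];
      CopyChannelSlice(src, dst, plane, ori, sub, /*offset=*/2);
      // dst now holds {2, 3, 6, 7}: channels 2..3 of each pixel
      return 0;
    }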
@@ -180,6 +184,7 @@ void GroupConvolutionFP16CPUKernel::PostConcat(int group_id) {
   int sub_out_channel = conv_param_->output_channel_;
   int ori_out_channel = sub_out_channel * group_num_;
   auto sub_out_data = reinterpret_cast<float16_t *>(group_convs_[group_id]->out_tensors().front()->data_c());
+  MS_ASSERT(sub_out_data);
   float16_t *src_ptr = sub_out_data;
   float16_t *dst_ptr = ori_out_data_ + group_id * sub_out_channel;
   for (int i = 0; i < out_plane; ++i) {
@@ -192,6 +197,7 @@
 int GroupConvolutionFP16CPUKernel::Run() {
   ori_in_data_ = in_tensors().front()->data_c();
   ori_out_data_ = reinterpret_cast<float16_t *>(out_tensors().front()->data_c());
+  MS_ASSERT(ori_out_data_);
   for (int i = 0; i < group_num_; ++i) {
     // first, separate group conv input into several parts. This step must be in runtime stage.
     auto ret = SeparateInput(i);
@@ -339,6 +339,7 @@ kernel::LiteKernel *CpuMatmulFp16KernelCreator(const std::vector<lite::Tensor *>
   auto *dequant_weight = kernel::DequantUtil::DequantWeight(weight_tensor);
   if (dequant_weight == nullptr) {
     MS_LOG(ERROR) << "dequant data is nullptr.";
+    free(opParameter);
     return nullptr;
   }
   weight_tensor->set_data_type(kNumberTypeFloat32);
@@ -87,6 +87,8 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) {
   auto quant_arg = !out_tensors_.front()->GetQuantParams().empty() ? out_tensors_.front()->GetQuantParams().front()
                                                                    : in_tensors_.front()->GetQuantParams().front();
   int ret;
+  MS_ASSERT(int8_ptr_);
+  MS_ASSERT(float16_ptr_);
   if (inverse_) {
     ret = DoDequantizeInt8ToFp16(int8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale,
                                  quant_arg.zeroPoint, num_unit_thread);
@@ -118,7 +118,7 @@ int ReduceFp16CPUKernel::Run() {
 }
 void ReduceFp16CPUKernel::FreeTmpBuffer() {
-  for (auto buffer : data_buffers_) {
+  for (auto &buffer : data_buffers_) {
     if (buffer != nullptr) {
       context_->allocator->Free(buffer);
       buffer = nullptr;
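
The reference in the fixed loop (and the matching change in ConcatFp16CPUKernel::FreeTmpBuffer above) is what makes `buffer = nullptr;` effective: with a by-value loop variable the assignment only clears a copy and the vector keeps the stale pointer. A standalone illustration:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int *> ptrs = {new int(1), new int(2)};

      // By value: p is a copy, so the assignment never reaches the vector.
      for (auto p : ptrs) {
        p = nullptr;
      }
      std::printf("%s\n", ptrs[0] ? "still set" : "cleared");  // still set

      // By reference: p aliases the element, so the write sticks.
      for (auto &p : ptrs) {
        delete p;
        p = nullptr;
      }
      std::printf("%s\n", ptrs[0] ? "still set" : "cleared");  // cleared
      return 0;
    }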