From: @fuzhiye Reviewed-by: Signed-off-by:tags/v1.1.0
| @@ -40,15 +40,13 @@ using mindspore::schema::Format::Format_NHWC; | |||
| namespace mindspore::kernel { | |||
| int ConvolutionFP16CPUKernel::InitWeightBias() { | |||
| auto filter_tensor = in_tensors_.at(kWeightIndex); | |||
| int kernel_h = filter_tensor->Height(); | |||
| int kernel_w = filter_tensor->Width(); | |||
| int in_channel = filter_tensor->Channel(); | |||
| int out_channel = filter_tensor->Batch(); | |||
| conv_param_->input_channel_ = in_channel; | |||
| conv_param_->output_channel_ = out_channel; | |||
| int oc8 = UP_DIV(out_channel, C8NUM); | |||
| int kernel_plane = kernel_h * kernel_w; | |||
| int pack_weight_size = oc8 * C8NUM * in_channel * kernel_plane; | |||
| int oc8 = UP_ROUND(out_channel, C8NUM); | |||
| int kernel_plane = filter_tensor->Height() * filter_tensor->Width(); | |||
| int pack_weight_size = oc8 * in_channel * kernel_plane; | |||
| // init weight | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(); | |||
| @@ -69,15 +67,15 @@ int ConvolutionFP16CPUKernel::InitWeightBias() { | |||
| } | |||
| // init bias | |||
| bias_data_ = malloc(oc8 * C8NUM * sizeof(float16_t)); | |||
| bias_data_ = malloc(oc8 * sizeof(float16_t)); | |||
| if (bias_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc bias_data_ failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memset(bias_data_, 0, oc8 * C8NUM * sizeof(float16_t)); | |||
| memset(bias_data_, 0, oc8 * sizeof(float16_t)); | |||
| auto fp16_bias_data = reinterpret_cast<float16_t *>(bias_data_); | |||
| if (in_tensors_.size() == kInputSize2) { | |||
| auto ori_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->MutableData()); | |||
| auto ori_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->data_c()); | |||
| for (int i = 0; i < out_channel; ++i) { | |||
| fp16_bias_data[i] = (float16_t)ori_bias[i]; | |||
| } | |||
| @@ -89,9 +87,8 @@ int ConvolutionFP16CPUKernel::InitWeightBias() { | |||
| int ConvolutionFP16CPUKernel::InitTmpBuffer() { | |||
| const int cal_num = 16; | |||
| int in_channel = conv_param_->input_channel_; | |||
| int kernel_plane = conv_param_->kernel_h_ * conv_param_->kernel_w_; | |||
| int unit_size = kernel_plane * in_channel * cal_num * thread_count_; | |||
| int unit_size = | |||
| conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ * cal_num * thread_count_; | |||
| packed_input_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(unit_size * sizeof(float16_t))); | |||
| if (packed_input_ == nullptr) { | |||
| @@ -205,19 +202,13 @@ kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &i | |||
| void FreeMemoryFp16(const std::vector<kernel::LiteKernel *> &group_convs, const std::vector<lite::Tensor *> &new_inputs, | |||
| const std::vector<lite::Tensor *> &new_outputs) { | |||
| for (auto sub_conv : group_convs) { | |||
| if (sub_conv != nullptr) { | |||
| delete sub_conv; | |||
| } | |||
| delete sub_conv; | |||
| } | |||
| for (auto in_tensor : new_inputs) { | |||
| if (in_tensor != nullptr) { | |||
| delete in_tensor; | |||
| } | |||
| delete in_tensor; | |||
| } | |||
| for (auto out_tensor : new_outputs) { | |||
| if (out_tensor != nullptr) { | |||
| delete out_tensor; | |||
| } | |||
| delete out_tensor; | |||
| } | |||
| } | |||
| @@ -332,8 +323,10 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor | |||
| std::vector<int> in_shape; | |||
| std::vector<int> out_shape; | |||
| int batch = inputs.front()->Batch(); | |||
| conv_param->input_batch_ = batch; | |||
| conv_param->output_batch_ = batch; | |||
| if (infered_flag) { | |||
| int batch = inputs.front()->Batch(); | |||
| conv_param->input_channel_ = new_in_channel; | |||
| conv_param->output_channel_ = new_out_channel; | |||
| CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param); | |||
| @@ -77,11 +77,6 @@ int GroupConvolutionFP16CPUKernel::PreProcess() { | |||
| return ret; | |||
| } | |||
| (const_cast<mindspore::lite::PrimitiveC *>(primitive_))->set_infer_flag(true); | |||
| ret = ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ReSize fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| // if infershape func is called in runtime stage, we should malloc memory and set shape info for outputs of sub | |||
| // kernels here. | |||
| @@ -119,6 +114,11 @@ int GroupConvolutionFP16CPUKernel::PreProcess() { | |||
| } | |||
| } | |||
| } | |||
| ret = ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ReSize fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| } | |||
| auto outputs = this->out_tensors(); | |||
| @@ -136,9 +136,7 @@ int GroupConvolutionFP16CPUKernel::PreProcess() { | |||
| int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) { | |||
| // input may either be float32 or float16 | |||
| int in_h = conv_param_->input_h_; | |||
| int in_w = conv_param_->input_w_; | |||
| int in_plane = in_h * in_w; | |||
| int in_plane = conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_batch_; | |||
| int sub_in_channel = conv_param_->input_channel_; | |||
| int ori_in_channel = sub_in_channel * group_num_; | |||
| auto sub_in_data = group_convs_.at(group_id)->in_tensors().front()->data_c(); | |||
| @@ -178,9 +176,7 @@ int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) { | |||
| void GroupConvolutionFP16CPUKernel::PostConcat(int group_id) { | |||
| // output must be float16 data type | |||
| int out_h = conv_param_->output_h_; | |||
| int out_w = conv_param_->output_w_; | |||
| int out_plane = out_h * out_w; | |||
| int out_plane = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->output_batch_; | |||
| int sub_out_channel = conv_param_->output_channel_; | |||
| int ori_out_channel = sub_out_channel * group_num_; | |||
| auto sub_out_data = reinterpret_cast<float16_t *>(group_convs_.at(group_id)->out_tensors().front()->data_c()); | |||
| @@ -37,18 +37,15 @@ using mindspore::schema::Format::Format_NHWC; | |||
| namespace mindspore::kernel { | |||
| int ConvolutionCPUKernel::InitWeightBias() { | |||
| auto filter_tensor = in_tensors_.at(kWeightIndex); | |||
| int kernel_h = filter_tensor->Height(); | |||
| int kernel_w = filter_tensor->Width(); | |||
| int in_channel = filter_tensor->Channel(); | |||
| int out_channel = filter_tensor->Batch(); | |||
| conv_param_->input_channel_ = in_channel; | |||
| conv_param_->output_channel_ = out_channel; | |||
| int kernel_plane = kernel_h * kernel_w; | |||
| const int oc_block = C8NUM; | |||
| int oc_block_num = UP_DIV(out_channel, C8NUM); | |||
| int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane; | |||
| int kernel_plane = filter_tensor->Height() * filter_tensor->Width(); | |||
| int oc_block_num = UP_ROUND(out_channel, C8NUM); | |||
| int pack_weight_size = oc_block_num * in_channel * kernel_plane; | |||
| auto origin_weight = reinterpret_cast<float *>(filter_tensor->MutableData()); | |||
| auto origin_weight = reinterpret_cast<float *>(filter_tensor->data_c()); | |||
| packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float))); | |||
| if (packed_weight_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc packed weight failed."; | |||
| @@ -57,15 +54,15 @@ int ConvolutionCPUKernel::InitWeightBias() { | |||
| memset(packed_weight_, 0, pack_weight_size * sizeof(float)); | |||
| RowMajor2Col8Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); | |||
| bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * oc_block * sizeof(float))); | |||
| bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * sizeof(float))); | |||
| if (bias_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memset(bias_data_, 0, oc_block_num * oc_block * sizeof(float)); | |||
| memset(bias_data_, 0, oc_block_num * sizeof(float)); | |||
| if (in_tensors_.size() == kInputSize2) { | |||
| auto ori_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->MutableData()); | |||
| auto ori_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->data_c()); | |||
| memcpy(bias_data_, ori_bias, out_channel * sizeof(float)); | |||
| } else { | |||
| MS_ASSERT(in_tensors_.size() == kInputSize1); | |||
| @@ -74,13 +71,12 @@ int ConvolutionCPUKernel::InitWeightBias() { | |||
| } | |||
| int ConvolutionCPUKernel::InitTmpBuffer() { | |||
| int in_channel = conv_param_->input_channel_; | |||
| MS_ASSERT(ctx_->allocator != nullptr); | |||
| #ifdef ENABLE_ARM32 | |||
| int unit_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C4NUM * thread_count_; | |||
| int unit_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ * C4NUM * thread_count_; | |||
| #else | |||
| int unit_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C12NUM * thread_count_; | |||
| int unit_size = | |||
| conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ * C12NUM * thread_count_; | |||
| #endif | |||
| packed_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(unit_size * sizeof(float))); | |||
| if (packed_input_ == nullptr) { | |||
| @@ -124,9 +120,8 @@ int ConvolutionCPUKernel::ReSize() { | |||
| } | |||
| int ConvolutionCPUKernel::RunImpl(int task_id) { | |||
| auto input_tensor = in_tensors_.at(kInputIndex); | |||
| auto ori_input_data = reinterpret_cast<float *>(input_tensor->MutableData()); | |||
| auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); | |||
| auto ori_input_data = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->data_c()); | |||
| auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->data_c()); | |||
| ConvFp32(ori_input_data, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), col_major_input_, | |||
| output_addr, task_id, conv_param_); | |||
| return RET_OK; | |||
| @@ -171,19 +166,13 @@ ConvParameter *CreateNewConvParameter(ConvParameter *parameter) { | |||
| void FreeMemory(const std::vector<kernel::LiteKernel *> &group_convs, const std::vector<lite::Tensor *> &new_inputs, | |||
| const std::vector<lite::Tensor *> &new_outputs) { | |||
| for (auto sub_conv : group_convs) { | |||
| if (sub_conv != nullptr) { | |||
| delete sub_conv; | |||
| } | |||
| delete sub_conv; | |||
| } | |||
| for (auto in_tensor : new_inputs) { | |||
| if (in_tensor != nullptr) { | |||
| delete in_tensor; | |||
| } | |||
| delete in_tensor; | |||
| } | |||
| for (auto out_tensor : new_outputs) { | |||
| if (out_tensor != nullptr) { | |||
| delete out_tensor; | |||
| } | |||
| delete out_tensor; | |||
| } | |||
| } | |||
| @@ -304,8 +293,10 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Tensor | |||
| } else { | |||
| new_out_channel = inputs.at(kWeightIndex)->Batch() / group; | |||
| } | |||
| int batch = inputs.front()->Batch(); | |||
| conv_param->input_batch_ = batch; | |||
| conv_param->output_batch_ = batch; | |||
| if (infered_flag) { | |||
| int batch = inputs.front()->Batch(); | |||
| int in_h = inputs.front()->Height(); | |||
| int in_w = inputs.front()->Width(); | |||
| conv_param->input_channel_ = new_in_channel; | |||
| @@ -82,11 +82,6 @@ int GroupConvolutionCPUKernel::PreProcess() { | |||
| return ret; | |||
| } | |||
| (const_cast<mindspore::lite::PrimitiveC *>(primitive_))->set_infer_flag(true); | |||
| ret = ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ReSize fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| // if infershape func is called in runtime stage, we should malloc memory and set shape info for outputs of sub | |||
| // kernels here. | |||
| @@ -124,6 +119,11 @@ int GroupConvolutionCPUKernel::PreProcess() { | |||
| } | |||
| } | |||
| } | |||
| ret = ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ReSize fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| } | |||
| auto outputs = this->out_tensors(); | |||
| @@ -140,9 +140,7 @@ int GroupConvolutionCPUKernel::PreProcess() { | |||
| } | |||
| void GroupConvolutionCPUKernel::SeparateInput(int group_id) { | |||
| int in_h = conv_param_->input_h_; | |||
| int in_w = conv_param_->input_w_; | |||
| int in_plane = in_h * in_w; | |||
| int in_plane = conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_batch_; | |||
| int sub_in_channel = conv_param_->input_channel_; | |||
| int ori_in_channel = sub_in_channel * group_num_; | |||
| auto sub_in_data = reinterpret_cast<float *>(group_convs_.at(group_id)->in_tensors().front()->data_c()); | |||
| @@ -156,9 +154,7 @@ void GroupConvolutionCPUKernel::SeparateInput(int group_id) { | |||
| } | |||
| void GroupConvolutionCPUKernel::PostConcat(int group_id) { | |||
| int out_h = conv_param_->output_h_; | |||
| int out_w = conv_param_->output_w_; | |||
| int out_plane = out_h * out_w; | |||
| int out_plane = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->output_batch_; | |||
| int sub_out_channel = conv_param_->output_channel_; | |||
| int ori_out_channel = sub_out_channel * group_num_; | |||
| auto sub_out_data = reinterpret_cast<float *>(group_convs_.at(group_id)->out_tensors().front()->data_c()); | |||
| @@ -60,9 +60,7 @@ int ConvolutionInt8CPUKernel::InitWeightBias() { | |||
| auto filter_tensor = in_tensors_.at(kWeightIndex); | |||
| auto input_channel = filter_tensor->Channel(); | |||
| auto output_channel = filter_tensor->Batch(); | |||
| int kernel_h = filter_tensor->Height(); | |||
| int kernel_w = filter_tensor->Width(); | |||
| int kernel_plane = kernel_h * kernel_w; | |||
| int kernel_plane = filter_tensor->Height() * filter_tensor->Width(); | |||
| conv_param_->input_channel_ = input_channel; | |||
| conv_param_->output_channel_ = output_channel; | |||
| int up_round_deep; | |||
| @@ -84,7 +82,7 @@ int ConvolutionInt8CPUKernel::InitWeightBias() { | |||
| int32_t input_zp = conv_param_->conv_quant_arg_.input_quant_args_[0].zp_; | |||
| // init weight | |||
| auto origin_weight = reinterpret_cast<int8_t *>(in_tensors_.at(kWeightIndex)->MutableData()); | |||
| auto origin_weight = reinterpret_cast<int8_t *>(in_tensors_.at(kWeightIndex)->data_c()); | |||
| packed_weight_ = reinterpret_cast<int8_t *>(malloc(pack_weight_size)); | |||
| if (packed_weight_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc packed_weight_ failed."; | |||
| @@ -109,7 +107,7 @@ int ConvolutionInt8CPUKernel::InitWeightBias() { | |||
| } | |||
| memset(bias_data_, 0, bias_size); | |||
| if (in_tensors_.size() == kInputSize2) { | |||
| auto ori_bias = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->MutableData()); | |||
| auto ori_bias = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->data_c()); | |||
| memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t)); | |||
| } else { | |||
| MS_ASSERT(in_tensors_.size() == kInputSize1); | |||
| @@ -210,9 +208,8 @@ int ConvolutionInt8CPUKernel::ReSize() { | |||
| } | |||
| int ConvolutionInt8CPUKernel::RunImpl(int task_id) { | |||
| auto input_tensor = in_tensors_.at(kInputIndex); | |||
| auto ori_input_data = reinterpret_cast<int8_t *>(input_tensor->MutableData()); | |||
| auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->MutableData()); | |||
| auto ori_input_data = reinterpret_cast<int8_t *>(in_tensors_.at(kInputIndex)->data_c()); | |||
| auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->data_c()); | |||
| ConvInt8(ori_input_data, packed_input_, matmul_packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), | |||
| output_addr, filter_zp_ptr_, input_sum_, task_id, conv_param_, matmul_func_, support_optimize_); | |||
| return RET_OK; | |||
| @@ -325,9 +322,11 @@ kernel::LiteKernel *CpuGroupConvInt8KernelCreator(const std::vector<lite::Tensor | |||
| } else { | |||
| new_out_channel = inputs.at(kWeightIndex)->Batch() / group; | |||
| } | |||
| int batch = inputs.front()->Batch(); | |||
| conv_param->input_batch_ = batch; | |||
| conv_param->output_batch_ = batch; | |||
| bool infered_flag = primitive != nullptr && primitive->infer_flag(); | |||
| if (infered_flag) { | |||
| int batch = inputs.front()->Batch(); | |||
| int in_h = inputs.front()->Height(); | |||
| int in_w = inputs.front()->Width(); | |||
| conv_param->input_channel_ = new_in_channel; | |||
| @@ -27,7 +27,7 @@ using mindspore::schema::PrimitiveType_Conv2D; | |||
| namespace mindspore::kernel { | |||
| void GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) { | |||
| int in_plane = conv_param_->input_h_ * conv_param_->input_w_; | |||
| int in_plane = conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_batch_; | |||
| int sub_in_channel = conv_param_->input_channel_; | |||
| int ori_in_channel = sub_in_channel * group_num_; | |||
| auto sub_in_data = reinterpret_cast<int8_t *>(group_convs_.at(group_id)->in_tensors().front()->data_c()); | |||
| @@ -41,7 +41,7 @@ void GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) { | |||
| } | |||
| void GroupConvolutionInt8CPUKernel::PostConcat(int group_id) { | |||
| int out_plane = conv_param_->output_h_ * conv_param_->output_w_; | |||
| int out_plane = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->output_batch_; | |||
| int sub_out_channel = conv_param_->output_channel_; | |||
| int ori_out_channel = sub_out_channel * group_num_; | |||
| auto sub_out_data = reinterpret_cast<int8_t *>(group_convs_.at(group_id)->out_tensors().front()->data_c()); | |||