| @@ -210,12 +210,14 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||||
| int ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, | int ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, | ||||
| DECONV_WINOGRAD_DEFAULT_UNIT, unit->h_size_); | DECONV_WINOGRAD_DEFAULT_UNIT, unit->h_size_); | ||||
| if (ret != NNACL_OK) { | if (ret != NNACL_OK) { | ||||
| free(current_unit_weight); | |||||
| return NNACL_ERRCODE_WINOGRAD_GENERATOR_ERROR; | return NNACL_ERRCODE_WINOGRAD_GENERATOR_ERROR; | ||||
| } | } | ||||
| /* winograd AT */ | /* winograd AT */ | ||||
| unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float16_t)); | unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float16_t)); | ||||
| if (unit->winograd_.AT_ == NULL) { | if (unit->winograd_.AT_ == NULL) { | ||||
| free(current_unit_weight); | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| Float32ToFloat16(matrix_at, unit->winograd_.AT_, unit->winograd_.i_ * unit->winograd_.o_); | Float32ToFloat16(matrix_at, unit->winograd_.AT_, unit->winograd_.i_ * unit->winograd_.o_); | ||||
| @@ -223,6 +225,8 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||||
| /* winograd BT */ | /* winograd BT */ | ||||
| unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float16_t)); | unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float16_t)); | ||||
| if (unit->winograd_.BT_ == NULL) { | if (unit->winograd_.BT_ == NULL) { | ||||
| free(current_unit_weight); | |||||
| free(unit->winograd_.AT_); | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| Float32ToFloat16(matrix_bt, unit->winograd_.BT_, unit->winograd_.o_ * unit->winograd_.o_); | Float32ToFloat16(matrix_bt, unit->winograd_.BT_, unit->winograd_.o_ * unit->winograd_.o_); | ||||
| @@ -231,6 +235,9 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||||
| size = conv_param->input_channel_ * output_channel * unit->winograd_.kh_ * unit->winograd_.kw_; | size = conv_param->input_channel_ * output_channel * unit->winograd_.kh_ * unit->winograd_.kw_; | ||||
| float16_t *winograd_unit_weight = (float16_t *)malloc(size * sizeof(float16_t)); | float16_t *winograd_unit_weight = (float16_t *)malloc(size * sizeof(float16_t)); | ||||
| if (winograd_unit_weight == NULL) { | if (winograd_unit_weight == NULL) { | ||||
| free(current_unit_weight); | |||||
| free(unit->winograd_.AT_); | |||||
| free(unit->winograd_.BT_); | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -49,12 +49,14 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| int ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, | int ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, | ||||
| DECONV_WINOGRAD_DEFAULT_UNIT, unit->h_size_); | DECONV_WINOGRAD_DEFAULT_UNIT, unit->h_size_); | ||||
| if (ret != NNACL_OK) { | if (ret != NNACL_OK) { | ||||
| free(current_unit_weight); | |||||
| return NNACL_ERRCODE_WINOGRAD_GENERATOR_ERROR; | return NNACL_ERRCODE_WINOGRAD_GENERATOR_ERROR; | ||||
| } | } | ||||
| /* winograd AT */ | /* winograd AT */ | ||||
| unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | ||||
| if (unit->winograd_.AT_ == NULL) { | if (unit->winograd_.AT_ == NULL) { | ||||
| free(current_unit_weight); | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| memcpy(unit->winograd_.AT_, matrix_at, unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | memcpy(unit->winograd_.AT_, matrix_at, unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | ||||
| @@ -62,6 +64,8 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| /* winograd BT */ | /* winograd BT */ | ||||
| unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | ||||
| if (unit->winograd_.BT_ == NULL) { | if (unit->winograd_.BT_ == NULL) { | ||||
| free(current_unit_weight); | |||||
| free(unit->winograd_.AT_); | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| memcpy(unit->winograd_.BT_, matrix_bt, unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | memcpy(unit->winograd_.BT_, matrix_bt, unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | ||||
| @@ -70,6 +74,9 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| size = conv_param->input_channel_ * conv_param->output_channel_ * unit->winograd_.kh_ * unit->winograd_.kw_; | size = conv_param->input_channel_ * conv_param->output_channel_ * unit->winograd_.kh_ * unit->winograd_.kw_; | ||||
| float *winograd_unit_weight = (float *)malloc(size * sizeof(float)); | float *winograd_unit_weight = (float *)malloc(size * sizeof(float)); | ||||
| if (winograd_unit_weight == NULL) { | if (winograd_unit_weight == NULL) { | ||||
| free(current_unit_weight); | |||||
| free(unit->winograd_.AT_); | |||||
| free(unit->winograd_.BT_); | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| WinogradWeightTransform(current_unit_weight, winograd_unit_weight, matrix_g, matrix_gt, C4NUM, unit->winograd_.kh_, | WinogradWeightTransform(current_unit_weight, winograd_unit_weight, matrix_g, matrix_gt, C4NUM, unit->winograd_.kh_, | ||||
| @@ -89,9 +89,14 @@ int DetectionPostProcessBaseCPUKernel::Run() { | |||||
| if (parameter->use_regular_nms_) { | if (parameter->use_regular_nms_) { | ||||
| parameter->score_with_class_all_ = | parameter->score_with_class_all_ = | ||||
| context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex)); | context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex)); | ||||
| if (parameter->score_with_class_all_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc parameter->score_with_class_all_failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int)); | parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int)); | ||||
| if (!parameter->score_with_class_all_ || !parameter->indexes_) { | |||||
| MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ || parameter->indexes_ failed."; | |||||
| if (parameter->indexes_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc parameter->indexes_ failed."; | |||||
| context_->allocator->Free(parameter->score_with_class_all_); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -31,11 +31,76 @@ DeConvWinogradFp16CPUKernel::~DeConvWinogradFp16CPUKernel() { | |||||
| return; | return; | ||||
| } | } | ||||
| void DeConvWinogradFp16CPUKernel::FreeResizeBuf() { return; } | |||||
| void DeConvWinogradFp16CPUKernel::FreeResizeBuf() { | |||||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | |||||
| if (unit.tmp_buffer_ != nullptr) { | |||||
| free(unit.tmp_buffer_); | |||||
| unit.tmp_buffer_ = nullptr; | |||||
| } | |||||
| if (unit.use_winograd_) { | |||||
| if (unit.winograd_.b_buffer_ != nullptr) { | |||||
| free(unit.winograd_.b_buffer_); | |||||
| unit.winograd_.b_buffer_ = nullptr; | |||||
| } | |||||
| } | |||||
| } | |||||
| for (int i = 0; i < DECONV_WINOGRAD_BUFFER_COUNT; i++) { | |||||
| DeConvWgABuffer &wg = deconv_param_->a_buffer_[i]; | |||||
| if (wg.buf_init_) { | |||||
| if (wg.dest_buffer_ != nullptr) { | |||||
| free(wg.dest_buffer_); | |||||
| wg.dest_buffer_ = nullptr; | |||||
| } | |||||
| if (wg.middle_buffer_ != nullptr) { | |||||
| free(wg.middle_buffer_); | |||||
| wg.middle_buffer_ = nullptr; | |||||
| } | |||||
| } | |||||
| wg.buf_init_ = false; | |||||
| } | |||||
| if (tile_input_ != nullptr) { | |||||
| free(tile_input_); | |||||
| tile_input_ = nullptr; | |||||
| } | |||||
| if (tile_output_ != nullptr) { | |||||
| free(tile_output_); | |||||
| tile_output_ = nullptr; | |||||
| } | |||||
| if (nc4hw4_output_ != nullptr) { | |||||
| free(nc4hw4_output_); | |||||
| nc4hw4_output_ = nullptr; | |||||
| } | |||||
| return; | |||||
| } | |||||
| void DeConvWinogradFp16CPUKernel::FreeDeconvParam() { | void DeConvWinogradFp16CPUKernel::FreeDeconvParam() { | ||||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | |||||
| if (unit.weight_ != nullptr) { | |||||
| free(unit.weight_); | |||||
| unit.weight_ = nullptr; | |||||
| } | |||||
| if (unit.use_winograd_) { | |||||
| if (unit.winograd_.AT_ != nullptr) { | |||||
| free(unit.winograd_.AT_); | |||||
| unit.winograd_.AT_ = nullptr; | |||||
| } | |||||
| if (unit.winograd_.BT_ != nullptr) { | |||||
| free(unit.winograd_.BT_); | |||||
| unit.winograd_.BT_ = nullptr; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (deconv_param_ != nullptr) { | if (deconv_param_ != nullptr) { | ||||
| delete deconv_param_; | |||||
| delete (deconv_param_); | |||||
| deconv_param_ = nullptr; | deconv_param_ = nullptr; | ||||
| } | } | ||||
| return; | return; | ||||
| @@ -47,6 +112,9 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||||
| nc4hw4_output_ = | nc4hw4_output_ = | ||||
| reinterpret_cast<float16_t *>(malloc(deconv_param_->oc_up4_ * deconv_param_->output_plane_ * sizeof(float16_t))); | reinterpret_cast<float16_t *>(malloc(deconv_param_->oc_up4_ * deconv_param_->output_plane_ * sizeof(float16_t))); | ||||
| if (nc4hw4_output_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| deconv_param_->in_tile_w_count_ = UP_DIV(conv_param_->input_w_, DECONV_WINOGRAD_DEFAULT_UNIT); | deconv_param_->in_tile_w_count_ = UP_DIV(conv_param_->input_w_, DECONV_WINOGRAD_DEFAULT_UNIT); | ||||
| deconv_param_->in_tile_h_count_ = UP_DIV(conv_param_->input_h_, DECONV_WINOGRAD_DEFAULT_UNIT); | deconv_param_->in_tile_h_count_ = UP_DIV(conv_param_->input_h_, DECONV_WINOGRAD_DEFAULT_UNIT); | ||||
| @@ -62,6 +130,9 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||||
| int size = deconv_param_->thread_num_ * DECONV_WINOGRAD_DEFAULT_UNIT * DECONV_WINOGRAD_DEFAULT_UNIT * | int size = deconv_param_->thread_num_ * DECONV_WINOGRAD_DEFAULT_UNIT * DECONV_WINOGRAD_DEFAULT_UNIT * | ||||
| DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | ||||
| tile_input_ = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t))); | tile_input_ = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t))); | ||||
| if (tile_input_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| memset(tile_input_, 0, size * sizeof(float16_t)); | memset(tile_input_, 0, size * sizeof(float16_t)); | ||||
| deconv_param_->out_tile_w_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_w_ + conv_param_->kernel_w_; | deconv_param_->out_tile_w_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_w_ + conv_param_->kernel_w_; | ||||
| @@ -69,6 +140,9 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||||
| size = deconv_param_->thread_num_ * deconv_param_->out_tile_w_ * deconv_param_->out_tile_h_ * | size = deconv_param_->thread_num_ * deconv_param_->out_tile_w_ * deconv_param_->out_tile_h_ * | ||||
| DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->oc_up4_; | DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->oc_up4_; | ||||
| tile_output_ = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t))); | tile_output_ = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t))); | ||||
| if (tile_output_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | for (int i = 0; i < deconv_param_->compute_size_; i++) { | ||||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | ||||
| @@ -79,18 +153,33 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { | |||||
| size = unit.winograd_.kh_ * unit.winograd_.kw_ * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | size = unit.winograd_.kh_ * unit.winograd_.kw_ * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | ||||
| deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ = | deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ = | ||||
| malloc(deconv_param_->thread_num_ * size * sizeof(float16_t)); | malloc(deconv_param_->thread_num_ * size * sizeof(float16_t)); | ||||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ = | deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ = | ||||
| malloc(deconv_param_->thread_num_ * size * sizeof(float16_t)); | malloc(deconv_param_->thread_num_ * size * sizeof(float16_t)); | ||||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } | } | ||||
| unit.winograd_.b_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | unit.winograd_.b_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | ||||
| deconv_param_->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE * sizeof(float16_t)); | deconv_param_->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE * sizeof(float16_t)); | ||||
| if (unit.winograd_.b_buffer_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | ||||
| deconv_param_->oc_div4_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float16_t)); | deconv_param_->oc_div4_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float16_t)); | ||||
| if (unit.tmp_buffer_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } else { | } else { | ||||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * deconv_param_->oc_div4_ * unit.w_size_ * unit.h_size_ * | unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * deconv_param_->oc_div4_ * unit.w_size_ * unit.h_size_ * | ||||
| DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float16_t)); | DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float16_t)); | ||||
| if (unit.tmp_buffer_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -204,9 +293,15 @@ int DeConvWinogradFp16CPUKernel::InitComputeParam() { | |||||
| unit.winograd_.b_buffer_ = nullptr; | unit.winograd_.b_buffer_ = nullptr; | ||||
| unit.weight_ = malloc(unit.winograd_.kh_ * unit.winograd_.kw_ * deconv_param_->oc_up4_ * | unit.weight_ = malloc(unit.winograd_.kh_ * unit.winograd_.kw_ * deconv_param_->oc_up4_ * | ||||
| deconv_param_->ic_up4_ * sizeof(float16_t)); | deconv_param_->ic_up4_ * sizeof(float16_t)); | ||||
| if (unit.weight_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } else { | } else { | ||||
| unit.use_winograd_ = false; | unit.use_winograd_ = false; | ||||
| unit.weight_ = malloc(h_size * w_size * deconv_param_->ic_up4_ * deconv_param_->oc_up4_ * sizeof(float16_t)); | unit.weight_ = malloc(h_size * w_size * deconv_param_->ic_up4_ * deconv_param_->oc_up4_ * sizeof(float16_t)); | ||||
| if (unit.weight_ == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } | } | ||||
| unit.tmp_buffer_ = nullptr; | unit.tmp_buffer_ = nullptr; | ||||
| deconv_param_->compute_units_[cur_count] = unit; | deconv_param_->compute_units_[cur_count] = unit; | ||||
| @@ -226,7 +321,7 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() { | |||||
| } | } | ||||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | for (int i = 0; i < deconv_param_->compute_size_; i++) { | ||||
| DeConvComputeUnit *unit = &deconv_param_->compute_units_[i]; | DeConvComputeUnit *unit = &deconv_param_->compute_units_[i]; | ||||
| ret = PackDeConvWgDataFp16(fp16_weight_, unit, conv_param_, deconv_param_); | |||||
| ret = PackDeConvWgDataFp16(execute_weight_, unit, conv_param_, deconv_param_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -113,6 +113,10 @@ int DeConvolutionWinogradCPUKernel::InitParameter() { | |||||
| int size = deconv_param_->thread_num_ * DECONV_WINOGRAD_DEFAULT_UNIT * DECONV_WINOGRAD_DEFAULT_UNIT * | int size = deconv_param_->thread_num_ * DECONV_WINOGRAD_DEFAULT_UNIT * DECONV_WINOGRAD_DEFAULT_UNIT * | ||||
| DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | ||||
| tile_input_ = reinterpret_cast<float *>(malloc(size * sizeof(float))); | tile_input_ = reinterpret_cast<float *>(malloc(size * sizeof(float))); | ||||
| if (tile_input_ == nullptr) { | |||||
| MS_LOG(ERROR) << "tile_input_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| memset(tile_input_, 0, size * sizeof(float)); | memset(tile_input_, 0, size * sizeof(float)); | ||||
| deconv_param_->out_tile_w_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_w_ + conv_param_->kernel_w_; | deconv_param_->out_tile_w_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_w_ + conv_param_->kernel_w_; | ||||
| @@ -127,18 +131,38 @@ int DeConvolutionWinogradCPUKernel::InitParameter() { | |||||
| size = unit.winograd_.kh_ * unit.winograd_.kw_ * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | size = unit.winograd_.kh_ * unit.winograd_.kw_ * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->ic_up4_; | ||||
| deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ = | deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ = | ||||
| malloc(deconv_param_->thread_num_ * size * sizeof(float)); | malloc(deconv_param_->thread_num_ * size * sizeof(float)); | ||||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].middle_buffer_ == nullptr) { | |||||
| MS_LOG(ERROR) << "middle_buffer_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ = | deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ = | ||||
| malloc(deconv_param_->thread_num_ * size * sizeof(float)); | malloc(deconv_param_->thread_num_ * size * sizeof(float)); | ||||
| if (deconv_param_->a_buffer_[unit.winograd_.kh_].dest_buffer_ == nullptr) { | |||||
| MS_LOG(ERROR) << "dest_buffer_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } | } | ||||
| unit.winograd_.b_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | unit.winograd_.b_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | ||||
| deconv_param_->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE * sizeof(float)); | deconv_param_->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE * sizeof(float)); | ||||
| if (unit.winograd_.b_buffer_ == nullptr) { | |||||
| MS_LOG(ERROR) << "b_buffer_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * unit.winograd_.kh_ * unit.winograd_.kw_ * | ||||
| deconv_param_->oc_div4_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float)); | deconv_param_->oc_div4_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float)); | ||||
| if (unit.tmp_buffer_ == nullptr) { | |||||
| MS_LOG(ERROR) << "tmp_buffer_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } else { | } else { | ||||
| unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * deconv_param_->oc_div4_ * unit.w_size_ * unit.h_size_ * | unit.tmp_buffer_ = malloc(deconv_param_->thread_num_ * deconv_param_->oc_div4_ * unit.w_size_ * unit.h_size_ * | ||||
| DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float)); | DECONV_WINOGRAD_DEFAULT_TILE * C4NUM * sizeof(float)); | ||||
| if (unit.tmp_buffer_ == nullptr) { | |||||
| MS_LOG(ERROR) << "tmp_buffer_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -217,9 +241,17 @@ int DeConvolutionWinogradCPUKernel::InitComputeParam() { | |||||
| unit.winograd_.b_buffer_ = nullptr; | unit.winograd_.b_buffer_ = nullptr; | ||||
| unit.weight_ = malloc(unit.winograd_.kh_ * unit.winograd_.kw_ * deconv_param_->oc_up4_ * | unit.weight_ = malloc(unit.winograd_.kh_ * unit.winograd_.kw_ * deconv_param_->oc_up4_ * | ||||
| deconv_param_->ic_up4_ * sizeof(float)); | deconv_param_->ic_up4_ * sizeof(float)); | ||||
| if (unit.weight_ == nullptr) { | |||||
| MS_LOG(ERROR) << "weight_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } else { | } else { | ||||
| unit.use_winograd_ = false; | unit.use_winograd_ = false; | ||||
| unit.weight_ = malloc(h_size * w_size * deconv_param_->ic_up4_ * deconv_param_->oc_up4_ * sizeof(float)); | unit.weight_ = malloc(h_size * w_size * deconv_param_->ic_up4_ * deconv_param_->oc_up4_ * sizeof(float)); | ||||
| if (unit.weight_ == nullptr) { | |||||
| MS_LOG(ERROR) << "weight_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| } | } | ||||
| unit.tmp_buffer_ = nullptr; | unit.tmp_buffer_ = nullptr; | ||||
| deconv_param_->compute_units_[cur_count] = unit; | deconv_param_->compute_units_[cur_count] = unit; | ||||
| @@ -244,6 +276,10 @@ int DeConvolutionWinogradCPUKernel::InitDataParam() { | |||||
| /* bias */ | /* bias */ | ||||
| bias_data_ = malloc(deconv_param_->oc_up4_ * sizeof(float)); | bias_data_ = malloc(deconv_param_->oc_up4_ * sizeof(float)); | ||||
| if (bias_data_ == nullptr) { | |||||
| MS_LOG(ERROR) << "bias_data_ error!"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| memset(bias_data_, 0, deconv_param_->oc_up4_ * sizeof(float)); | memset(bias_data_, 0, deconv_param_->oc_up4_ * sizeof(float)); | ||||
| if (in_tensors_.size() == 3) { | if (in_tensors_.size() == 3) { | ||||
| auto bias_tensor = in_tensors_.at(kBiasIndex); | auto bias_tensor = in_tensors_.at(kBiasIndex); | ||||
| @@ -28,7 +28,7 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_Conv2D; | using mindspore::schema::PrimitiveType_Conv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) { | |||||
| int ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) { | |||||
| auto input_channel = conv_param->input_channel_; | auto input_channel = conv_param->input_channel_; | ||||
| auto output_channel = conv_param->output_channel_; | auto output_channel = conv_param->output_channel_; | ||||
| auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; | auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; | ||||
| @@ -36,11 +36,15 @@ void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParamete | |||||
| size_t tmp_size = output_channel * iC8 * C8NUM * kernel_plane * sizeof(int16_t); | size_t tmp_size = output_channel * iC8 * C8NUM * kernel_plane * sizeof(int16_t); | ||||
| auto tmp_addr = reinterpret_cast<int16_t *>(malloc(tmp_size)); | auto tmp_addr = reinterpret_cast<int16_t *>(malloc(tmp_size)); | ||||
| if (tmp_addr == nullptr) { | |||||
| return RET_ERROR; | |||||
| } | |||||
| memset(tmp_addr, 0, tmp_size); | memset(tmp_addr, 0, tmp_size); | ||||
| PackWeightToC8Int8(origin_weight, tmp_addr, conv_param); | PackWeightToC8Int8(origin_weight, tmp_addr, conv_param); | ||||
| Conv3x3Int8FilterTransform(tmp_addr, dst_weight, iC8, output_channel, kernel_plane); | Conv3x3Int8FilterTransform(tmp_addr, dst_weight, iC8, output_channel, kernel_plane); | ||||
| free(tmp_addr); | free(tmp_addr); | ||||
| return RET_OK; | |||||
| } | } | ||||
| void Convolution3x3Int8CPUKernel::FreeTmpBuffer() { | void Convolution3x3Int8CPUKernel::FreeTmpBuffer() { | ||||
| @@ -91,7 +95,11 @@ int Convolution3x3Int8CPUKernel::InitWeightBias() { | |||||
| } | } | ||||
| memset(transformed_filter_addr_, 0, transformed_size); | memset(transformed_filter_addr_, 0, transformed_size); | ||||
| auto weight_data = reinterpret_cast<int8_t *>(in_tensors_.at(kWeightIndex)->MutableData()); | auto weight_data = reinterpret_cast<int8_t *>(in_tensors_.at(kWeightIndex)->MutableData()); | ||||
| ProcessFilterUint8(weight_data, transformed_filter_addr_, conv_param_); | |||||
| auto ret = ProcessFilterUint8(weight_data, transformed_filter_addr_, conv_param_); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "ProcessFilterUint8 failed."; | |||||
| return ret; | |||||
| } | |||||
| // init bias | // init bias | ||||
| size_t new_bias_size = oC4 * C4NUM * sizeof(int32_t); | size_t new_bias_size = oC4 * C4NUM * sizeof(int32_t); | ||||
| @@ -48,7 +48,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int32_t *tmp_dst_buffer_ = nullptr; | int32_t *tmp_dst_buffer_ = nullptr; | ||||
| int8_t *tmp_out_ = nullptr; | int8_t *tmp_out_ = nullptr; | ||||
| }; | }; | ||||
| void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); | |||||
| int ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_ | ||||
| @@ -110,9 +110,14 @@ int MulInt8CPUKernel::Run() { | |||||
| count_unit_ = thread_count_ > 1 ? UP_DIV(elements_num_, thread_count_) : elements_num_; | count_unit_ = thread_count_ > 1 ? UP_DIV(elements_num_, thread_count_) : elements_num_; | ||||
| if (in_tensors_.at(0)->ElementsNum() != in_tensors_.at(1)->ElementsNum()) { | if (in_tensors_.at(0)->ElementsNum() != in_tensors_.at(1)->ElementsNum()) { | ||||
| input0_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size())); | input0_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size())); | ||||
| if (input0_data_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc input0_data_ failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| input1_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size())); | input1_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size())); | ||||
| if (!input0_data_ || !input1_data_) { | |||||
| MS_LOG(ERROR) << "malloc input0_data_ || input1_data_ failed."; | |||||
| if (input1_data_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc input1_data_ failed."; | |||||
| ctx_->allocator->Free(input0_data_); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->MutableData()), | TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->MutableData()), | ||||