Merge pull request !6486 from liuzhongkai/code_clean (tags/v1.0.0)
| @@ -271,9 +271,9 @@ void ConvWinogardFp32(float *input_data, float *trans_weight, const float *bias_ | |||||
| int out_h_block = UP_DIV(conv_param->output_h_, out_unit); | int out_h_block = UP_DIV(conv_param->output_h_, out_unit); | ||||
| int output_count = out_w_block * out_h_block; | int output_count = out_w_block * out_h_block; | ||||
| #ifdef ENABLE_ARM32 | #ifdef ENABLE_ARM32 | ||||
| int tile_num = 4; | |||||
| const int tile_num = 4; | |||||
| #else | #else | ||||
| int tile_num = 12; | |||||
| const int tile_num = 12; | |||||
| #endif | #endif | ||||
| int output_tile_count = UP_DIV(output_count, tile_num); | int output_tile_count = UP_DIV(output_count, tile_num); | ||||
| int out_channel = conv_param->output_channel_; | int out_channel = conv_param->output_channel_; | ||||
| @@ -470,9 +470,9 @@ void Conv3x3Fp32(float *input_data, float *transed_weight, const float *bias_dat | |||||
| int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT); | int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT); | ||||
| int output_count = out_w_block * out_h_block; | int output_count = out_w_block * out_h_block; | ||||
| #ifdef ENABLE_ARM32 | #ifdef ENABLE_ARM32 | ||||
| int tile_num = 4; | |||||
| const int tile_num = 4; | |||||
| #else | #else | ||||
| int tile_num = 12; | |||||
| const int tile_num = 12; | |||||
| #endif | #endif | ||||
| int output_tile_count = UP_DIV(output_count, tile_num); | int output_tile_count = UP_DIV(output_count, tile_num); | ||||
| const int input_unit_square = 4 * 4; | const int input_unit_square = 4 * 4; | ||||
| @@ -41,9 +41,9 @@ int DeConvPostFp32C12x8(const float *src, float *tmp, const float *bias, float * | |||||
| size_t output_plane = conv_param->output_w_ * conv_param->output_h_; | size_t output_plane = conv_param->output_w_ * conv_param->output_h_; | ||||
| int oc8 = UP_ROUND(output_channel, C8NUM); | int oc8 = UP_ROUND(output_channel, C8NUM); | ||||
| #ifdef ENABLE_ARM32 | #ifdef ENABLE_ARM32 | ||||
| int tile_num = 4; | |||||
| const int tile_num = 4; | |||||
| #else | #else | ||||
| int tile_num = 12; | |||||
| const int tile_num = 12; | |||||
| #endif | #endif | ||||
| int in_plane12 = UP_ROUND(input_plane, tile_num); | int in_plane12 = UP_ROUND(input_plane, tile_num); | ||||
| int src_iw_stride = C8NUM; | int src_iw_stride = C8NUM; | ||||
| @@ -55,8 +55,8 @@ void DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anc | |||||
| BboxCorner *decoded_box = (BboxCorner *)(decoded_boxes) + i; | BboxCorner *decoded_box = (BboxCorner *)(decoded_boxes) + i; | ||||
| float y_center = box->y / scaler.y * anchor->h + anchor->y; | float y_center = box->y / scaler.y * anchor->h + anchor->y; | ||||
| float x_center = box->x / scaler.x * anchor->w + anchor->x; | float x_center = box->x / scaler.x * anchor->w + anchor->x; | ||||
| float h_half = 0.5f * expf(box->h / scaler.h) * anchor->h; | |||||
| float w_half = 0.5f * expf(box->w / scaler.w) * anchor->w; | |||||
| const float h_half = 0.5f * expf(box->h / scaler.h) * anchor->h; | |||||
| const float w_half = 0.5f * expf(box->w / scaler.w) * anchor->w; | |||||
| decoded_box->ymin = y_center - h_half; | decoded_box->ymin = y_center - h_half; | ||||
| decoded_box->xmin = x_center - w_half; | decoded_box->xmin = x_center - w_half; | ||||
| decoded_box->ymax = y_center + h_half; | decoded_box->ymax = y_center + h_half; | ||||
| @@ -68,9 +68,9 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * | |||||
| } | } | ||||
| // input transform | // input transform | ||||
| #ifdef ENABLE_ARM32 | #ifdef ENABLE_ARM32 | ||||
| int tile_num = 4; | |||||
| const int tile_num = 4; | |||||
| #else | #else | ||||
| int tile_num = 12; | |||||
| const int tile_num = 12; | |||||
| #endif | #endif | ||||
| int dst_ic4_offset = dst_plane_offset + ic * C4NUM; | int dst_ic4_offset = dst_plane_offset + ic * C4NUM; | ||||
| size_t dst_step = tile_num * ic4 * C4NUM; | size_t dst_step = tile_num * ic4 * C4NUM; | ||||
| @@ -337,9 +337,9 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa | |||||
| // input transform | // input transform | ||||
| #ifdef ENABLE_ARM32 | #ifdef ENABLE_ARM32 | ||||
| int tile_num = 4; | |||||
| const int tile_num = 4; | |||||
| #else | #else | ||||
| int tile_num = 12; | |||||
| const int tile_num = 12; | |||||
| #endif | #endif | ||||
| int dst_ic4_offset = dst_plane_offset + ic * C4NUM; | int dst_ic4_offset = dst_plane_offset + ic * C4NUM; | ||||
| size_t dst_step = tile_num * ic4 * C4NUM; | size_t dst_step = tile_num * ic4 * C4NUM; | ||||
| @@ -51,6 +51,7 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_ | |||||
| } | } | ||||
| auto matrix_gt_data_fp16 = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t))); | auto matrix_gt_data_fp16 = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t))); | ||||
| if (matrix_gt_data_fp16 == nullptr) { | if (matrix_gt_data_fp16 == nullptr) { | ||||
| free(matrix_g_data_fp16); | |||||
| MS_LOG(ERROR) << "malloc matrix_gt_data_fp16 failed."; | MS_LOG(ERROR) << "malloc matrix_gt_data_fp16 failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -61,16 +62,25 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_ | |||||
| // separate into two steps ===> tmp = G*g ===> out = tmp * GT | // separate into two steps ===> tmp = G*g ===> out = tmp * GT | ||||
| auto tmp_weight_data = reinterpret_cast<float16_t *>(malloc(kernel_unit_ * kernel_unit_ * sizeof(float16_t))); | auto tmp_weight_data = reinterpret_cast<float16_t *>(malloc(kernel_unit_ * kernel_unit_ * sizeof(float16_t))); | ||||
| if (tmp_weight_data == nullptr) { | if (tmp_weight_data == nullptr) { | ||||
| free(matrix_g_data_fp16); | |||||
| free(matrix_gt_data_fp16); | |||||
| MS_LOG(ERROR) << "malloc tmp_weight_data failed."; | MS_LOG(ERROR) << "malloc tmp_weight_data failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto tmp_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t))); | auto tmp_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t))); | ||||
| if (tmp_data == nullptr) { | if (tmp_data == nullptr) { | ||||
| free(tmp_weight_data); | |||||
| free(matrix_g_data_fp16); | |||||
| free(matrix_gt_data_fp16); | |||||
| MS_LOG(ERROR) << "malloc tmp_data failed."; | MS_LOG(ERROR) << "malloc tmp_data failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto trans_out_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * input_unit_ * sizeof(float16_t))); | auto trans_out_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * input_unit_ * sizeof(float16_t))); | ||||
| if (trans_out_data == nullptr) { | if (trans_out_data == nullptr) { | ||||
| free(tmp_data); | |||||
| free(tmp_weight_data); | |||||
| free(matrix_g_data_fp16); | |||||
| free(matrix_gt_data_fp16); | |||||
| MS_LOG(ERROR) << "malloc trans_out_data failed."; | MS_LOG(ERROR) << "malloc trans_out_data failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -206,11 +216,14 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { | |||||
| } | } | ||||
| auto matrix_gt = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float))); | auto matrix_gt = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float))); | ||||
| if (matrix_gt == nullptr) { | if (matrix_gt == nullptr) { | ||||
| free(matrix_g); | |||||
| MS_LOG(ERROR) << "malloc matrix_gt failed."; | MS_LOG(ERROR) << "malloc matrix_gt failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ret = MallocTransformMatrices(); | ret = MallocTransformMatrices(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| free(matrix_g); | |||||
| free(matrix_gt); | |||||
| MS_LOG(ERROR) << "Malloc transform matrices failed."; | MS_LOG(ERROR) << "Malloc transform matrices failed."; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -221,6 +234,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { | |||||
| float matrix_bt[MAX_LEN]; | float matrix_bt[MAX_LEN]; | ||||
| ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, output_unit_, kernel_unit_); | ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, output_unit_, kernel_unit_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| free(matrix_g); | |||||
| free(matrix_gt); | |||||
| MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; | MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -235,6 +250,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { | |||||
| ret = WinogradFilterTransformFp16(execute_weight_, matrix_g, matrix_gt, oc_block); | ret = WinogradFilterTransformFp16(execute_weight_, matrix_g, matrix_gt, oc_block); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| free(matrix_g); | |||||
| free(matrix_gt); | |||||
| MS_LOG(ERROR) << "winograd filter transfrom failed."; | MS_LOG(ERROR) << "winograd filter transfrom failed."; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -242,6 +259,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { | |||||
| // init bias | // init bias | ||||
| bias_data_ = malloc(oc_block_num * oc_block * sizeof(float16_t)); | bias_data_ = malloc(oc_block_num * oc_block * sizeof(float16_t)); | ||||
| if (bias_data_ == nullptr) { | if (bias_data_ == nullptr) { | ||||
| free(matrix_g); | |||||
| free(matrix_gt); | |||||
| MS_LOG(ERROR) << "malloc bias_data_ failed."; | MS_LOG(ERROR) << "malloc bias_data_ failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -200,7 +200,7 @@ int MatmulFP16CPUKernel::Run() { | |||||
| } | } | ||||
| auto b = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); | auto b = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); | ||||
| auto out_tensor = out_tensors_[0]; | auto out_tensor = out_tensors_[0]; | ||||
| float16_t *c_ptr; | |||||
| float16_t *c_ptr = nullptr; | |||||
| if (out_tensor->data_type() == kNumberTypeFloat32) { | if (out_tensor->data_type() == kNumberTypeFloat32) { | ||||
| c_ptr = output_ptr_; | c_ptr = output_ptr_; | ||||
| } else { | } else { | ||||
| @@ -96,9 +96,9 @@ int Convolution3x3CPUKernel::InitTmpBuffer() { | |||||
| MS_ASSERT(ctx_->allocator != nullptr); | MS_ASSERT(ctx_->allocator != nullptr); | ||||
| #ifdef ENABLE_ARM32 | #ifdef ENABLE_ARM32 | ||||
| int tile_num = 4; | |||||
| const int tile_num = 4; | |||||
| #else | #else | ||||
| int tile_num = 12; | |||||
| const int tile_num = 12; | |||||
| #endif | #endif | ||||
| size_t nhwc4_input_size = | size_t nhwc4_input_size = | ||||
| ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float); | ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float); | ||||
| @@ -47,11 +47,14 @@ int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_da | |||||
| } | } | ||||
| auto tmp_data = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float))); | auto tmp_data = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float))); | ||||
| if (tmp_data == nullptr) { | if (tmp_data == nullptr) { | ||||
| free(tmp_weight_data); | |||||
| MS_LOG(ERROR) << "malloc tmp_data failed."; | MS_LOG(ERROR) << "malloc tmp_data failed."; | ||||
| return RET_MEMORY_FAILED; | return RET_MEMORY_FAILED; | ||||
| } | } | ||||
| auto trans_out_data = reinterpret_cast<float *>(malloc(input_unit_ * input_unit_ * sizeof(float))); | auto trans_out_data = reinterpret_cast<float *>(malloc(input_unit_ * input_unit_ * sizeof(float))); | ||||
| if (trans_out_data == nullptr) { | if (trans_out_data == nullptr) { | ||||
| free(tmp_data); | |||||
| free(tmp_weight_data); | |||||
| MS_LOG(ERROR) << "malloc trans_out_data failed."; | MS_LOG(ERROR) << "malloc trans_out_data failed."; | ||||
| return RET_MEMORY_FAILED; | return RET_MEMORY_FAILED; | ||||
| } | } | ||||
| @@ -201,7 +201,7 @@ int ReduceCPUKernel::CalculateCoeffOutput() { | |||||
| int ReduceCPUKernel::MallocTmpBuffer() { | int ReduceCPUKernel::MallocTmpBuffer() { | ||||
| data_buffers_.clear(); | data_buffers_.clear(); | ||||
| for (auto size : buffer_sizes_) { | for (auto size : buffer_sizes_) { | ||||
| void *buffer; | |||||
| void *buffer = nullptr; | |||||
| if (data_type_ == kDataTypeFloat) { | if (data_type_ == kDataTypeFloat) { | ||||
| buffer = context_->allocator->Malloc(size * sizeof(float)); | buffer = context_->allocator->Malloc(size * sizeof(float)); | ||||
| } else { | } else { | ||||
| @@ -143,6 +143,7 @@ int SqueezeInt8CPUKernel::Run() { | |||||
| auto input_size = quant_Squeeze_parm_->input_sizes_[i]; | auto input_size = quant_Squeeze_parm_->input_sizes_[i]; | ||||
| inputs_array[i] = reinterpret_cast<int8_t *>(malloc(sizeof(int8_t) * input_size)); | inputs_array[i] = reinterpret_cast<int8_t *>(malloc(sizeof(int8_t) * input_size)); | ||||
| if (inputs_array[i] == nullptr) { | if (inputs_array[i] == nullptr) { | ||||
| free(inputs_array); | |||||
| MS_LOG(ERROR) << "malloc inputs_array[" << i << "]" | MS_LOG(ERROR) << "malloc inputs_array[" << i << "]" | ||||
| << " failed."; | << " failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||