| @@ -1,4 +1,4 @@ | |||||
| #ifdef __arm__ | |||||
| #if 0 | |||||
| #ifndef __aarch64__ | #ifndef __aarch64__ | ||||
| .text | .text | ||||
| @@ -152,9 +152,8 @@ void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter * | |||||
| } | } | ||||
| /*conv depthwise fp32 begin*/ | /*conv depthwise fp32 begin*/ | ||||
| #ifndef ENABLE_ARM64 | |||||
| void DepthwiseBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width, | |||||
| int in_kh_step, int in_kw_step, int kernel_w_step, bool is_relu, bool is_relu6) { | |||||
| void ConvDwBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width, | |||||
| int in_kh_step, int in_kw_step, int kernel_w_step, bool is_relu, bool is_relu6) { | |||||
| const float *src_kh = src; | const float *src_kh = src; | ||||
| const float *weight_kh = weight; | const float *weight_kh = weight; | ||||
| for (int c = 0; c < C4NUM; c++) { | for (int c = 0; c < C4NUM; c++) { | ||||
| @@ -179,10 +178,9 @@ void DepthwiseBorderPixel(float *dst, const float *src, const float *weight, con | |||||
| dst[c] = (is_relu6) ? (MSMIN(6, MSMAX(0, dst[c]))) : (dst[c]); | dst[c] = (is_relu6) ? (MSMIN(6, MSMAX(0, dst[c]))) : (dst[c]); | ||||
| } | } | ||||
| } | } | ||||
| #endif | |||||
| void DepthwiseBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, | |||||
| int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { | |||||
| void ConvDwBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, int left, | |||||
| int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { | |||||
| bool relu = conv_param->act_type_ == ActType_Relu; | bool relu = conv_param->act_type_ == ActType_Relu; | ||||
| bool relu6 = conv_param->act_type_ == ActType_Relu6; | bool relu6 = conv_param->act_type_ == ActType_Relu6; | ||||
| float *dst_h = dst + top * sliding->out_h_step_; | float *dst_h = dst + top * sliding->out_h_step_; | ||||
| @@ -207,8 +205,8 @@ void DepthwiseBorder(float *dst, const float *src, const float *weight, const fl | |||||
| sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float), | sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float), | ||||
| conv_param->kernel_w_ * C4NUM * sizeof(float), relu, relu6); | conv_param->kernel_w_ * C4NUM * sizeof(float), relu, relu6); | ||||
| #else | #else | ||||
| DepthwiseBorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM, relu, relu6); | |||||
| ConvDwBorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM, relu, relu6); | |||||
| #endif | #endif | ||||
| dst_kernel += sliding->block_channel_; | dst_kernel += sliding->block_channel_; | ||||
| } // width loop | } // width loop | ||||
| @@ -217,9 +215,9 @@ void DepthwiseBorder(float *dst, const float *src, const float *weight, const fl | |||||
| } | } | ||||
| #ifndef ENABLE_ARM64 | #ifndef ENABLE_ARM64 | ||||
| void DepthwiseCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, | |||||
| int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, | |||||
| int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) { | |||||
| void ConvDwCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, | |||||
| int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, | |||||
| int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) { | |||||
| float *dst_h = dst; | float *dst_h = dst; | ||||
| const float *src_h = src; | const float *src_h = src; | ||||
| for (int oh = 0; oh < height; oh++) { | for (int oh = 0; oh < height; oh++) { | ||||
| @@ -260,7 +258,7 @@ void DepthwiseCenter(float *dst, const float *src, const float *weight, const fl | |||||
| #endif | #endif | ||||
| // conv depthwise fp32: sliding window | // conv depthwise fp32: sliding window | ||||
| void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) { | const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) { | ||||
| bool relu = conv_param->act_type_ == ActType_Relu; | bool relu = conv_param->act_type_ == ActType_Relu; | ||||
| bool relu6 = conv_param->act_type_ == ActType_Relu6; | bool relu6 = conv_param->act_type_ == ActType_Relu6; | ||||
| @@ -272,14 +270,13 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig | |||||
| float *dst_data = dst + oc * C4NUM; | float *dst_data = dst + oc * C4NUM; | ||||
| const float *weight = weight_data + oc * sliding->kernel_step_; | const float *weight = weight_data + oc * sliding->kernel_step_; | ||||
| const float *bias = bias_data + oc * C4NUM; | const float *bias = bias_data + oc * C4NUM; | ||||
| DepthwiseBorder(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, | |||||
| sliding); | |||||
| DepthwiseBorder(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, | |||||
| conv_param->output_w_, conv_param, sliding); | |||||
| DepthwiseBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, | |||||
| sliding); | |||||
| DepthwiseBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, | |||||
| conv_param->output_w_, conv_param, sliding); | |||||
| ConvDwBorder(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, sliding); | |||||
| ConvDwBorder(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, conv_param->output_w_, | |||||
| conv_param, sliding); | |||||
| ConvDwBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, | |||||
| sliding); | |||||
| ConvDwBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, | |||||
| conv_param->output_w_, conv_param, sliding); | |||||
| if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | ||||
| int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | ||||
| @@ -293,10 +290,10 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig | |||||
| sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float), | sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float), | ||||
| sliding->in_kw_step_ * sizeof(float), relu, relu6); | sliding->in_kw_step_ * sizeof(float), relu, relu6); | ||||
| #else | #else | ||||
| DepthwiseCenter(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, | |||||
| conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, | |||||
| sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, relu, | |||||
| relu6); | |||||
| ConvDwCenter(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, | |||||
| conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, | |||||
| sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, relu, | |||||
| relu6); | |||||
| #endif | #endif | ||||
| } | } | ||||
| } // output C4 loop | } // output C4 loop | ||||
| @@ -308,8 +305,8 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig | |||||
| /*conv depthwise fp32 end*/ | /*conv depthwise fp32 end*/ | ||||
| /*deconv depthwise fp32 begin*/ | /*deconv depthwise fp32 begin*/ | ||||
| void DeconvDepthwiseBorderPixel(float *dst, const float *src, const float *weight, int height, int width, | |||||
| int in_kh_step, int in_kw_step, int kernel_w_step) { | |||||
| void DeconvDwBorderPixel(float *dst, const float *src, const float *weight, int height, int width, int in_kh_step, | |||||
| int in_kw_step, int kernel_w_step) { | |||||
| float *dst_kh = dst; | float *dst_kh = dst; | ||||
| const float *weight_kh = weight; | const float *weight_kh = weight; | ||||
| for (int kh = 0; kh < height; kh++) { | for (int kh = 0; kh < height; kh++) { | ||||
| @@ -335,8 +332,8 @@ void DeconvDepthwiseBorderPixel(float *dst, const float *src, const float *weigh | |||||
| } // kernel_h loop | } // kernel_h loop | ||||
| } | } | ||||
| void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, int top, int bottom, int left, int right, | |||||
| const ConvParameter *conv_param, const SlidingWindowParam *sliding) { | |||||
| void DeconvDwBorder(float *dst, const float *src, const float *weight, int top, int bottom, int left, int right, | |||||
| const ConvParameter *conv_param, const SlidingWindowParam *sliding) { | |||||
| const float *src_h = src + top * sliding->out_h_step_; | const float *src_h = src + top * sliding->out_h_step_; | ||||
| for (int ih = top; ih < bottom; ih++) { | for (int ih = top; ih < bottom; ih++) { | ||||
| int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; | int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; | ||||
| @@ -358,8 +355,8 @@ void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, in | |||||
| sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float), | sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float), | ||||
| conv_param->kernel_w_ * C4NUM * sizeof(float)); | conv_param->kernel_w_ * C4NUM * sizeof(float)); | ||||
| #else | #else | ||||
| DeconvDepthwiseBorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM); | |||||
| DeconvDwBorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM); | |||||
| #endif | #endif | ||||
| src_kernel += sliding->block_channel_; | src_kernel += sliding->block_channel_; | ||||
| } // width loop | } // width loop | ||||
| @@ -368,9 +365,9 @@ void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, in | |||||
| } | } | ||||
| #ifndef ENABLE_ARM64 | #ifndef ENABLE_ARM64 | ||||
| void DeconvDepthwiseCenter(float *dst, const float *src, const float *weight, int height, int width, int kernel_h, | |||||
| int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, | |||||
| int in_kh_step, int in_kw_step) { | |||||
| void DeconvDwCenter(float *dst, const float *src, const float *weight, int height, int width, int kernel_h, | |||||
| int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, int in_kh_step, | |||||
| int in_kw_step) { | |||||
| float *dst_h = dst; | float *dst_h = dst; | ||||
| const float *src_h = src; | const float *src_h = src; | ||||
| for (int oh = 0; oh < height; oh++) { | for (int oh = 0; oh < height; oh++) { | ||||
| @@ -401,7 +398,7 @@ void DeconvDepthwiseCenter(float *dst, const float *src, const float *weight, in | |||||
| } | } | ||||
| #endif | #endif | ||||
| void DeconvDepthwisePostFunc(float *dst, const float *bias, int block_channel, const ConvParameter *conv_param) { | |||||
| void DeconvDwPost(float *dst, const float *bias, int block_channel, const ConvParameter *conv_param) { | |||||
| bool relu = conv_param->act_type_ == ActType_Relu; | bool relu = conv_param->act_type_ == ActType_Relu; | ||||
| bool relu6 = conv_param->act_type_ == ActType_Relu6; | bool relu6 = conv_param->act_type_ == ActType_Relu6; | ||||
| float *dst_k = dst; | float *dst_k = dst; | ||||
| @@ -416,7 +413,7 @@ void DeconvDepthwisePostFunc(float *dst, const float *bias, int block_channel, c | |||||
| } | } | ||||
| // deconv depthwise fp32: sliding window | // deconv depthwise fp32: sliding window | ||||
| void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) { | const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) { | ||||
| const float *src = input_data; | const float *src = input_data; | ||||
| float *dst = output_data; | float *dst = output_data; | ||||
| @@ -426,13 +423,13 @@ void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *we | |||||
| float *dst_data = dst + oc * C4NUM; | float *dst_data = dst + oc * C4NUM; | ||||
| const float *weight = weight_data + oc * sliding->kernel_step_; | const float *weight = weight_data + oc * sliding->kernel_step_; | ||||
| const float *bias = bias_data + oc * C4NUM; | const float *bias = bias_data + oc * C4NUM; | ||||
| DeconvDepthwiseBorder(dst_data, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, sliding); | |||||
| DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, conv_param->input_w_, | |||||
| conv_param, sliding); | |||||
| DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, | |||||
| sliding); | |||||
| DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, | |||||
| conv_param->input_w_, conv_param, sliding); | |||||
| DeconvDwBorder(dst_data, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, sliding); | |||||
| DeconvDwBorder(dst_data, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, conv_param->input_w_, | |||||
| conv_param, sliding); | |||||
| DeconvDwBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, | |||||
| sliding); | |||||
| DeconvDwBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, conv_param->input_w_, | |||||
| conv_param, sliding); | |||||
| if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | ||||
| int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | ||||
| @@ -447,13 +444,12 @@ void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *we | |||||
| sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float), | sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float), | ||||
| sliding->in_kw_step_ * sizeof(float)); | sliding->in_kw_step_ * sizeof(float)); | ||||
| #else | #else | ||||
| DeconvDepthwiseCenter(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, | |||||
| conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, | |||||
| sliding->block_channel_, sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, | |||||
| sliding->in_kw_step_); | |||||
| DeconvDwCenter(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, | |||||
| conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, | |||||
| sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_); | |||||
| #endif | #endif | ||||
| } | } | ||||
| DeconvDepthwisePostFunc(dst_data, bias, sliding->block_channel_, conv_param); | |||||
| DeconvDwPost(dst_data, bias, sliding->block_channel_, conv_param); | |||||
| } // output C4 loop | } // output C4 loop | ||||
| src += sliding->out_step_; | src += sliding->out_step_; | ||||
| dst += sliding->in_step_; | dst += sliding->in_step_; | ||||
| @@ -42,13 +42,10 @@ void InitSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *co | |||||
| void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block); | void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block); | ||||
| void DepthwiseBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, | |||||
| int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding); | |||||
| void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); | const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); | ||||
| void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, | |||||
| const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); | const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| @@ -139,7 +139,7 @@ void ConvDwInt8(int8_t *output_data, int32_t *row_buffer, const int8_t *input_da | |||||
| /*conv depthwise int8 end*/ | /*conv depthwise int8 end*/ | ||||
| /*conv depthwise 3x3 int8 begin*/ | /*conv depthwise 3x3 int8 begin*/ | ||||
| bool CheckIfUse3X3(const ConvParameter *conv_param) { | |||||
| bool CheckConvDwInt8Use3X3(const ConvParameter *conv_param) { | |||||
| bool use_3x3 = conv_param->kernel_h_ == 3 && conv_param->kernel_w_ == 3 && | bool use_3x3 = conv_param->kernel_h_ == 3 && conv_param->kernel_w_ == 3 && | ||||
| (conv_param->stride_h_ == 1 || conv_param->stride_h_ == 2) && | (conv_param->stride_h_ == 1 || conv_param->stride_h_ == 2) && | ||||
| (conv_param->stride_w_ == 1 || conv_param->stride_w_ == 2) && | (conv_param->stride_w_ == 1 || conv_param->stride_w_ == 2) && | ||||
| @@ -158,8 +158,8 @@ bool CheckIfUse3X3(const ConvParameter *conv_param) { | |||||
| return use_3x3; | return use_3x3; | ||||
| } | } | ||||
| void InitInputBuffer(int8_t *buffer, const int8_t *input, const ConvParameter *conv_param, int block_input_h, | |||||
| int block_input_w) { | |||||
| void ConvDw3x3Int8InitBuffer(int8_t *buffer, const int8_t *input, const ConvParameter *conv_param, int block_input_h, | |||||
| int block_input_w) { | |||||
| for (int h = 0; h < block_input_h; h++) { | for (int h = 0; h < block_input_h; h++) { | ||||
| const int8_t *src = input; | const int8_t *src = input; | ||||
| for (int w = 0; w < block_input_w; w++) { | for (int w = 0; w < block_input_w; w++) { | ||||
| @@ -257,7 +257,7 @@ void ConvDw3x3Int8Row(int8_t *output, int8_t *buffer, const int8_t *input, const | |||||
| const int32_t *bias_ptr = bias; | const int32_t *bias_ptr = bias; | ||||
| int c = 0; | int c = 0; | ||||
| for (; c <= conv_param->output_channel_ - 64; c += 64) { | for (; c <= conv_param->output_channel_ - 64; c += 64) { | ||||
| InitInputBuffer(buffer, input_ptr, conv_param, block_input_h, block_input_w); | |||||
| ConvDw3x3Int8InitBuffer(buffer, input_ptr, conv_param, block_input_h, block_input_w); | |||||
| ConvDw3x3Int8Block(output_ptr, buffer, weight_ptr, bias_ptr, 0, 64, 64, ih_offset, conv_param->input_channel_, | ConvDw3x3Int8Block(output_ptr, buffer, weight_ptr, bias_ptr, 0, 64, 64, ih_offset, conv_param->input_channel_, | ||||
| block_output_h, block_output_w, in_zp, out_zp, out_multiplier, left_shift, right_shift, | block_output_h, block_output_w, in_zp, out_zp, out_multiplier, left_shift, right_shift, | ||||
| acc_min, acc_max, conv_param->stride_h_); | acc_min, acc_max, conv_param->stride_h_); | ||||
| @@ -489,10 +489,10 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16 | |||||
| /*conv depthwise 3x3 int8 end*/ | /*conv depthwise 3x3 int8 end*/ | ||||
| /*conv depthwise sliding window perchannel int8 begin*/ | /*conv depthwise sliding window perchannel int8 begin*/ | ||||
| void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, | |||||
| int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, | |||||
| int32_t *out_zp, const int *out_multiplier, const int *left_shift, const int *right_shift, | |||||
| int32_t *acc_min, int32_t *acc_max) { | |||||
| void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, | |||||
| int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, int32_t *out_zp, | |||||
| const int *out_multiplier, const int *left_shift, const int *right_shift, int32_t *acc_min, | |||||
| int32_t *acc_max) { | |||||
| int tmp_buffer[C8NUM]; | int tmp_buffer[C8NUM]; | ||||
| for (int i = 0; i < C8NUM; i++) { | for (int i = 0; i < C8NUM; i++) { | ||||
| tmp_buffer[i] = 0; | tmp_buffer[i] = 0; | ||||
| @@ -525,10 +525,10 @@ void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *wei | |||||
| } | } | ||||
| } | } | ||||
| void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, | |||||
| int bottom, int left, int right, const ConvParameter *conv_param, | |||||
| const SlidingWindowParam *sliding, int8_t *in_zp, int32_t *out_zp, const int *out_multiplier, | |||||
| const int *left_shift, const int *right_shift, int32_t *acc_min, int32_t *acc_max) { | |||||
| void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom, | |||||
| int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding, | |||||
| int8_t *in_zp, int32_t *out_zp, const int *out_multiplier, const int *left_shift, | |||||
| const int *right_shift, int32_t *acc_min, int32_t *acc_max) { | |||||
| int8_t *dst_h = dst + top * sliding->out_h_step_; | int8_t *dst_h = dst + top * sliding->out_h_step_; | ||||
| for (int oh = top; oh < bottom; oh++) { | for (int oh = top; oh < bottom; oh++) { | ||||
| int ih = oh * conv_param->stride_h_ - conv_param->pad_u_; | int ih = oh * conv_param->stride_h_ - conv_param->pad_u_; | ||||
| @@ -546,9 +546,9 @@ void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight, | |||||
| const int8_t *src_kernel = src_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_; | const int8_t *src_kernel = src_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_; | ||||
| const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C8NUM; | const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C8NUM; | ||||
| DepthwiseBorderPixelInt8(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_, in_zp, out_zp, | |||||
| out_multiplier, left_shift, right_shift, acc_min, acc_max); | |||||
| ConvDwInt8BorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_, in_zp, out_zp, | |||||
| out_multiplier, left_shift, right_shift, acc_min, acc_max); | |||||
| dst_kernel += sliding->block_channel_; | dst_kernel += sliding->block_channel_; | ||||
| } // width loop | } // width loop | ||||
| @@ -556,12 +556,11 @@ void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight, | |||||
| } // height loop | } // height loop | ||||
| } | } | ||||
| #ifndef ENABLE_ARM64 | |||||
| void DepthwiseCenterInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, | |||||
| int width, int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, | |||||
| int in_sw_step, int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp, | |||||
| int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t *acc_min, | |||||
| int32_t *acc_max) { | |||||
| #ifndef ENABLE_ARM | |||||
| void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, int width, | |||||
| int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, | |||||
| int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp, int32_t *out_multiplier, | |||||
| int32_t *left_shift, int32_t *right_shift, int32_t *acc_min, int32_t *acc_max) { | |||||
| int tmp_buffer[C8NUM]; | int tmp_buffer[C8NUM]; | ||||
| int8_t *dst_h = dst; | int8_t *dst_h = dst; | ||||
| const int8_t *src_h = src; | const int8_t *src_h = src; | ||||
| @@ -608,7 +607,7 @@ void DepthwiseCenterInt8(int8_t *dst, const int8_t *src, const int16_t *weight, | |||||
| } | } | ||||
| #endif | #endif | ||||
| void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, | |||||
| void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, | |||||
| int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, | int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, | ||||
| const SlidingWindowParam *sliding, int task_id) { | const SlidingWindowParam *sliding, int task_id) { | ||||
| const int8_t *src = input_data; | const int8_t *src = input_data; | ||||
| @@ -628,37 +627,26 @@ void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t * | |||||
| int8_t *in_zp = input_zp + oc * C8NUM; | int8_t *in_zp = input_zp + oc * C8NUM; | ||||
| int32_t *out_zp = output_zp + oc * C8NUM; | int32_t *out_zp = output_zp + oc * C8NUM; | ||||
| DepthwiseBorderInt8(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, | |||||
| sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); | |||||
| DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, | |||||
| conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, | |||||
| right_shift, acc_min, acc_max); | |||||
| DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, | |||||
| conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, | |||||
| acc_max); | |||||
| DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, | |||||
| conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, | |||||
| right_shift, acc_min, acc_max); | |||||
| ConvDwInt8Border(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, | |||||
| sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); | |||||
| ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, | |||||
| conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, | |||||
| right_shift, acc_min, acc_max); | |||||
| ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, | |||||
| sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); | |||||
| ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, | |||||
| conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, | |||||
| right_shift, acc_min, acc_max); | |||||
| if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | ||||
| int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | ||||
| int in_w_start = sliding->left_ * conv_param->stride_w_ - conv_param->pad_l_; | int in_w_start = sliding->left_ * conv_param->stride_w_ - conv_param->pad_l_; | ||||
| const int8_t *in_t = src_data + in_h_start * sliding->in_h_step_ + in_w_start * sliding->block_channel_; | const int8_t *in_t = src_data + in_h_start * sliding->in_h_step_ + in_w_start * sliding->block_channel_; | ||||
| int8_t *out_t = dst_data + sliding->top_ * sliding->out_h_step_ + sliding->left_ * sliding->block_channel_; | int8_t *out_t = dst_data + sliding->top_ * sliding->out_h_step_ + sliding->left_ * sliding->block_channel_; | ||||
| #ifdef ENABLE_ARM | |||||
| ConvDwInt8Center(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, | ConvDwInt8Center(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, | ||||
| conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_ * sizeof(int8_t), | |||||
| sliding->block_channel_ * sizeof(int8_t), sliding->in_sh_step_ * sizeof(int8_t), | |||||
| sliding->in_sw_step_ * sizeof(int8_t), sliding->in_kh_step_ * sizeof(int8_t), | |||||
| sliding->in_kw_step_ * sizeof(int8_t), in_zp, out_zp, out_multiplier, left_shift, right_shift, | |||||
| acc_min, acc_max); | |||||
| #else | |||||
| DepthwiseCenterInt8(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, | |||||
| sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_, | |||||
| sliding->out_h_step_, sliding->block_channel_, sliding->in_sh_step_, sliding->in_sw_step_, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, in_zp, out_zp, out_multiplier, left_shift, | |||||
| right_shift, acc_min, acc_max); | |||||
| #endif | |||||
| conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, | |||||
| sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, in_zp, | |||||
| out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); | |||||
| } | } | ||||
| } // output C8 loop | } // output C8 loop | ||||
| src += sliding->in_step_; | src += sliding->in_step_; | ||||
| @@ -669,8 +657,8 @@ void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t * | |||||
| /*conv depthwise sliding window perchannel int8 end*/ | /*conv depthwise sliding window perchannel int8 end*/ | ||||
| /*deconv depthwise int8 begin*/ | /*deconv depthwise int8 begin*/ | ||||
| void DeconvDepthwiseBorderPixelInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, | |||||
| int in_kh_step, int in_kw_step, int kernel_w) { | |||||
| void DeconvDwInt8BorderPixel(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, | |||||
| int in_kh_step, int in_kw_step, int kernel_w) { | |||||
| int32_t *dst_kh = dst; | int32_t *dst_kh = dst; | ||||
| const int16_t *weight_kh = weight; | const int16_t *weight_kh = weight; | ||||
| for (int kh = 0; kh < height; kh++) { | for (int kh = 0; kh < height; kh++) { | ||||
| @@ -688,8 +676,8 @@ void DeconvDepthwiseBorderPixelInt8(int32_t *dst, const int16_t *src, const int1 | |||||
| } // kernel_h loop | } // kernel_h loop | ||||
| } | } | ||||
| void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int top, int bottom, int left, | |||||
| int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { | |||||
| void DeconvDwInt8Border(int32_t *dst, const int16_t *src, const int16_t *weight, int top, int bottom, int left, | |||||
| int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { | |||||
| const int16_t *src_h = src + top * sliding->out_h_step_; | const int16_t *src_h = src + top * sliding->out_h_step_; | ||||
| for (int ih = top; ih < bottom; ih++) { | for (int ih = top; ih < bottom; ih++) { | ||||
| int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; | int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; | ||||
| @@ -707,8 +695,8 @@ void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t * | |||||
| const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C4NUM; | const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C4NUM; | ||||
| int32_t *dst_kernel = dst_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_; | int32_t *dst_kernel = dst_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_; | ||||
| DeconvDepthwiseBorderPixelInt8(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_); | |||||
| DeconvDwInt8BorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, | |||||
| sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_); | |||||
| src_kernel += sliding->block_channel_; | src_kernel += sliding->block_channel_; | ||||
| } // width loop | } // width loop | ||||
| src_h += sliding->out_h_step_; | src_h += sliding->out_h_step_; | ||||
| @@ -716,9 +704,9 @@ void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t * | |||||
| } | } | ||||
| #ifndef ENABLE_ARM64 | #ifndef ENABLE_ARM64 | ||||
| void DeconvDepthwiseCenterInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, | |||||
| int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, | |||||
| int in_sw_step, int in_kh_step, int in_kw_step) { | |||||
| void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, int kernel_h, | |||||
| int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, int in_kh_step, | |||||
| int in_kw_step) { | |||||
| int32_t *dst_h = dst; | int32_t *dst_h = dst; | ||||
| const int16_t *src_h = src; | const int16_t *src_h = src; | ||||
| for (int oh = 0; oh < height; oh++) { | for (int oh = 0; oh < height; oh++) { | ||||
| @@ -784,14 +772,14 @@ void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *in | |||||
| const int16_t *weight = weight_data + oc * sliding->kernel_step_; | const int16_t *weight = weight_data + oc * sliding->kernel_step_; | ||||
| const int32_t *bias = bias_data + oc * C4NUM; | const int32_t *bias = bias_data + oc * C4NUM; | ||||
| int8_t *dst_data = dst + oc * C4NUM; | int8_t *dst_data = dst + oc * C4NUM; | ||||
| DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, | |||||
| sliding); | |||||
| DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, | |||||
| conv_param->input_w_, conv_param, sliding); | |||||
| DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, | |||||
| conv_param, sliding); | |||||
| DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, | |||||
| conv_param->input_w_, conv_param, sliding); | |||||
| DeconvDwInt8Border(output_buffer, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, | |||||
| sliding); | |||||
| DeconvDwInt8Border(output_buffer, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, | |||||
| conv_param->input_w_, conv_param, sliding); | |||||
| DeconvDwInt8Border(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, | |||||
| conv_param, sliding); | |||||
| DeconvDwInt8Border(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, | |||||
| conv_param->input_w_, conv_param, sliding); | |||||
| if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { | ||||
| int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; | ||||
| @@ -806,10 +794,9 @@ void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *in | |||||
| sliding->in_sw_step_ * sizeof(int32_t), sliding->in_kh_step_ * sizeof(int32_t), | sliding->in_sw_step_ * sizeof(int32_t), sliding->in_kh_step_ * sizeof(int32_t), | ||||
| sliding->in_kw_step_ * sizeof(int32_t)); | sliding->in_kw_step_ * sizeof(int32_t)); | ||||
| #else | #else | ||||
| DeconvDepthwiseCenterInt8(out_t, in_t, weight, sliding->bottom_ - sliding->top_, | |||||
| sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_, | |||||
| sliding->out_h_step_, sliding->block_channel_, sliding->in_sh_step_, | |||||
| sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_); | |||||
| DeconvDwInt8Center(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, | |||||
| conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, | |||||
| sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_); | |||||
| #endif | #endif | ||||
| } | } | ||||
| DeconvDwInt8Post(dst_data, output_buffer, bias, sliding->block_channel_, | DeconvDwInt8Post(dst_data, output_buffer, bias, sliding->block_channel_, | ||||
| @@ -24,7 +24,7 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| bool CheckIfUse3X3(const ConvParameter *conv_param); | |||||
| bool CheckConvDwInt8Use3X3(const ConvParameter *conv_param); | |||||
| void ConvDwInt8(int8_t *output_data, int32_t *output_row, const int8_t *input_data, const int16_t *weight_data, | void ConvDwInt8(int8_t *output_data, int32_t *output_row, const int8_t *input_data, const int16_t *weight_data, | ||||
| const int32_t *bias_data, const ConvParameter *conv_param, int task_id); | const int32_t *bias_data, const ConvParameter *conv_param, int task_id); | ||||
| @@ -36,7 +36,7 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data | |||||
| const int32_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding, | const int32_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding, | ||||
| int task_id); | int task_id); | ||||
| void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, | |||||
| void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, | |||||
| int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, | int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, | ||||
| const SlidingWindowParam *sliding, int task_id); | const SlidingWindowParam *sliding, int task_id); | ||||
| @@ -118,7 +118,7 @@ int ConvolutionDepthwiseSWCPUKernel::ReSize() { | |||||
| } | } | ||||
// ConvolutionDepthwiseSWCPUKernel::Execute
// Forwards the packed output, input and weight buffers, the bias (reinterpret_cast to float *),
// conv_param_, sliding_ and task_id to the fp32 depthwise kernel routine.
// Diff note: the two single-column rows below are the two versions of the same call; the first
// (ConvDwC4Fp32) appears to be the removed line and the second (ConvDwSWFp32) the added one.
// The argument list is identical in both.
// @param task_id  passed through unchanged; how it partitions the work is decided in the callee.
// @return always RET_OK; no result of the kernel call is inspected here.
| int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) { | int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) { | ||||
| ConvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, | |||||
| ConvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, | |||||
| sliding_, task_id); | sliding_, task_id); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -130,7 +130,7 @@ int DeconvolutionDepthwiseCPUKernel::ReSize() { | |||||
| } | } | ||||
// DeconvolutionDepthwiseCPUKernel::Execute
// Forwards the packed output, input and weight buffers, the bias (reinterpret_cast to float *),
// conv_param_, sliding_ and task_id to the fp32 depthwise deconvolution routine.
// Diff note: the two single-column rows below are the two versions of the same call; the first
// (DeconvDwC4Fp32) appears to be the removed line and the second (DeconvDwSWFp32) the added one.
// The argument list is identical in both.
// @param task_id  passed through unchanged; how it partitions the work is decided in the callee.
// @return always RET_OK; no result of the kernel call is inspected here.
| int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) { | int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) { | ||||
| DeconvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, | |||||
| DeconvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, | |||||
| sliding_, task_id); | sliding_, task_id); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -99,26 +99,26 @@ int ConvolutionDepthwise3x3Int8CPUKernel::Init() { | |||||
| MS_LOG(ERROR) << "new sliding window param."; | MS_LOG(ERROR) << "new sliding window param."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | |||||
| int ConvolutionDepthwise3x3Int8CPUKernel::ReSize() { | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| InitSlidingParamConvDw(sliding_, conv_param_, conv_param_->input_channel_); | |||||
| auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); | auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Set quant param failed."; | MS_LOG(ERROR) << "Set quant param failed."; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_); | |||||
| ret = InitWeightBias(); | ret = InitWeightBias(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | |||||
// ConvolutionDepthwise3x3Int8CPUKernel::ReSize (version on the added side of this hunk)
// Performs three steps, in order:
//   1. calls ConvolutionBaseCPUKernel::Init();
//   2. refills sliding_ via InitSlidingParamConvDw, using conv_param_->input_channel_ as the
//      last argument;
//   3. sets conv_param_->thread_num_ to MSMIN(thread_count_, conv_param_->output_h_).
// Quant-param setup and InitWeightBias() are not called from this version of ReSize().
// NOTE(review): the result of ConvolutionBaseCPUKernel::Init() is discarded -- confirm that it
// cannot fail, or that a failure is safe to ignore here.
// @return always RET_OK.
| int ConvolutionDepthwise3x3Int8CPUKernel::ReSize() { | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| InitSlidingParamConvDw(sliding_, conv_param_, conv_param_->input_channel_); | |||||
| conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -90,14 +90,6 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||||
| } | } | ||||
| int ConvolutionDepthwiseInt8CPUKernel::Init() { | int ConvolutionDepthwiseInt8CPUKernel::Init() { | ||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | |||||
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); | auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Set quant param failed."; | MS_LOG(ERROR) << "Set quant param failed."; | ||||
| @@ -109,6 +101,14 @@ int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | |||||
// ConvolutionDepthwiseInt8CPUKernel::ReSize (version on the added side of this hunk)
// Only calls ConvolutionBaseCPUKernel::Init(); quant-param setup and InitWeightBias() are not
// called from this version of ReSize().
// NOTE(review): the result of ConvolutionBaseCPUKernel::Init() is discarded -- confirm that it
// cannot fail, or that a failure is safe to ignore here.
// @return always RET_OK.
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -181,7 +181,7 @@ kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector<lite::Tensor *> | |||||
| conv_param->output_w_ = outputs[kOutputIndex]->Width(); | conv_param->output_w_ = outputs[kOutputIndex]->Width(); | ||||
| } | } | ||||
| auto weight_quant_size = inputs[kWeightIndex]->GetQuantParams().size(); | auto weight_quant_size = inputs[kWeightIndex]->GetQuantParams().size(); | ||||
| if (CheckIfUse3X3(conv_param) && weight_quant_size == 1) { | |||||
| if (CheckConvDwInt8Use3X3(conv_param) && weight_quant_size == 1) { | |||||
| #ifdef ENABLE_ARM64 | #ifdef ENABLE_ARM64 | ||||
| kernel = | kernel = | ||||
| new (std::nothrow) kernel::ConvolutionDepthwise3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | new (std::nothrow) kernel::ConvolutionDepthwise3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | ||||
| @@ -275,16 +275,6 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Init() { | |||||
| MS_LOG(ERROR) << "new sliding window param."; | MS_LOG(ERROR) << "new sliding window param."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | |||||
| int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() { | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); | |||||
| auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); | auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Set quant param failed."; | MS_LOG(ERROR) << "Set quant param failed."; | ||||
| @@ -295,17 +285,25 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() { | |||||
| MS_LOG(ERROR) << "reinit quant param failed."; | MS_LOG(ERROR) << "reinit quant param failed."; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = InitWeightBias(); | ret = InitWeightBias(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | |||||
// ConvolutionDepthwiseSWInt8CPUKernel::ReSize (version on the added side of this hunk)
// Calls ConvolutionBaseCPUKernel::Init(), then refills sliding_ via InitSlidingParamConvDw with
// C8NUM as the last argument (the 3x3 int8 kernel passes conv_param_->input_channel_ instead).
// Quant-param setup and InitWeightBias() are not called from this version of ReSize().
// NOTE(review): the result of ConvolutionBaseCPUKernel::Init() is discarded -- confirm that it
// cannot fail, or that a failure is safe to ignore here.
// @return always RET_OK.
| int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() { | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
// ConvolutionDepthwiseSWInt8CPUKernel::Execute
// Forwards the packed output, input and weight buffers, the bias (reinterpret_cast to int32_t *),
// input_zp_, output_zp_, conv_param_, sliding_ and task_id to the int8 depthwise kernel routine.
// Diff note: the two single-column rows below are the two versions of the same call; the first
// (ConvDwSWInt8) appears to be the removed line and the second (ConvDwInt8SW) the added one.
// The argument list is identical in both.
// @param task_id  passed through unchanged; how it partitions the work is decided in the callee.
// @return always RET_OK; no result of the kernel call is inspected here.
| int ConvolutionDepthwiseSWInt8CPUKernel::Execute(int task_id) { | int ConvolutionDepthwiseSWInt8CPUKernel::Execute(int task_id) { | ||||
| ConvDwSWInt8(packed_output_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), input_zp_, | |||||
| ConvDwInt8SW(packed_output_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), input_zp_, | |||||
| output_zp_, conv_param_, sliding_, task_id); | output_zp_, conv_param_, sliding_, task_id); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||