| @@ -94,3 +94,15 @@ int HSwishFp16(const float16_t *src, float16_t *dst, int ele_num) { | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int SwishFp16(const float16_t *src, float16_t *dst, int ele_num) { | |||||
| int ret = SigmoidFp16(src, dst, ele_num); | |||||
| if (ret != NNACL_OK) { | |||||
| return NNACL_ERR; | |||||
| } | |||||
| int index = 0; | |||||
| for (; index < ele_num; index++) { | |||||
| dst[index] = src[index] * dst[index]; | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| @@ -38,6 +38,7 @@ int LReluFp16(const float16_t *src, float16_t *dst, int ele_num, float16_t alpha | |||||
| int SigmoidFp16(const float16_t *src, float16_t *dst, int ele_num); | int SigmoidFp16(const float16_t *src, float16_t *dst, int ele_num); | ||||
| int TanhFp16(const float16_t *src, float16_t *dst, int ele_num); | int TanhFp16(const float16_t *src, float16_t *dst, int ele_num); | ||||
| int HSwishFp16(const float16_t *src, float16_t *dst, int ele_num); | int HSwishFp16(const float16_t *src, float16_t *dst, int ele_num); | ||||
| int SwishFp16(const float16_t *src, float16_t *dst, int ele_num); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -110,10 +110,10 @@ void DeConvWgMergeFp16(const float16_t *src, float16_t *dst, size_t src_stride, | |||||
| return; | return; | ||||
| } | } | ||||
| void _deConvWinogradFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight_buf, float16_t *tmp_buf, | |||||
| float16_t *at_buf, float16_t *a_mid_buf, float16_t *trans_a_buf, bool *transfered, | |||||
| float16_t *bt_buf, float16_t *b_tmp_buf, int unit_size, int w_start, int h_start, | |||||
| ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| void DeConvWgCalWgFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight_buf, float16_t *tmp_buf, | |||||
| float16_t *at_buf, float16_t *a_mid_buf, float16_t *trans_a_buf, bool *transfered, | |||||
| float16_t *bt_buf, float16_t *b_tmp_buf, int unit_size, int w_start, int h_start, | |||||
| ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| int winograd_plane = unit_size * unit_size; | int winograd_plane = unit_size * unit_size; | ||||
| if (!transfered[unit_size]) { | if (!transfered[unit_size]) { | ||||
| WinogradTransLeftFp16(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, | WinogradTransLeftFp16(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, | ||||
| @@ -151,8 +151,8 @@ void _deConvWinogradFp16(float16_t *tile_in, float16_t *tile_out, float16_t *wei | |||||
| return; | return; | ||||
| } | } | ||||
| void _deConvCommonFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight, float16_t *tmp_buf, int h_start, | |||||
| int w_start, int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| void DeConvWgCalCommFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight, float16_t *tmp_buf, int h_start, | |||||
| int w_start, int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| int count = deconv_param->oc_div4_ * w_size * h_size; | int count = deconv_param->oc_div4_ * w_size * h_size; | ||||
| int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | ||||
| int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | ||||
| @@ -218,6 +218,7 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||||
| unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float16_t)); | unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float16_t)); | ||||
| if (unit->winograd_.AT_ == NULL) { | if (unit->winograd_.AT_ == NULL) { | ||||
| free(current_unit_weight); | free(current_unit_weight); | ||||
| current_unit_weight = NULL; | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| Float32ToFloat16(matrix_at, unit->winograd_.AT_, unit->winograd_.i_ * unit->winograd_.o_); | Float32ToFloat16(matrix_at, unit->winograd_.AT_, unit->winograd_.i_ * unit->winograd_.o_); | ||||
| @@ -227,6 +228,8 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||||
| if (unit->winograd_.BT_ == NULL) { | if (unit->winograd_.BT_ == NULL) { | ||||
| free(current_unit_weight); | free(current_unit_weight); | ||||
| free(unit->winograd_.AT_); | free(unit->winograd_.AT_); | ||||
| current_unit_weight = NULL; | |||||
| unit->winograd_.AT_ = NULL; | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| Float32ToFloat16(matrix_bt, unit->winograd_.BT_, unit->winograd_.o_ * unit->winograd_.o_); | Float32ToFloat16(matrix_bt, unit->winograd_.BT_, unit->winograd_.o_ * unit->winograd_.o_); | ||||
| @@ -238,6 +241,9 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||||
| free(current_unit_weight); | free(current_unit_weight); | ||||
| free(unit->winograd_.AT_); | free(unit->winograd_.AT_); | ||||
| free(unit->winograd_.BT_); | free(unit->winograd_.BT_); | ||||
| current_unit_weight = NULL; | |||||
| unit->winograd_.AT_ = NULL; | |||||
| unit->winograd_.BT_ = NULL; | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -268,6 +274,7 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa | |||||
| } | } | ||||
| free(current_unit_weight); | free(current_unit_weight); | ||||
| current_unit_weight = NULL; | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| @@ -320,14 +327,14 @@ void DeconvWgFp16(float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_ou | |||||
| float16_t *tmp_b = (float16_t *)unit->winograd_.b_buffer_ + task_id * unit->winograd_.kh_ * unit->winograd_.kw_ * | float16_t *tmp_b = (float16_t *)unit->winograd_.b_buffer_ + task_id * unit->winograd_.kh_ * unit->winograd_.kw_ * | ||||
| DECONV_WINOGRAD_DEFAULT_TILE * | DECONV_WINOGRAD_DEFAULT_TILE * | ||||
| deconv_param->oc_up4_; | deconv_param->oc_up4_; | ||||
| _deConvWinogradFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->winograd_.AT_, mid_a, dst_a, | |||||
| transfered, unit->winograd_.BT_, tmp_b, unit->winograd_.kh_, unit->w_start_, unit->h_start_, | |||||
| conv_param, deconv_param); | |||||
| DeConvWgCalWgFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->winograd_.AT_, mid_a, dst_a, | |||||
| transfered, unit->winograd_.BT_, tmp_b, unit->winograd_.kh_, unit->w_start_, unit->h_start_, | |||||
| conv_param, deconv_param); | |||||
| } else { | } else { | ||||
| float16_t *tmp_buf = (float16_t *)unit->tmp_buffer_ + task_id * deconv_param->oc_div4_ * unit->w_size_ * | float16_t *tmp_buf = (float16_t *)unit->tmp_buffer_ + task_id * deconv_param->oc_div4_ * unit->w_size_ * | ||||
| unit->h_size_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM; | unit->h_size_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM; | ||||
| _deConvCommonFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_, | |||||
| unit->h_size_, unit->w_size_, conv_param, deconv_param); | |||||
| DeConvWgCalCommFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_, | |||||
| unit->h_size_, unit->w_size_, conv_param, deconv_param); | |||||
| } | } | ||||
| } | } | ||||
| return; | return; | ||||
| @@ -340,9 +340,9 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s | |||||
| return; | return; | ||||
| } | } | ||||
| void _deConvWinograd(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, const float *at_buf, | |||||
| float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf, | |||||
| int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, float *at_buf, | |||||
| float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf, | |||||
| int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| int winograd_plane = unit_size * unit_size; | int winograd_plane = unit_size * unit_size; | ||||
| if (!transfered[unit_size]) { | if (!transfered[unit_size]) { | ||||
| WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT, | WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT, | ||||
| @@ -380,8 +380,8 @@ void _deConvWinograd(const float *tile_in, float *tile_out, float *weight_buf, f | |||||
| return; | return; | ||||
| } | } | ||||
| void _deConvCommon(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start, | |||||
| int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start, | |||||
| int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| int count = deconv_param->oc_div4_ * w_size * h_size; | int count = deconv_param->oc_div4_ * w_size * h_size; | ||||
| int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | ||||
| int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | ||||
| @@ -461,14 +461,14 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind | |||||
| DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | ||||
| float *tmp_b_buf = (float *)unit->winograd_.b_buffer_ + task_id * unit->winograd_.kh_ * unit->winograd_.kw_ * | float *tmp_b_buf = (float *)unit->winograd_.b_buffer_ + task_id * unit->winograd_.kh_ * unit->winograd_.kw_ * | ||||
| deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | ||||
| _deConvWinograd(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->winograd_.AT_, wg_mid_a_buf, | |||||
| wg_dst_a_buf, transfered, unit->winograd_.BT_, tmp_b_buf, unit->winograd_.kh_, unit->w_start_, | |||||
| unit->h_start_, conv_param, deconv_param); | |||||
| DeConvWgCalWgFp32(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->winograd_.AT_, wg_mid_a_buf, | |||||
| wg_dst_a_buf, transfered, unit->winograd_.BT_, tmp_b_buf, unit->winograd_.kh_, unit->w_start_, | |||||
| unit->h_start_, conv_param, deconv_param); | |||||
| } else { | } else { | ||||
| float *tmp_buf = (float *)unit->tmp_buffer_ + task_id * deconv_param->oc_div4_ * unit->w_size_ * unit->h_size_ * | float *tmp_buf = (float *)unit->tmp_buffer_ + task_id * deconv_param->oc_div4_ * unit->w_size_ * unit->h_size_ * | ||||
| DECONV_WINOGRAD_DEFAULT_TILE * C4NUM; | DECONV_WINOGRAD_DEFAULT_TILE * C4NUM; | ||||
| _deConvCommon(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_, unit->h_size_, | |||||
| unit->w_size_, conv_param, deconv_param); | |||||
| DeConvWgCalCommFp32(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_, | |||||
| unit->h_size_, unit->w_size_, conv_param, deconv_param); | |||||
| } | } | ||||
| } | } | ||||
| return; | return; | ||||
| @@ -41,14 +41,6 @@ void PostConvFuncCommInt8(const int32_t *in, int8_t *out, const int32_t *bias, s | |||||
| return; | return; | ||||
| } | } | ||||
| void PostFuncInt8C8(const int32_t *in, const int32_t *bias, int8_t *out, size_t oc, size_t plane, int32_t multiplier, | |||||
| int32_t left_shift, int32_t right_shift, int32_t zp, int32_t mini, int32_t maxi) { | |||||
| /* ((int32_t)row8x8-major + bias) * multiplier + output_zp => (int8)relu => (int8_t)row-major */ | |||||
| PostConvFuncCommInt8(in, out, bias, oc, plane, oc, UP_ROUND(plane, C8NUM) * C8NUM, multiplier, mini, maxi, left_shift, | |||||
| right_shift, zp, C8NUM); | |||||
| return; | |||||
| } | |||||
| void PostFuncInt8C4(const int32_t *in, const int32_t *bias, int8_t *out, size_t oc, size_t plane, size_t stride, | void PostFuncInt8C4(const int32_t *in, const int32_t *bias, int8_t *out, size_t oc, size_t plane, size_t stride, | ||||
| int32_t multiplier, int32_t left_shift, int32_t right_shift, int32_t zp, int32_t mini, | int32_t multiplier, int32_t left_shift, int32_t right_shift, int32_t zp, int32_t mini, | ||||
| int32_t maxi) { | int32_t maxi) { | ||||
| @@ -17,52 +17,6 @@ | |||||
| #include "nnacl/int8/deconv_int8.h" | #include "nnacl/int8/deconv_int8.h" | ||||
| #include "nnacl/int8/matmul_int8.h" | #include "nnacl/int8/matmul_int8.h" | ||||
| #include "nnacl/int8/common_func_int8.h" | #include "nnacl/int8/common_func_int8.h" | ||||
| int DeConvPostInt8C8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, | |||||
| ConvParameter *conv_param) { | |||||
| /* row8x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */ | |||||
| size_t input_plane = conv_param->input_w_ * conv_param->input_h_; | |||||
| size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; | |||||
| size_t output_plane = conv_param->output_w_ * conv_param->output_h_; | |||||
| int oc8 = UP_DIV(output_channel, C8NUM); | |||||
| int in_plane8 = UP_ROUND(input_plane, 8); | |||||
| for (int c = 0; c < oc8; c++) { | |||||
| int32_t *dst_ptr = tmp + c * output_plane * C8NUM; | |||||
| const int32_t *src_ptr = src + c * in_plane8 * kernel_plane * C8NUM; | |||||
| memset(dst_ptr, 0, output_plane * C8NUM * sizeof(int32_t)); | |||||
| for (int ih = 0; ih < conv_param->input_h_; ih++) { | |||||
| for (int iw = 0; iw < conv_param->input_w_; iw++) { | |||||
| int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; | |||||
| int ow = iw * conv_param->stride_w_ - conv_param->pad_l_; | |||||
| int kh_start = MSMAX(0, UP_DIV(-oh, conv_param->dilation_h_)); | |||||
| int kh_end = MSMIN(conv_param->kernel_h_, UP_DIV(conv_param->output_h_ - oh, conv_param->dilation_h_)); | |||||
| int kw_start = MSMAX(0, UP_DIV(-ow, conv_param->dilation_w_)); | |||||
| int kw_end = MSMIN(conv_param->kernel_w_, UP_DIV(conv_param->output_w_ - ow, conv_param->dilation_w_)); | |||||
| for (int kh = kh_start; kh < kh_end; kh++) { | |||||
| for (int kw = kw_start; kw < kw_end; kw++) { | |||||
| int src_index = ih * conv_param->input_w_ * C8NUM + iw * C8NUM + | |||||
| kh * input_plane * conv_param->kernel_w_ * C8NUM + kw * input_plane * C8NUM; | |||||
| int dst_index = oh * conv_param->output_w_ * C8NUM + ow * C8NUM + | |||||
| kh * conv_param->dilation_h_ * conv_param->output_w_ * C8NUM + | |||||
| kw * conv_param->dilation_w_ * C8NUM; | |||||
| for (int i = 0; i < C8NUM; i++) { | |||||
| dst_ptr[dst_index + i] += src_ptr[src_index + i]; | |||||
| } | |||||
| } /*kw*/ | |||||
| } /*kh*/ | |||||
| } /*iw*/ | |||||
| } /*ih*/ | |||||
| } /*oc8*/ | |||||
| PostFuncInt8C8(tmp, bias, out, output_channel, output_plane, conv_param->conv_quant_arg_.quant_multiplier_[0], | |||||
| conv_param->conv_quant_arg_.left_shift_[0], conv_param->conv_quant_arg_.right_shift_[0], | |||||
| conv_param->conv_quant_arg_.output_quant_args_[0].zp_, conv_param->conv_quant_arg_.out_act_min_[0], | |||||
| conv_param->conv_quant_arg_.out_act_max_[0]); | |||||
| return NNACL_OK; | |||||
| } | |||||
| int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, | int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, | ||||
| ConvParameter *conv_param) { | ConvParameter *conv_param) { | ||||
| /* row4x4-major(ih*iw x oc*kh*kw) -> row4-major(oh*ow x oc) */ | /* row4x4-major(ih*iw x oc*kh*kw) -> row4-major(oh*ow x oc) */ | ||||
| @@ -74,8 +28,8 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8 | |||||
| int src_iw_stride = C4NUM; | int src_iw_stride = C4NUM; | ||||
| int src_ih_stride = conv_param->input_w_ * C4NUM; | int src_ih_stride = conv_param->input_w_ * C4NUM; | ||||
| int src_kw_stride = input_plane * C4NUM; | |||||
| int src_kh_stride = input_plane * conv_param->kernel_w_ * C4NUM; | |||||
| int src_kw_stride = in_plane4 * C4NUM; | |||||
| int src_kh_stride = in_plane4 * conv_param->kernel_w_ * C4NUM; | |||||
| int dst_oh_stride = conv_param->output_w_ * C4NUM; | int dst_oh_stride = conv_param->output_w_ * C4NUM; | ||||
| int dst_ow_stride = C4NUM; | int dst_ow_stride = C4NUM; | ||||
| int dst_kh_stride = conv_param->dilation_h_ * conv_param->output_w_ * C4NUM; | int dst_kh_stride = conv_param->dilation_h_ * conv_param->output_w_ * C4NUM; | ||||
| @@ -153,18 +107,18 @@ void DeConvWeightTransInt8(int8_t *src, int8_t *dst, int input_channel, int outp | |||||
| return; | return; | ||||
| } | } | ||||
| void DeConvPackWeightSum(int8_t *weight, int32_t *weight_sum, int32_t input_zp, int32_t filter_zp, int deep16, int col4, | |||||
| void DeConvPackWeightSum(int8_t *weight, int32_t *weight_sum, int32_t input_zp, int32_t filter_zp, int deep, int col4, | |||||
| bool suppport_opt) { | bool suppport_opt) { | ||||
| /* optimize normal -> same layout */ | |||||
| int deep16 = UP_ROUND(deep, C16NUM); | |||||
| for (int c = 0; c < col4; c++) { | for (int c = 0; c < col4; c++) { | ||||
| int c4div = c / C4NUM, c4mod = c % C4NUM; | int c4div = c / C4NUM, c4mod = c % C4NUM; | ||||
| int32_t value = 0; | int32_t value = 0; | ||||
| for (int r = 0; r < deep16; r++) { | |||||
| for (int r = 0; r < deep; r++) { | |||||
| int r16div = r / C16NUM, r16mod = r % C16NUM; | int r16div = r / C16NUM, r16mod = r % C16NUM; | ||||
| int src_index = c4div * deep16 * C4NUM + r16div * C4NUM * C16NUM + c4mod * C16NUM + r16mod; | int src_index = c4div * deep16 * C4NUM + r16div * C4NUM * C16NUM + c4mod * C16NUM + r16mod; | ||||
| value += weight[src_index]; | value += weight[src_index]; | ||||
| } | } | ||||
| weight_sum[c] = filter_zp * input_zp * deep16 - value * input_zp; | |||||
| weight_sum[c] = filter_zp * input_zp * deep - value * input_zp; | |||||
| } | } | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -30,10 +30,20 @@ using mindspore::schema::ActivationType_HSWISH; | |||||
| using mindspore::schema::ActivationType_LEAKY_RELU; | using mindspore::schema::ActivationType_LEAKY_RELU; | ||||
| using mindspore::schema::ActivationType_RELU; | using mindspore::schema::ActivationType_RELU; | ||||
| using mindspore::schema::ActivationType_RELU6; | using mindspore::schema::ActivationType_RELU6; | ||||
| using mindspore::schema::ActivationType_SWISH; | |||||
| using mindspore::schema::PrimitiveType_Activation; | using mindspore::schema::PrimitiveType_Activation; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ActivationFp16CPUKernel::Init() { return RET_OK; } | |||||
| int ActivationFp16CPUKernel::Init() { | |||||
| if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 && | |||||
| type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID && | |||||
| type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH && | |||||
| type_ != schema::ActivationType_SWISH) { | |||||
| MS_LOG(ERROR) << "Activation fp16 not support type: " << type_; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ActivationFp16CPUKernel::ReSize() { return RET_OK; } | int ActivationFp16CPUKernel::ReSize() { return RET_OK; } | ||||
| @@ -85,6 +95,8 @@ int ActivationFp16CPUKernel::DoActivation(int task_id) { | |||||
| error_code = TanhFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count); | error_code = TanhFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count); | ||||
| } else if (type_ == schema::ActivationType_HSWISH) { | } else if (type_ == schema::ActivationType_HSWISH) { | ||||
| error_code = HSwishFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count); | error_code = HSwishFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count); | ||||
| } else if (type_ == schema::ActivationType_SWISH) { | |||||
| error_code = SwishFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Activation fp16 not support type: " << type_; | MS_LOG(ERROR) << "Activation fp16 not support type: " << type_; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -79,32 +79,32 @@ void DeConvWinogradFp16CPUKernel::FreeResizeBuf() { | |||||
| } | } | ||||
| void DeConvWinogradFp16CPUKernel::FreeDeconvParam() { | void DeConvWinogradFp16CPUKernel::FreeDeconvParam() { | ||||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | |||||
| if (unit.weight_ != nullptr) { | |||||
| free(unit.weight_); | |||||
| unit.weight_ = nullptr; | |||||
| } | |||||
| if (deconv_param_ != nullptr) { | |||||
| for (int i = 0; i < deconv_param_->compute_size_; i++) { | |||||
| DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; | |||||
| if (unit.use_winograd_) { | |||||
| if (unit.winograd_.AT_ != nullptr) { | |||||
| free(unit.winograd_.AT_); | |||||
| unit.winograd_.AT_ = nullptr; | |||||
| if (unit.weight_ != nullptr) { | |||||
| free(unit.weight_); | |||||
| unit.weight_ = nullptr; | |||||
| } | } | ||||
| if (unit.winograd_.BT_ != nullptr) { | |||||
| free(unit.winograd_.BT_); | |||||
| unit.winograd_.BT_ = nullptr; | |||||
| if (unit.use_winograd_) { | |||||
| if (unit.winograd_.AT_ != nullptr) { | |||||
| free(unit.winograd_.AT_); | |||||
| unit.winograd_.AT_ = nullptr; | |||||
| } | |||||
| if (unit.winograd_.BT_ != nullptr) { | |||||
| free(unit.winograd_.BT_); | |||||
| unit.winograd_.BT_ = nullptr; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | |||||
| if (deconv_param_->compute_units_ != nullptr) { | |||||
| free(deconv_param_->compute_units_); | |||||
| deconv_param_->compute_units_ = nullptr; | |||||
| } | |||||
| if (deconv_param_->compute_units_ != nullptr) { | |||||
| free(deconv_param_->compute_units_); | |||||
| deconv_param_->compute_units_ = nullptr; | |||||
| } | |||||
| if (deconv_param_ != nullptr) { | |||||
| delete (deconv_param_); | delete (deconv_param_); | ||||
| deconv_param_ = nullptr; | deconv_param_ = nullptr; | ||||
| } | } | ||||
| @@ -33,7 +33,17 @@ using mindspore::schema::ActivationType_SWISH; | |||||
| using mindspore::schema::PrimitiveType_Activation; | using mindspore::schema::PrimitiveType_Activation; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ActivationCPUKernel::Init() { return RET_OK; } | |||||
| int ActivationCPUKernel::Init() { | |||||
| if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 && | |||||
| type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID && | |||||
| type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH && | |||||
| type_ != schema::ActivationType_SWISH && type_ != schema::ActivationType_HSIGMOID && | |||||
| type_ != schema::ActivationType_HARD_TANH) { | |||||
| MS_LOG(ERROR) << "Activation fp32 not support type: " << type_; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ActivationCPUKernel::ReSize() { return RET_OK; } | int ActivationCPUKernel::ReSize() { return RET_OK; } | ||||
| @@ -150,7 +150,7 @@ int DeConvInt8CPUKernel::InitBiasWeight() { | |||||
| MS_LOG(ERROR) << "deconv int8 malloc weight_ptr_ error!"; | MS_LOG(ERROR) << "deconv int8 malloc weight_ptr_ error!"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| memset(weight_ptr_, static_cast<int8_t>(conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_), size); | |||||
| memset(weight_ptr_, 0, size); | |||||
| DeConvWeightTransInt8(reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData()), weight_ptr_, | DeConvWeightTransInt8(reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData()), weight_ptr_, | ||||
| conv_param_->input_channel_, conv_param_->output_channel_, | conv_param_->input_channel_, conv_param_->output_channel_, | ||||
| conv_param_->kernel_h_ * conv_param_->kernel_w_, support_optimize_); | conv_param_->kernel_h_ * conv_param_->kernel_w_, support_optimize_); | ||||
| @@ -163,8 +163,8 @@ int DeConvInt8CPUKernel::InitBiasWeight() { | |||||
| } | } | ||||
| memset(weight_sum_, 0, size * sizeof(int32_t)); | memset(weight_sum_, 0, size * sizeof(int32_t)); | ||||
| DeConvPackWeightSum(weight_ptr_, weight_sum_, conv_param_->conv_quant_arg_.input_quant_args_[0].zp_, | DeConvPackWeightSum(weight_ptr_, weight_sum_, conv_param_->conv_quant_arg_.input_quant_args_[0].zp_, | ||||
| conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, UP_ROUND(matmul_param_->deep_, C16NUM), | |||||
| size, support_optimize_); | |||||
| conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, matmul_param_->deep_, size, | |||||
| support_optimize_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -271,41 +271,6 @@ TEST_F(TestDeconvInt8, MatMulOptTest1) { | |||||
| CompareOutputData(tmp_output, correct_tmp_output, 12 * 3 * 8, 0); | CompareOutputData(tmp_output, correct_tmp_output, 12 * 3 * 8, 0); | ||||
| } | } | ||||
| TEST_F(TestDeconvInt8, PostAddTest1) { | |||||
| int32_t in[] = { | |||||
| -4956, -3923, 868, -8880, -4089, -5179, -4526, -4527, -10464, 99, -5826, -2995, -4519, -4519, -10509, -2505, | |||||
| -11272, 434, -4522, -4523, -5287, -8936, -878, 373, -4528, -4529, -1960, -6589, 1688, 2287, -8059, 926, | |||||
| -2506, -6972, -2834, -8281, -8118, -3110, -4526, -4527, -4528, -4529, -4519, -4519, -4519, -4519, -4519, -4519, | |||||
| -4520, -4521, -4522, -4523, -4524, -4525, -4526, -4527, -4528, -4529, -4519, -4519, -4519, -4519, -4519, -4519, | |||||
| 1578, 2231, -4522, -4523, -4524, -4525, -4526, -4527, -8449, -990, -4519, -4519, -4519, -4519, -4519, -4519, | |||||
| -4303, -10293, -4522, -4523, -4524, -4525, -4526, -4527, -4528, -4529, -4519, -4519, -4519, -4519, -4519, -4519, | |||||
| -7025, 924, -4522, -4523, -4524, -4525, -4526, -4527, -4528, -4529, -4519, -4519, -4519, -4519, -4519, -4519, | |||||
| -4520, -4521, -4522, -4523, -4524, -4525, -4526, -4527, -4528, -4529, -4519, -4519, -4519, -4519, -4519, -4519}; | |||||
| int8_t co[] = {-8, 11, 99, -80, 8, -12, 0, 0, 112, 124, -109, 85, -24, 28, 0, 0, -110, | |||||
| 37, -72, 65, -124, 91, 0, 0, -14, -81, 67, 90, 4, -106, 0, 0, 47, -38, | |||||
| 114, 125, -65, 100, 0, 0, 37, -45, 31, -69, -66, 26, 0, 0, -46, 100}; | |||||
| int32_t bias[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; | |||||
| int8_t out[50] = {0}; | |||||
| double multiplier = 0.0183649725490196; | |||||
| int32_t quant_multiplier; | |||||
| int32_t left_shift; | |||||
| int32_t right_shift; | |||||
| QuantizeRoundParameter(multiplier, &quant_multiplier, &left_shift, &right_shift); | |||||
| int32_t zp = 83; | |||||
| PostFuncInt8C8(in, bias, out, 10, 5, quant_multiplier, left_shift, right_shift, zp, -128, 127); | |||||
| CompareOutputData(out, co, 50, 1); | |||||
| int8_t co_relu[] = {0, 11, 99, 0, 8, 0, 0, 0, 112, 124, 0, 85, 0, 28, 0, 0, 0, 37, 0, 65, 0, 91, 0, 0, 0, | |||||
| 0, 67, 90, 4, 0, 0, 0, 47, 0, 114, 125, 0, 100, 0, 0, 37, 0, 31, 0, 0, 26, 0, 0, 0, 100}; | |||||
| PostFuncInt8C8(in, bias, out, 10, 5, quant_multiplier, left_shift, right_shift, zp, 0, 127); | |||||
| CompareOutputData(out, co_relu, 50, 1); | |||||
| int8_t co_relu6[] = {0, 6, 6, 0, 6, 0, 0, 0, 6, 6, 0, 6, 0, 6, 0, 0, 0, 6, 0, 6, 0, 6, 0, 0, 0, | |||||
| 0, 6, 6, 4, 0, 0, 0, 6, 0, 6, 6, 0, 6, 0, 0, 6, 0, 6, 0, 0, 6, 0, 0, 0, 6}; | |||||
| PostFuncInt8C8(in, bias, out, 10, 5, quant_multiplier, left_shift, right_shift, zp, 0, 6); | |||||
| CompareOutputData(out, co_relu6, 50, 1); | |||||
| } | |||||
| int DeConvInt8TestInit1(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outputs_, | int DeConvInt8TestInit1(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outputs_, | ||||
| ConvParameter *conv_param, int8_t **correct) { | ConvParameter *conv_param, int8_t **correct) { | ||||
| /* float data from deconv fp32 testcase : DeConvTestInit2 */ | /* float data from deconv fp32 testcase : DeConvTestInit2 */ | ||||