| @@ -47,7 +47,7 @@ int Power::InferShape(std::vector<lite::tensor::Tensor *> inputs, std::vector<li | |||||
| } | } | ||||
| auto output_tensor = outputs[0]; | auto output_tensor = outputs[0]; | ||||
| MS_ASSERT(output_tensor != nullptr); | MS_ASSERT(output_tensor != nullptr); | ||||
| if (exp_tensor) { | |||||
| if (exp_tensor != nullptr) { | |||||
| if (exp_tensor->shape() != x_tensor->shape() || exp_tensor->data_type() != x_tensor->data_type()) { | if (exp_tensor->shape() != x_tensor->shape() || exp_tensor->data_type() != x_tensor->data_type()) { | ||||
| MS_LOG(ERROR) << "Power inputs shape or type is not equal!"; | MS_LOG(ERROR) << "Power inputs shape or type is not equal!"; | ||||
| return 1; | return 1; | ||||
| @@ -39,7 +39,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso | |||||
| case kNumberTypeInt8: | case kNumberTypeInt8: | ||||
| case kNumberTypeUInt8: { | case kNumberTypeUInt8: { | ||||
| kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | ||||
| if (!kernel) { | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| @@ -65,6 +65,13 @@ void WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_wei | |||||
| int kernel_plane_stride = channel_in; | int kernel_plane_stride = channel_in; | ||||
| if (oc_block == 0) { | if (oc_block == 0) { | ||||
| MS_LOG(ERROR) << "Divide by zero"; | MS_LOG(ERROR) << "Divide by zero"; | ||||
| free(tmp_weight_data); | |||||
| free(tmp_data); | |||||
| free(trans_out_data); | |||||
| free(matrix_g_data_fp16); | |||||
| free(matrix_gt_data_fp16); | |||||
| delete matrix_g; | |||||
| delete matrix_gt; | |||||
| return; | return; | ||||
| } | } | ||||
| for (int i = 0; i < channel_out; i++) { | for (int i = 0; i < channel_out; i++) { | ||||
| @@ -54,6 +54,11 @@ void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int | |||||
| int kernel_plane_stride = channel_in; | int kernel_plane_stride = channel_in; | ||||
| if (oc_block == 0) { | if (oc_block == 0) { | ||||
| MS_LOG(ERROR) << "Divide by zero"; | MS_LOG(ERROR) << "Divide by zero"; | ||||
| free(tmp_weight_data); | |||||
| free(tmp_data); | |||||
| free(trans_out_data); | |||||
| delete matrix_g; | |||||
| delete matrix_gt; | |||||
| return; | return; | ||||
| } | } | ||||
| for (int i = 0; i < channel_out; i++) { | for (int i = 0; i < channel_out; i++) { | ||||
| @@ -161,6 +161,7 @@ int SqueezeInt8CPUKernel::Run() { | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: "; | MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: "; | ||||
| } | } | ||||
| free(inputs_array); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -219,7 +219,7 @@ void IndirectGemmFp32_Comm(float *output, const float *input, const float *weigh | |||||
| int d4mod = deep % 4; | int d4mod = deep % 4; | ||||
| int d4div = deep / 4; | int d4div = deep / 4; | ||||
| int a_index = d4div * 4 * 8 + r * 4 + d4mod; | int a_index = d4div * 4 * 8 + r * 4 + d4mod; | ||||
| int b_index = 8 * deep + c; | |||||
| const int b_index = 8 * deep + c; | |||||
| value += input[a_index] * weight[b_index]; | value += input[a_index] * weight[b_index]; | ||||
| } | } | ||||
| output[r * offset + c] = value; | output[r * offset + c] = value; | ||||
| @@ -334,7 +334,7 @@ void ConvFp16(float16_t *input_data, float16_t *packed_input, float16_t *packed_ | |||||
| bool relu6 = conv_param->is_relu6_; | bool relu6 = conv_param->is_relu6_; | ||||
| // todo | // todo | ||||
| int thread_count = conv_param->thread_num_; | int thread_count = conv_param->thread_num_; | ||||
| int tile_n = 16; | |||||
| const int tile_n = 16; | |||||
| int output_count = out_h * out_w; | int output_count = out_h * out_w; | ||||
| int output_tile_count = UP_DIV(output_count, tile_n); | int output_tile_count = UP_DIV(output_count, tile_n); | ||||
| @@ -379,7 +379,7 @@ void Conv3x3Fp16(float16_t *input_data, float16_t *transed_weight, const float16 | |||||
| float16_t *tile_buffer, float16_t *block_unit_buffer, float16_t *tmp_dst_buffer, float16_t *tmp_out, | float16_t *tile_buffer, float16_t *block_unit_buffer, float16_t *tmp_dst_buffer, float16_t *tmp_out, | ||||
| int task_id, ConvParameter *conv_param) { | int task_id, ConvParameter *conv_param) { | ||||
| int thread_count = conv_param->thread_num_; | int thread_count = conv_param->thread_num_; | ||||
| int tile_num = 16; | |||||
| const int tile_num = 16; | |||||
| const int output_unit = 4; | const int output_unit = 4; | ||||
| const int k_plane = 36; | const int k_plane = 36; | ||||
| int ic4 = UP_DIV(conv_param->input_channel_, C4NUM); | int ic4 = UP_DIV(conv_param->input_channel_, C4NUM); | ||||
| @@ -427,7 +427,7 @@ void UnPack3x3OutputFp16(const float16_t *src, float16_t *dst, int batch, int he | |||||
| float16_t *batch_out = dst + ro_batch_size; | float16_t *batch_out = dst + ro_batch_size; | ||||
| for (int h = 0; h < height; h++) { | for (int h = 0; h < height; h++) { | ||||
| int src_h_offset = h * out_w_block * C4NUM * C8NUM; | int src_h_offset = h * out_w_block * C4NUM * C8NUM; | ||||
| int dst_h_offset = h * width * channel; | |||||
| const int dst_h_offset = h * width * channel; | |||||
| for (int w = 0; w < width; w++) { | for (int w = 0; w < width; w++) { | ||||
| int src_w_offset = src_h_offset + w * C8NUM; | int src_w_offset = src_h_offset + w * C8NUM; | ||||
| int dst_w_offset = dst_h_offset + w * channel; | int dst_w_offset = dst_h_offset + w * channel; | ||||
| @@ -462,7 +462,7 @@ void UnPack3x3ReluOutputFp16(const float16_t *src, float16_t *dst, int batch, in | |||||
| float16_t *batch_out = dst + ro_batch_size; | float16_t *batch_out = dst + ro_batch_size; | ||||
| for (int h = 0; h < height; h++) { | for (int h = 0; h < height; h++) { | ||||
| int src_h_offset = h * out_w_block * C4NUM * C8NUM; | int src_h_offset = h * out_w_block * C4NUM * C8NUM; | ||||
| int dst_h_offset = h * width * channel; | |||||
| const int dst_h_offset = h * width * channel; | |||||
| for (int w = 0; w < width; w++) { | for (int w = 0; w < width; w++) { | ||||
| int src_w_offset = src_h_offset + w * C8NUM; | int src_w_offset = src_h_offset + w * C8NUM; | ||||
| int dst_w_offset = dst_h_offset + w * channel; | int dst_w_offset = dst_h_offset + w * channel; | ||||
| @@ -502,7 +502,7 @@ void UnPack3x3Relu6OutputFp16(const float16_t *src, float16_t *dst, int batch, i | |||||
| float16_t *batch_out = dst + ro_batch_size; | float16_t *batch_out = dst + ro_batch_size; | ||||
| for (int h = 0; h < height; h++) { | for (int h = 0; h < height; h++) { | ||||
| int src_h_offset = h * out_w_block * C4NUM * C8NUM; | int src_h_offset = h * out_w_block * C4NUM * C8NUM; | ||||
| int dst_h_offset = h * width * channel; | |||||
| const int dst_h_offset = h * width * channel; | |||||
| for (int w = 0; w < width; w++) { | for (int w = 0; w < width; w++) { | ||||
| int src_w_offset = src_h_offset + w * C8NUM; | int src_w_offset = src_h_offset + w * C8NUM; | ||||
| int dst_w_offset = dst_h_offset + w * channel; | int dst_w_offset = dst_h_offset + w * channel; | ||||
| @@ -545,7 +545,7 @@ void ConvWinogardFp16(float16_t *input_data, float16_t *trans_weight, const floa | |||||
| int out_unit = conv_param->output_unit_; | int out_unit = conv_param->output_unit_; | ||||
| int out_w_block = UP_DIV(conv_param->output_w_, out_unit); | int out_w_block = UP_DIV(conv_param->output_w_, out_unit); | ||||
| int out_h_block = UP_DIV(conv_param->output_h_, out_unit); | int out_h_block = UP_DIV(conv_param->output_h_, out_unit); | ||||
| int tile_num = 16; | |||||
| const int tile_num = 16; | |||||
| int output_count = out_w_block * out_h_block; | int output_count = out_w_block * out_h_block; | ||||
| int output_tile_count = UP_DIV(output_count, tile_num); | int output_tile_count = UP_DIV(output_count, tile_num); | ||||
| int out_channel = conv_param->output_channel_; | int out_channel = conv_param->output_channel_; | ||||
| @@ -594,7 +594,7 @@ void UnPackWinogradOutputFp16(const float16_t *src, float16_t *dst, int batch, i | |||||
| int dst_batch_offset = b * height * width * channel; | int dst_batch_offset = b * height * width * channel; | ||||
| for (int h = 0; h < height; h++) { | for (int h = 0; h < height; h++) { | ||||
| int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit); | int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit); | ||||
| int dst_h_offset = dst_batch_offset + h * width * channel; | |||||
| const int dst_h_offset = dst_batch_offset + h * width * channel; | |||||
| for (int w = 0; w < width; w++) { | for (int w = 0; w < width; w++) { | ||||
| int src_w_offset = src_h_offset + w * C8NUM; | int src_w_offset = src_h_offset + w * C8NUM; | ||||
| int dst_w_offset = dst_h_offset + w * channel; | int dst_w_offset = dst_h_offset + w * channel; | ||||
| @@ -633,7 +633,7 @@ void UnPackWinogradReluOutputFp16(const float16_t *src, float16_t *dst, int batc | |||||
| int dst_batch_offset = b * height * width * channel; | int dst_batch_offset = b * height * width * channel; | ||||
| for (int h = 0; h < height; h++) { | for (int h = 0; h < height; h++) { | ||||
| int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit); | int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit); | ||||
| int dst_h_offset = dst_batch_offset + h * width * channel; | |||||
| const int dst_h_offset = dst_batch_offset + h * width * channel; | |||||
| for (int w = 0; w < width; w++) { | for (int w = 0; w < width; w++) { | ||||
| int src_w_offset = src_h_offset + w * C8NUM; | int src_w_offset = src_h_offset + w * C8NUM; | ||||
| int dst_w_offset = dst_h_offset + w * channel; | int dst_w_offset = dst_h_offset + w * channel; | ||||
| @@ -679,7 +679,7 @@ void UnPackWinogradRelu6OutputFp16(const float16_t *src, float16_t *dst, int bat | |||||
| int dst_batch_offset = b * height * width * channel; | int dst_batch_offset = b * height * width * channel; | ||||
| for (int h = 0; h < height; h++) { | for (int h = 0; h < height; h++) { | ||||
| int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit); | int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit); | ||||
| int dst_h_offset = dst_batch_offset + h * width * channel; | |||||
| const int dst_h_offset = dst_batch_offset + h * width * channel; | |||||
| for (int w = 0; w < width; w++) { | for (int w = 0; w < width; w++) { | ||||
| int src_w_offset = src_h_offset + w * C8NUM; | int src_w_offset = src_h_offset + w * C8NUM; | ||||
| int dst_w_offset = dst_h_offset + w * channel; | int dst_w_offset = dst_h_offset + w * channel; | ||||
| @@ -18,6 +18,9 @@ | |||||
| void PostConvFuncCommFp16(float16_t *out_ptr, const float16_t *src_ptr_, const float16_t *bias_ptr, | void PostConvFuncCommFp16(float16_t *out_ptr, const float16_t *src_ptr_, const float16_t *bias_ptr, | ||||
| size_t output_channel, size_t plane_size, size_t stride, bool is_relu, bool is_relu6, | size_t output_channel, size_t plane_size, size_t stride, bool is_relu, bool is_relu6, | ||||
| int size) { | int size) { | ||||
| if (size == 0) { | |||||
| return; | |||||
| } | |||||
| for (int oc = 0; oc < output_channel; oc++) { | for (int oc = 0; oc < output_channel; oc++) { | ||||
| int oc_div = oc / size, oc_mod = oc % size; | int oc_div = oc / size, oc_mod = oc % size; | ||||
| for (int hw = 0; hw < plane_size; hw++) { | for (int hw = 0; hw < plane_size; hw++) { | ||||
| @@ -93,8 +93,8 @@ void Im2ColPackUnitFp16(float16_t *input_data, ConvParameter *conv_param, float1 | |||||
| void PackWeightFp16(float16_t *weight_data, ConvParameter *conv_param, float16_t *packed_weight) { | void PackWeightFp16(float16_t *weight_data, ConvParameter *conv_param, float16_t *packed_weight) { | ||||
| // original weight format : ohwi | // original weight format : ohwi | ||||
| int tile_num = 8; | |||||
| int inchannel_block = 4; | |||||
| const int tile_num = 8; | |||||
| const int inchannel_block = 4; | |||||
| int kernel_h = conv_param->kernel_h_; | int kernel_h = conv_param->kernel_h_; | ||||
| int kernel_w = conv_param->kernel_w_; | int kernel_w = conv_param->kernel_w_; | ||||
| int in_channel = conv_param->input_channel_; | int in_channel = conv_param->input_channel_; | ||||
| @@ -539,7 +539,7 @@ void Conv3x3Fp16OutputTransform(const float16_t *gemm_out, float16_t *out_data, | |||||
| void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data, int cal_num, | void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data, int cal_num, | ||||
| int out_tile_index, int out_w_block_num, ConvParameter *conv_param, | int out_tile_index, int out_w_block_num, ConvParameter *conv_param, | ||||
| InputTransformUnitFp16Func input_trans_func) { | InputTransformUnitFp16Func input_trans_func) { | ||||
| int tile_num = 16; | |||||
| const int tile_num = 16; | |||||
| int input_unit = conv_param->input_unit_; | int input_unit = conv_param->input_unit_; | ||||
| int output_unit = conv_param->output_unit_; | int output_unit = conv_param->output_unit_; | ||||
| int in_channel = conv_param->input_channel_; | int in_channel = conv_param->input_channel_; | ||||
| @@ -160,7 +160,7 @@ void InputTransform4x4UnitFp16(const float16_t *src_data, float16_t *dst_data, i | |||||
| float16_t m23 = t23 - 0.25f * t21; | float16_t m23 = t23 - 0.25f * t21; | ||||
| float16_t m30 = t30 - 4 * t32; | float16_t m30 = t30 - 4 * t32; | ||||
| float16_t m31 = t31 + 2 * t32; | |||||
| const float16_t m31 = t31 + 2 * t32; | |||||
| float16_t m32 = 2 * t32 - t31; | float16_t m32 = 2 * t32 - t31; | ||||
| float16_t m33 = t33 - 0.25f * t31; | float16_t m33 = t33 - 0.25f * t31; | ||||
| @@ -437,7 +437,7 @@ void Conv3x3Fp32(float *input_data, float *transed_weight, const float *bias_dat | |||||
| int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT); | int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT); | ||||
| int output_count = out_w_block * out_h_block; | int output_count = out_w_block * out_h_block; | ||||
| int output_tile_count = UP_DIV(output_count, TILE_NUM); | int output_tile_count = UP_DIV(output_count, TILE_NUM); | ||||
| int input_unit_square = 4 * 4; | |||||
| const int input_unit_square = 4 * 4; | |||||
| float *tile_buffer = buffer_list[0]; | float *tile_buffer = buffer_list[0]; | ||||
| float *block_unit_buffer = buffer_list[1]; | float *block_unit_buffer = buffer_list[1]; | ||||
| float *tmp_dst_buffer = buffer_list[2]; | float *tmp_dst_buffer = buffer_list[2]; | ||||
| @@ -91,7 +91,7 @@ int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_ | |||||
| void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | ||||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | ||||
| bool out_value = param->out_value_; | bool out_value = param->out_value_; | ||||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| const float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | ||||
| int32_t output_zp = out_quant_arg->zp_; | int32_t output_zp = out_quant_arg->zp_; | ||||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | ||||
| @@ -117,7 +117,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, | |||||
| void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | ||||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | ||||
| bool out_value = param->out_value_; | bool out_value = param->out_value_; | ||||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| const float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | ||||
| int32_t output_zp = out_quant_arg->zp_; | int32_t output_zp = out_quant_arg->zp_; | ||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| @@ -148,7 +148,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, | |||||
| void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | ||||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | ||||
| bool out_value = param->out_value_; | bool out_value = param->out_value_; | ||||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| const float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | ||||
| int32_t output_zp = out_quant_arg->zp_; | int32_t output_zp = out_quant_arg->zp_; | ||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| @@ -183,7 +183,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, | |||||
| void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param, | ||||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | ||||
| bool out_value = param->out_value_; | bool out_value = param->out_value_; | ||||
| float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| const float output_inverse_scale = 1.f / out_quant_arg->scale_; | |||||
| float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; | ||||
| int32_t output_zp = out_quant_arg->zp_; | int32_t output_zp = out_quant_arg->zp_; | ||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| @@ -26,7 +26,7 @@ int ElementNotEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int elem | |||||
| ArithmeticQuantArg *quant_arg) { | ArithmeticQuantArg *quant_arg) { | ||||
| float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | ||||
| float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | ||||
| float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| float out_zp = quant_arg->out_args_.zp_; | float out_zp = quant_arg->out_args_.zp_; | ||||
| for (int index = 0; index < element_size; ++index) { | for (int index = 0; index < element_size; ++index) { | ||||
| @@ -45,7 +45,7 @@ int ElementNotEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int elem | |||||
| int ElementEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) { | int ElementEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) { | ||||
| float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | ||||
| float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | ||||
| float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| float out_zp = quant_arg->out_args_.zp_; | float out_zp = quant_arg->out_args_.zp_; | ||||
| for (int index = 0; index < element_size; ++index) { | for (int index = 0; index < element_size; ++index) { | ||||
| float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias; | float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias; | ||||
| @@ -63,7 +63,7 @@ int ElementEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int element | |||||
| int ElementLessInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) { | int ElementLessInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) { | ||||
| float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | ||||
| float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | ||||
| float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| float out_zp = quant_arg->out_args_.zp_; | float out_zp = quant_arg->out_args_.zp_; | ||||
| for (int index = 0; index < element_size; ++index) { | for (int index = 0; index < element_size; ++index) { | ||||
| float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias; | float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias; | ||||
| @@ -78,7 +78,7 @@ int ElementLessEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int ele | |||||
| ArithmeticQuantArg *quant_arg) { | ArithmeticQuantArg *quant_arg) { | ||||
| float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | ||||
| float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | ||||
| float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| float out_zp = quant_arg->out_args_.zp_; | float out_zp = quant_arg->out_args_.zp_; | ||||
| for (int index = 0; index < element_size; ++index) { | for (int index = 0; index < element_size; ++index) { | ||||
| @@ -94,7 +94,7 @@ int ElementGreaterInt8(int8_t *input0, int8_t *input1, int8_t *output, int eleme | |||||
| ArithmeticQuantArg *quant_arg) { | ArithmeticQuantArg *quant_arg) { | ||||
| float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | ||||
| float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | ||||
| float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| float out_zp = quant_arg->out_args_.zp_; | float out_zp = quant_arg->out_args_.zp_; | ||||
| for (int index = 0; index < element_size; ++index) { | for (int index = 0; index < element_size; ++index) { | ||||
| @@ -110,7 +110,7 @@ int ElementGreaterEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int | |||||
| ArithmeticQuantArg *quant_arg) { | ArithmeticQuantArg *quant_arg) { | ||||
| float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_; | ||||
| float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_; | ||||
| float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_; | |||||
| float out_zp = quant_arg->out_args_.zp_; | float out_zp = quant_arg->out_args_.zp_; | ||||
| for (int index = 0; index < element_size; ++index) { | for (int index = 0; index < element_size; ++index) { | ||||
| float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias; | float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias; | ||||
| @@ -365,7 +365,7 @@ void Conv3x3Int8(int16_t *input_data, int16_t *transed_weight, const int32_t *bi | |||||
| int output_tile_count = UP_DIV(output_count, TILE_NUM); | int output_tile_count = UP_DIV(output_count, TILE_NUM); | ||||
| int oc4 = UP_DIV(output_channel, C4NUM); | int oc4 = UP_DIV(output_channel, C4NUM); | ||||
| int tile_buffer_offset = TILE_NUM * 16 * ic8 * C8NUM; | int tile_buffer_offset = TILE_NUM * 16 * ic8 * C8NUM; | ||||
| int block_unit_buffer_offset = 16 * C8NUM; | |||||
| const int block_unit_buffer_offset = 16 * C8NUM; | |||||
| int tmp_dst_buffer_offset = TILE_NUM * 16 * oc4 * C4NUM; | int tmp_dst_buffer_offset = TILE_NUM * 16 * oc4 * C4NUM; | ||||
| int input_batch = conv_param->input_batch_; | int input_batch = conv_param->input_batch_; | ||||
| @@ -253,7 +253,7 @@ int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| int i, j, k; | int i, j, k; | ||||
| int base_offset = 20; | |||||
| const int base_offset = 20; | |||||
| for (j = tid; j < outer_size; j += thread_num) { | for (j = tid; j < outer_size; j += thread_num) { | ||||
| const int32_t *outer_src = src_data + j * axis_size * inner_size; | const int32_t *outer_src = src_data + j * axis_size * inner_size; | ||||
| int8_t *outer_dst = dst_data + j * inner_size; | int8_t *outer_dst = dst_data + j * inner_size; | ||||
| @@ -41,7 +41,7 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int | |||||
| for (n = 0; n < in_n; n++) { | for (n = 0; n < in_n; n++) { | ||||
| for (h = tid; h < new_height; h += thread_num) { | for (h = tid; h < new_height; h += thread_num) { | ||||
| // float actual_y = (float)h * height_scale; | // float actual_y = (float)h * height_scale; | ||||
| int base_offset = 20; | |||||
| const int base_offset = 20; | |||||
| int scaled_actual_y; | int scaled_actual_y; | ||||
| int bottom, top; | int bottom, top; | ||||
| int scaled_bottom_weight, scaled_top_weight; | int scaled_bottom_weight, scaled_top_weight; | ||||
| @@ -149,7 +149,7 @@ void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32 | |||||
| int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, | int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, | ||||
| const int *output_shape, const bool align_corners, const QuantMulArg *multiplier, | const int *output_shape, const bool align_corners, const QuantMulArg *multiplier, | ||||
| QuantArg *quant_in, QuantArg *quant_out, int tid, int thread_num) { | QuantArg *quant_in, QuantArg *quant_out, int tid, int thread_num) { | ||||
| int base_offset = 20; | |||||
| const int base_offset = 20; | |||||
| int32_t batch, y, x, c; | int32_t batch, y, x, c; | ||||
| int32_t in_h, in_w, new_height, new_width; | int32_t in_h, in_w, new_height, new_width; | ||||
| in_h = input_shape[1]; | in_h = input_shape[1]; | ||||
| @@ -55,7 +55,7 @@ int MultiplyByQuantizedMultiplier(int32_t value, int32_t multiplier, int32_t lef | |||||
| } | } | ||||
| int FractionsBits(int kIntegerBits) { | int FractionsBits(int kIntegerBits) { | ||||
| int totalBits = 8 * sizeof(int32_t) - 1; | |||||
| const int totalBits = 8 * sizeof(int32_t) - 1; | |||||
| return totalBits - kIntegerBits; | return totalBits - kIntegerBits; | ||||
| } | } | ||||
| @@ -82,7 +82,7 @@ int32_t BitNot(int32_t a) { return ~(uint32_t)a; } | |||||
| int SelectUsingMask(int mask, int bound, int val) { return BitXor(BitAnd(mask, bound), BitAnd(BitNot(mask), val)); } | int SelectUsingMask(int mask, int bound, int val) { return BitXor(BitAnd(mask, bound), BitAnd(BitNot(mask), val)); } | ||||
| int32_t MaskNonZero(int32_t a) { | int32_t MaskNonZero(int32_t a) { | ||||
| int32_t zreo = 0; | |||||
| const int32_t zreo = 0; | |||||
| return a ? BitNot(zreo) : zreo; | return a ? BitNot(zreo) : zreo; | ||||
| } | } | ||||
| @@ -284,7 +284,7 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa | |||||
| int pad_w = conv_param->pad_w_; | int pad_w = conv_param->pad_w_; | ||||
| int pad_h = conv_param->pad_h_; | int pad_h = conv_param->pad_h_; | ||||
| int ic4 = UP_DIV(input_channel, C4NUM); | int ic4 = UP_DIV(input_channel, C4NUM); | ||||
| int input_unit = 4; | |||||
| const int input_unit = 4; | |||||
| if (out_w_block == 0) { | if (out_w_block == 0) { | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -162,7 +162,7 @@ void InputTransform4x4Unit(const float *src_data, float *dst_data, int src_step, | |||||
| float m30 = t30 - 4 * t32; | float m30 = t30 - 4 * t32; | ||||
| float m31 = t31 + 2 * t32; | float m31 = t31 + 2 * t32; | ||||
| float m32 = 2 * t32 - t31; | |||||
| const float m32 = 2 * t32 - t31; | |||||
| float m33 = t33 - 0.25f * t31; | float m33 = t33 - 0.25f * t31; | ||||
| (dst_data + i)[0] = m00; | (dst_data + i)[0] = m00; | ||||
| @@ -49,6 +49,9 @@ __kernel void ElementDiv(__read_only image2d_t input_a, __read_only image2d_t in | |||||
| float4 a = read_imagef(input_a, smp_none, (int2)(X, Y)); | float4 a = read_imagef(input_a, smp_none, (int2)(X, Y)); | ||||
| float4 b = read_imagef(input_b, smp_none, (int2)(X, Y)); | float4 b = read_imagef(input_b, smp_none, (int2)(X, Y)); | ||||
| if (b == 0) { | |||||
| return; | |||||
| } | |||||
| write_imagef(output, (int2)(X, Y), a / b); | write_imagef(output, (int2)(X, Y), a / b); | ||||
| } | } | ||||
| @@ -510,6 +510,7 @@ STATUS AwareQuantizer::QuantConvBias(const mindspore::schema::MetaGraphT *graph, | |||||
| auto ret = memcpy_s(biasTensor->data.data(), bShapeSize * sizeof(int32_t), qDatas, bShapeSize * sizeof(int32_t)); | auto ret = memcpy_s(biasTensor->data.data(), bShapeSize * sizeof(int32_t), qDatas, bShapeSize * sizeof(int32_t)); | ||||
| if (ret != EOK) { | if (ret != EOK) { | ||||
| // MS_LOGE("memcpy_s failed: %d", ret); | // MS_LOGE("memcpy_s failed: %d", ret); | ||||
| delete[] qDatas; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| delete[] qDatas; | delete[] qDatas; | ||||