From 0d3c16c1414a571fc1cb09dc484823254a2047d3 Mon Sep 17 00:00:00 2001 From: fuzhiye Date: Tue, 27 Oct 2020 09:11:47 +0800 Subject: [PATCH] fix bug --- mindspore/lite/nnacl/pack.c | 42 ++++++++++--------- .../kernel/arm/fp16/arithmetic_fp16.cc | 6 --- .../kernel/arm/int8/convolution_int8.cc | 11 ++++- 3 files changed, 32 insertions(+), 27 deletions(-) diff --git a/mindspore/lite/nnacl/pack.c b/mindspore/lite/nnacl/pack.c index 255b5f0118..6c8c65f21d 100644 --- a/mindspore/lite/nnacl/pack.c +++ b/mindspore/lite/nnacl/pack.c @@ -336,18 +336,19 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara int in_channel = conv_param->input_channel_; int in_h = conv_param->input_h_; int in_w = conv_param->input_w_; - int ic8 = UP_DIV(in_channel, C8NUM); - int ic8_minus = ic8 - 1; + int ic8_round = UP_ROUND(in_channel, C8NUM); + int ic8 = in_channel / C8NUM * C8NUM; + int in_plane = in_h * in_w; for (int b = 0; b < in_batch; b++) { - int src_batch_offset = b * in_channel * in_h * in_w; - int dst_batch_offset = b * ic8 * C8NUM * in_h * in_w; - for (int k = 0; k < in_w * in_h; k++) { + int src_batch_offset = b * in_channel * in_plane; + int dst_batch_offset = b * ic8_round * in_plane; + for (int k = 0; k < in_plane; k++) { int src_plane_offset = src_batch_offset + k * in_channel; int dst_plane_offset = dst_batch_offset + k * C8NUM; - for (int i = 0; i < ic8_minus; ++i) { - int src_c_offset = src_plane_offset + i * C8NUM; - int dst_c_offset = dst_plane_offset + i * C8NUM * in_h * in_w; + for (int i = 0; i < ic8; i += 8) { + int src_c_offset = src_plane_offset + i; + int dst_c_offset = dst_plane_offset + i * in_plane; #ifdef ENABLE_ARM vst1q_s16(packed_input + dst_c_offset, vmovl_s8(vld1_s8(input_data + src_c_offset))); #else @@ -356,17 +357,17 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara } #endif } // ic8_minus loop - int tmp_ic = ic8_minus * C8NUM; - int res_c = in_channel - tmp_ic; - int tmp_ic_offset = tmp_ic * in_h * in_w; + int res_c = in_channel - ic8; + int tmp_ic_offset = ic8 * in_plane; for (int l = 0; l < res_c; ++l) { - int src_c_offset = src_plane_offset + tmp_ic + l; + int src_c_offset = src_plane_offset + ic8 + l; int dst_c_offset = dst_plane_offset + tmp_ic_offset + l; - (packed_input + dst_c_offset)[l] = (int16_t)(input_data + src_c_offset)[l]; + (packed_input + dst_c_offset)[0] = (int16_t)(input_data + src_c_offset)[0]; } // res ic loop - for (int l = res_c; l < C8NUM; ++l) { - int dst_c_offset = dst_plane_offset + tmp_ic_offset + l; - (packed_input + dst_c_offset)[l] = 0; + int res2 = ic8_round - in_channel; + for (int l = 0; l < res2; ++l) { + int dst_c_offset = dst_plane_offset + tmp_ic_offset + res_c + l; + (packed_input + dst_c_offset)[0] = 0; } // res ic loop } // kh * kw loop } @@ -375,7 +376,8 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param) { // origin weight format : ohwi int input_channel = conv_param->input_channel_; - int ic8 = UP_DIV(input_channel, C8NUM); + int ic8 = input_channel / C8NUM * C8NUM; + int ic8_round = UP_ROUND(input_channel, C8NUM); int output_channel = conv_param->output_channel_; QuantArg *filter_zp = conv_param->conv_quant_arg_.filter_quant_args_; int kernel_plane = conv_param->kernel_h_ * conv_param->kernel_w_; @@ -391,9 +393,9 @@ void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight zp = filter_zp[o].zp_; } int src_oc_offset = src_kernel_offset + o * kernel_plane * input_channel; - int dst_oc_offset = dst_kernel_offset + o * ic8 * kernel_plane * C8NUM; + int dst_oc_offset = dst_kernel_offset + o * ic8_round * kernel_plane; int i = 0; - for (; i < (ic8 - 1); i += C8NUM) { + for (; i < ic8; i += C8NUM) { int src_ic_offset = src_oc_offset + i; int dst_ic_offset = dst_oc_offset + i * kernel_plane; #ifdef ENABLE_ARM64 @@ -416,7 +418,7 @@ void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight } #endif } - dst_oc_offset += (ic8 - 1) * kernel_plane * C8NUM; + dst_oc_offset += ic8 * kernel_plane; for (; i < input_channel; i++) { int c8_block_rem = i % C8NUM; int src_ic_offset = src_oc_offset + i; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc index c03a1ef569..24f8a043f5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc @@ -251,12 +251,6 @@ REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CpuArithmeticFp16 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, CpuArithmeticFp16KernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Maximum, CpuArithmeticFp16KernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Minimum, CpuArithmeticFp16KernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_NotEqual, CpuArithmeticFp16KernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Equal, CpuArithmeticFp16KernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Less, CpuArithmeticFp16KernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LessEqual, CpuArithmeticFp16KernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Greater, CpuArithmeticFp16KernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CpuArithmeticFp16KernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Eltwise, CpuArithmeticFp16KernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_SquaredDifference, CpuArithmeticFp16KernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc index b770e5542f..e6f7801648 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc @@ -259,7 +259,16 @@ kernel::LiteKernel *CpuConvInt8KernelCreator(const std::vector & int dilation_w = conv_param->dilation_w_; kernel::LiteKernel *kernel; if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { - kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); +#ifdef ENABLE_ARM64 + void *optimize_op_handler = OptimizeModule::GetInstance()->optimized_op_handler_; + if (optimize_op_handler != nullptr) { + kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); + } else { + kernel = new (std::nothrow) kernel::Convolution3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive); + } +#else + kernel = new (std::nothrow) kernel::Convolution3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive); +#endif } else if (kernel_h == 1 && kernel_w == 1) { kernel = new (std::nothrow) kernel::Convolution1x1Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive); } else {