Browse Source

fix bug

tags/v1.1.0
fuzhiye 5 years ago
parent
commit
0d3c16c141
3 changed files with 32 additions and 27 deletions
  1. +22
    -20
      mindspore/lite/nnacl/pack.c
  2. +0
    -6
      mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
  3. +10
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc

+ 22
- 20
mindspore/lite/nnacl/pack.c View File

@@ -336,18 +336,19 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara
int in_channel = conv_param->input_channel_;
int in_h = conv_param->input_h_;
int in_w = conv_param->input_w_;
int ic8 = UP_DIV(in_channel, C8NUM);
int ic8_minus = ic8 - 1;
int ic8_round = UP_ROUND(in_channel, C8NUM);
int ic8 = in_channel / C8NUM * C8NUM;
int in_plane = in_h * in_w;

for (int b = 0; b < in_batch; b++) {
int src_batch_offset = b * in_channel * in_h * in_w;
int dst_batch_offset = b * ic8 * C8NUM * in_h * in_w;
for (int k = 0; k < in_w * in_h; k++) {
int src_batch_offset = b * in_channel * in_plane;
int dst_batch_offset = b * ic8_round * in_plane;
for (int k = 0; k < in_plane; k++) {
int src_plane_offset = src_batch_offset + k * in_channel;
int dst_plane_offset = dst_batch_offset + k * C8NUM;
for (int i = 0; i < ic8_minus; ++i) {
int src_c_offset = src_plane_offset + i * C8NUM;
int dst_c_offset = dst_plane_offset + i * C8NUM * in_h * in_w;
for (int i = 0; i < ic8; i += 8) {
int src_c_offset = src_plane_offset + i;
int dst_c_offset = dst_plane_offset + i * in_plane;
#ifdef ENABLE_ARM
vst1q_s16(packed_input + dst_c_offset, vmovl_s8(vld1_s8(input_data + src_c_offset)));
#else
@@ -356,17 +357,17 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara
}
#endif
} // ic8_minus loop
int tmp_ic = ic8_minus * C8NUM;
int res_c = in_channel - tmp_ic;
int tmp_ic_offset = tmp_ic * in_h * in_w;
int res_c = in_channel - ic8;
int tmp_ic_offset = ic8 * in_plane;
for (int l = 0; l < res_c; ++l) {
int src_c_offset = src_plane_offset + tmp_ic + l;
int src_c_offset = src_plane_offset + ic8 + l;
int dst_c_offset = dst_plane_offset + tmp_ic_offset + l;
(packed_input + dst_c_offset)[l] = (int16_t)(input_data + src_c_offset)[l];
(packed_input + dst_c_offset)[0] = (int16_t)(input_data + src_c_offset)[0];
} // res ic loop
for (int l = res_c; l < C8NUM; ++l) {
int dst_c_offset = dst_plane_offset + tmp_ic_offset + l;
(packed_input + dst_c_offset)[l] = 0;
int res2 = ic8_round - in_channel;
for (int l = 0; l < res2; ++l) {
int dst_c_offset = dst_plane_offset + tmp_ic_offset + res_c + l;
(packed_input + dst_c_offset)[0] = 0;
} // res ic loop
} // kh * kw loop
}
@@ -375,7 +376,8 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara
void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param) {
// origin weight format : ohwi
int input_channel = conv_param->input_channel_;
int ic8 = UP_DIV(input_channel, C8NUM);
int ic8 = input_channel / C8NUM * C8NUM;
int ic8_round = UP_ROUND(input_channel, C8NUM);
int output_channel = conv_param->output_channel_;
QuantArg *filter_zp = conv_param->conv_quant_arg_.filter_quant_args_;
int kernel_plane = conv_param->kernel_h_ * conv_param->kernel_w_;
@@ -391,9 +393,9 @@ void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight
zp = filter_zp[o].zp_;
}
int src_oc_offset = src_kernel_offset + o * kernel_plane * input_channel;
int dst_oc_offset = dst_kernel_offset + o * ic8 * kernel_plane * C8NUM;
int dst_oc_offset = dst_kernel_offset + o * ic8_round * kernel_plane;
int i = 0;
for (; i < (ic8 - 1); i += C8NUM) {
for (; i < ic8; i += C8NUM) {
int src_ic_offset = src_oc_offset + i;
int dst_ic_offset = dst_oc_offset + i * kernel_plane;
#ifdef ENABLE_ARM64
@@ -416,7 +418,7 @@ void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight
}
#endif
}
dst_oc_offset += (ic8 - 1) * kernel_plane * C8NUM;
dst_oc_offset += ic8 * kernel_plane;
for (; i < input_channel; i++) {
int c8_block_rem = i % C8NUM;
int src_ic_offset = src_oc_offset + i;


+ 0
- 6
mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc View File

@@ -251,12 +251,6 @@ REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CpuArithmeticFp16
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Maximum, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Minimum, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_NotEqual, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Equal, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Less, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LessEqual, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Greater, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Eltwise, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_SquaredDifference, CpuArithmeticFp16KernelCreator)
} // namespace mindspore::kernel

+ 10
- 1
mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc View File

@@ -259,7 +259,16 @@ kernel::LiteKernel *CpuConvInt8KernelCreator(const std::vector<lite::Tensor *> &
int dilation_w = conv_param->dilation_w_;
kernel::LiteKernel *kernel;
if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
#ifdef ENABLE_ARM64
void *optimize_op_handler = OptimizeModule::GetInstance()->optimized_op_handler_;
if (optimize_op_handler != nullptr) {
kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else {
kernel = new (std::nothrow) kernel::Convolution3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
}
#else
kernel = new (std::nothrow) kernel::Convolution3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
#endif
} else if (kernel_h == 1 && kernel_w == 1) {
kernel = new (std::nothrow) kernel::Convolution1x1Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else {


Loading…
Cancel
Save