diff --git a/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S b/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S
index 11a27b1b4d..dcfaa8115c 100644
--- a/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S
+++ b/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S
@@ -421,13 +421,37 @@ End3:
     smax v17.4s, v17.4s, v7.4s
     smax v18.4s, v18.4s, v7.4s
     smax v19.4s, v19.4s, v7.4s
-
-    // Apply the act_min bound
+    smax v20.4s, v20.4s, v7.4s
+    smax v21.4s, v21.4s, v7.4s
+    smax v22.4s, v22.4s, v7.4s
+    smax v23.4s, v23.4s, v7.4s
+    smax v24.4s, v24.4s, v7.4s
+    smax v25.4s, v25.4s, v7.4s
+    smax v26.4s, v26.4s, v7.4s
+    smax v27.4s, v27.4s, v7.4s
+    smax v28.4s, v28.4s, v7.4s
+    smax v29.4s, v29.4s, v7.4s
+    smax v30.4s, v30.4s, v7.4s
+    smax v31.4s, v31.4s, v7.4s
+
+    // Apply the act_max bound
     dup v6.4s, w9
     smin v16.4s, v16.4s, v6.4s
     smin v17.4s, v17.4s, v6.4s
     smin v18.4s, v18.4s, v6.4s
     smin v19.4s, v19.4s, v6.4s
+    smin v20.4s, v20.4s, v6.4s
+    smin v21.4s, v21.4s, v6.4s
+    smin v22.4s, v22.4s, v6.4s
+    smin v23.4s, v23.4s, v6.4s
+    smin v24.4s, v24.4s, v6.4s
+    smin v25.4s, v25.4s, v6.4s
+    smin v26.4s, v26.4s, v6.4s
+    smin v27.4s, v27.4s, v6.4s
+    smin v28.4s, v28.4s, v6.4s
+    smin v29.4s, v29.4s, v6.4s
+    smin v30.4s, v30.4s, v6.4s
+    smin v31.4s, v31.4s, v6.4s

     // int32 -> int16
     sqxtn v0.4h, v16.4s
diff --git a/mindspore/lite/nnacl/opt_op_handler.c b/mindspore/lite/nnacl/opt_op_handler.c
index a3fc07a1d8..294f6af837 100644
--- a/mindspore/lite/nnacl/opt_op_handler.c
+++ b/mindspore/lite/nnacl/opt_op_handler.c
@@ -57,6 +57,6 @@ void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst,
                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
                                   int32_t maxi, bool per_channel) {
   return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, 8), UP_ROUND(col, 8), deep_4, input_sum, bias, mini, maxi,
-                            output_zp, multiplier[0], left_shift[0], right_shift[0], row, col, col);
+                            output_zp, multiplier[0], left_shift[0], right_shift[0], row, col, stride);
 }
 #endif
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
index 110b0a5d4b..942c11fd07 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
@@ -38,7 +38,7 @@ Convolution1x1Int8CPUKernel::~Convolution1x1Int8CPUKernel() {
     matmul_param_ = nullptr;
   }
   if (packed_weight_ != nullptr) {
-    delete packed_weight_;
+    free(packed_weight_);
     packed_weight_ = nullptr;
   }
   FreeResizeBuf();
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
index a1e47bd2a4..fdb3cd0ddb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
@@ -60,6 +60,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
   for (int i = 0; i < weight_tensor->ElementsNum(); i++) {
     packed_weight_[i] = (int16_t)(tmp_weight[i] - weight_zp);
   }
+  free(tmp_weight);

   bias_data_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
   if (bias_data_ == nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
index 1c069ce764..211bad75dc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
@@ -402,7 +402,7 @@
kernel::LiteKernel *CpuConvInt8KernelCreator(const std::vector
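Note on the two memory-management fixes above: packed_weight_ in convolution_1x1_int8.cc is allocated with malloc, so releasing it with delete was undefined behavior and is replaced by free(); convolution_depthwise_int8.cc leaked tmp_weight after repacking it into packed_weight_, which the added free(tmp_weight) closes. The C++ sketch below illustrates the ownership pattern only; the struct and method names (Int8ConvWeights, Init) are hypothetical, not the actual MindSpore kernel code.

#include <cstdint>
#include <cstdlib>
#include <cstring>

// Hypothetical sketch (not the MindSpore kernel) of the ownership rules
// the patch enforces: malloc pairs with free, and temporaries are
// released as soon as their contents have been repacked.
struct Int8ConvWeights {
  int16_t *packed_weight_ = nullptr;

  bool Init(const int8_t *origin, int count, int32_t weight_zp) {
    // Temporary working copy of the origin weights.
    int8_t *tmp_weight = static_cast<int8_t *>(malloc(count));
    if (tmp_weight == nullptr) return false;
    memcpy(tmp_weight, origin, count);

    packed_weight_ = static_cast<int16_t *>(malloc(count * sizeof(int16_t)));
    if (packed_weight_ == nullptr) {
      free(tmp_weight);  // release the temporary on the error path too
      return false;
    }
    for (int i = 0; i < count; i++) {
      packed_weight_[i] = static_cast<int16_t>(tmp_weight[i] - weight_zp);
    }
    free(tmp_weight);  // done with the temporary: omitting this was the leak
    return true;
  }

  ~Int8ConvWeights() {
    if (packed_weight_ != nullptr) {
      free(packed_weight_);      // malloc'd memory must go back through free,
      packed_weight_ = nullptr;  // never delete: allocators must match
    }
  }
};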