Merge pull request !5442 from liuwenhao/master tags/v1.0.0
| @@ -421,13 +421,37 @@ End3: | |||||
| smax v17.4s, v17.4s, v7.4s | smax v17.4s, v17.4s, v7.4s | ||||
| smax v18.4s, v18.4s, v7.4s | smax v18.4s, v18.4s, v7.4s | ||||
| smax v19.4s, v19.4s, v7.4s | smax v19.4s, v19.4s, v7.4s | ||||
| // Apply the act_min bound | |||||
| smax v20.4s, v20.4s, v7.4s | |||||
| smax v21.4s, v21.4s, v7.4s | |||||
| smax v22.4s, v22.4s, v7.4s | |||||
| smax v23.4s, v23.4s, v7.4s | |||||
| smax v24.4s, v24.4s, v7.4s | |||||
| smax v25.4s, v25.4s, v7.4s | |||||
| smax v26.4s, v26.4s, v7.4s | |||||
| smax v27.4s, v27.4s, v7.4s | |||||
| smax v28.4s, v28.4s, v7.4s | |||||
| smax v29.4s, v29.4s, v7.4s | |||||
| smax v30.4s, v30.4s, v7.4s | |||||
| smax v31.4s, v31.4s, v7.4s | |||||
| // Apply the act_max bound | |||||
| dup v6.4s, w9 | dup v6.4s, w9 | ||||
| smin v16.4s, v16.4s, v6.4s | smin v16.4s, v16.4s, v6.4s | ||||
| smin v17.4s, v17.4s, v6.4s | smin v17.4s, v17.4s, v6.4s | ||||
| smin v18.4s, v18.4s, v6.4s | smin v18.4s, v18.4s, v6.4s | ||||
| smin v19.4s, v19.4s, v6.4s | smin v19.4s, v19.4s, v6.4s | ||||
| smin v20.4s, v20.4s, v6.4s | |||||
| smin v21.4s, v21.4s, v6.4s | |||||
| smin v22.4s, v22.4s, v6.4s | |||||
| smin v23.4s, v23.4s, v6.4s | |||||
| smin v24.4s, v24.4s, v6.4s | |||||
| smin v25.4s, v25.4s, v6.4s | |||||
| smin v26.4s, v26.4s, v6.4s | |||||
| smin v27.4s, v27.4s, v6.4s | |||||
| smin v28.4s, v28.4s, v6.4s | |||||
| smin v29.4s, v29.4s, v6.4s | |||||
| smin v30.4s, v30.4s, v6.4s | |||||
| smin v31.4s, v31.4s, v6.4s | |||||
| // int32 -> int16 | // int32 -> int16 | ||||
| sqxtn v0.4h, v16.4s | sqxtn v0.4h, v16.4s | ||||
| @@ -57,6 +57,6 @@ void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, | |||||
| int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, | int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, | ||||
| int32_t maxi, bool per_channel) { | int32_t maxi, bool per_channel) { | ||||
| return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, 8), UP_ROUND(col, 8), deep_4, input_sum, bias, mini, maxi, | return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, 8), UP_ROUND(col, 8), deep_4, input_sum, bias, mini, maxi, | ||||
| output_zp, multiplier[0], left_shift[0], right_shift[0], row, col, col); | |||||
| output_zp, multiplier[0], left_shift[0], right_shift[0], row, col, stride); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -38,7 +38,7 @@ Convolution1x1Int8CPUKernel::~Convolution1x1Int8CPUKernel() { | |||||
| matmul_param_ = nullptr; | matmul_param_ = nullptr; | ||||
| } | } | ||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| delete packed_weight_; | |||||
| free(packed_weight_); | |||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| FreeResizeBuf(); | FreeResizeBuf(); | ||||
| @@ -60,6 +60,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||||
| for (int i = 0; i < weight_tensor->ElementsNum(); i++) { | for (int i = 0; i < weight_tensor->ElementsNum(); i++) { | ||||
| packed_weight_[i] = (int16_t)(tmp_weight[i] - weight_zp); | packed_weight_[i] = (int16_t)(tmp_weight[i] - weight_zp); | ||||
| } | } | ||||
| free(tmp_weight); | |||||
| bias_data_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); | bias_data_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); | ||||
| if (bias_data_ == nullptr) { | if (bias_data_ == nullptr) { | ||||
| @@ -402,7 +402,7 @@ kernel::LiteKernel *CpuConvInt8KernelCreator(const std::vector<lite::tensor::Ten | |||||
| if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | ||||
| } else if (kernel_h == 1 && kernel_w == 1 && filter_quant_size == 1) { | } else if (kernel_h == 1 && kernel_w == 1 && filter_quant_size == 1) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| kernel = new (std::nothrow) kernel::Convolution1x1Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| } else { | } else { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | kernel = new (std::nothrow) kernel::ConvolutionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive); | ||||
| } | } | ||||