| @@ -48,11 +48,11 @@ namespace ncnn { | |||
| #endif // NCNN_BF16 | |||
| #if NCNN_INT8 | |||
| #include "convolution_packed_int8.h" | |||
| #include "convolution_im2col_gemm_int8.h" | |||
| #include "convolution_3x3_winograd_int8.h" | |||
| // #include "convolution_3x3_int8.h" | |||
| #include "convolution_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #if __ARM_NEON | |||
| @@ -68,12 +68,6 @@ namespace ncnn { | |||
| #include "convolution_5x5_pack4_bf16s.h" | |||
| #include "convolution_7x7_pack1to4_bf16s.h" | |||
| #endif // NCNN_BF16 | |||
| #if NCNN_INT8 | |||
| #include "convolution_pack8to4_int8.h" | |||
| #include "convolution_pack1to4_int8.h" | |||
| #include "convolution_pack8to1_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #endif // __ARM_NEON | |||
| Convolution_arm::Convolution_arm() | |||
| @@ -1238,41 +1232,6 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const | |||
| #endif // NCNN_BF16 | |||
| #if NCNN_INT8 | |||
| static void convolution_transform_kernel_packed_int8_neon(const Mat& weight_data, Mat& weight_data_tm, int num_input, int num_output, int kernel_w, int kernel_h, int elempack, int out_elempack) | |||
| { | |||
| const int maxk = kernel_w * kernel_h; | |||
| // src = kw-kh-inch-outch | |||
| // dst = pa-pb-kw-kh-inch/pa-outch/pb | |||
| { | |||
| Mat weight_data_r2 = weight_data.reshape(maxk, num_input, num_output); | |||
| weight_data_tm.create(maxk, num_input / elempack, num_output / out_elempack, (size_t)elempack * out_elempack, elempack * out_elempack); | |||
| for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack) | |||
| { | |||
| signed char* g00 = weight_data_tm.channel(q / out_elempack); | |||
| for (int p = 0; p + (elempack - 1) < num_input; p += elempack) | |||
| { | |||
| for (int k = 0; k < maxk; k++) | |||
| { | |||
| for (int i = 0; i < out_elempack; i++) | |||
| { | |||
| for (int j = 0; j < elempack; j++) | |||
| { | |||
| const signed char* k00 = weight_data_r2.channel(q + i).row<const signed char>(p + j); | |||
| g00[0] = k00[k]; | |||
| g00++; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| int Convolution_arm::create_pipeline_int8_arm(const Option& opt) | |||
| { | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -1286,16 +1245,6 @@ int Convolution_arm::create_pipeline_int8_arm(const Option& opt) | |||
| } | |||
| #endif | |||
| int elempack = 1; | |||
| int out_elempack = 1; | |||
| #if __ARM_NEON | |||
| if (opt.use_packing_layout) | |||
| { | |||
| elempack = num_input % 8 == 0 ? 8 : 1; | |||
| out_elempack = num_output % 4 == 0 ? 4 : 1; | |||
| } | |||
| #endif // __ARM_NEON | |||
| if (opt.use_winograd_convolution && prefer_winograd) | |||
| { | |||
| if (opt.use_winograd43_convolution) | |||
| @@ -1307,13 +1256,9 @@ int Convolution_arm::create_pipeline_int8_arm(const Option& opt) | |||
| { | |||
| convolution_im2col_gemm_transform_kernel_int8(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt); | |||
| } | |||
| else if (elempack == 1 && out_elempack == 1) | |||
| { | |||
| weight_data_tm = weight_data; | |||
| } | |||
| else | |||
| { | |||
| convolution_transform_kernel_packed_int8_neon(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h, elempack, out_elempack); | |||
| convolution_transform_kernel_packed_int8(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h); | |||
| } | |||
| scale_in_data.create(num_output); | |||
| @@ -1404,14 +1349,7 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con | |||
| #if __ARM_NEON | |||
| if (opt.use_packing_layout) | |||
| { | |||
| if ((opt.use_winograd_convolution && prefer_winograd) || opt.use_sgemm_convolution) | |||
| { | |||
| out_elempack_int32 = num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1; | |||
| } | |||
| else | |||
| { | |||
| out_elempack_int32 = num_output % 4 == 0 ? 4 : 1; | |||
| } | |||
| out_elempack_int32 = num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1; | |||
| } | |||
| #endif // __ARM_NEON | |||
| @@ -1439,23 +1377,9 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con | |||
| { | |||
| convolution_im2col_gemm_int8(bottom_blob_bordered, top_blob_int32, weight_sgemm_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, _nT, opt); | |||
| } | |||
| #if __ARM_NEON | |||
| else if (elempack == 8 && out_elempack_int32 == 4) | |||
| { | |||
| convolution_pack8to4_int8_neon(bottom_blob_bordered, top_blob_int32, weight_data_tm, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt); | |||
| } | |||
| else if (elempack == 1 && out_elempack_int32 == 4) | |||
| { | |||
| convolution_pack1to4_int8_neon(bottom_blob_bordered, top_blob_int32, weight_data_tm, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt); | |||
| } | |||
| else if (elempack == 8 && out_elempack_int32 == 1) | |||
| { | |||
| convolution_pack8to1_int8_neon(bottom_blob_bordered, top_blob_int32, weight_data_tm, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt); | |||
| } | |||
| #endif // __ARM_NEON | |||
| else // if (elempack == 1 && out_elempack_int32 == 1) | |||
| else | |||
| { | |||
| convolution_int8(bottom_blob_bordered, top_blob_int32, weight_data_tm, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt); | |||
| convolution_packed_int8(bottom_blob_bordered, top_blob_int32, weight_data_tm, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt); | |||
| } | |||
| bottom_blob_bordered.release(); | |||
| @@ -17,8 +17,20 @@ | |||
| namespace ncnn { | |||
| #include "convolution_packed_int8.h" | |||
| #include "convolution_im2col_gemm_int8.h" | |||
| // packed | |||
| void convolution_transform_kernel_packed_int8_asimddp(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h) | |||
| { | |||
| convolution_transform_kernel_packed_int8(kernel, kernel_tm, inch, outch, kernel_w, kernel_h); | |||
| } | |||
| void convolution_packed_int8_asimddp(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_tm, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, const Option& opt) | |||
| { | |||
| convolution_packed_int8(bottom_blob, top_blob, weight_data_tm, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt); | |||
| } | |||
| // gemm | |||
| void convolution_im2col_gemm_transform_kernel_int8_asimddp(const Mat& kernel, Mat& AT, int inch, int outch, int kernel_w, int kernel_h, const Option& opt) | |||
| { | |||
| @@ -17,8 +17,20 @@ | |||
| namespace ncnn { | |||
| #include "convolution_packed_int8.h" | |||
| #include "convolution_im2col_gemm_int8.h" | |||
| // packed | |||
| void convolution_transform_kernel_packed_int8_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h) | |||
| { | |||
| convolution_transform_kernel_packed_int8(kernel, kernel_tm, inch, outch, kernel_w, kernel_h); | |||
| } | |||
| void convolution_packed_int8_i8mm(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_tm, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, const Option& opt) | |||
| { | |||
| convolution_packed_int8(bottom_blob, top_blob, weight_data_tm, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt); | |||
| } | |||
| // gemm | |||
| void convolution_im2col_gemm_transform_kernel_int8_i8mm(const Mat& kernel, Mat& AT, int inch, int outch, int kernel_w, int kernel_h, const Option& opt) | |||
| { | |||
| @@ -1,82 +0,0 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| static void convolution_int8(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_int8, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int maxk = kernel_w * kernel_h; | |||
| // kernel offsets | |||
| std::vector<int> _space_ofs(maxk); | |||
| int* space_ofs = &_space_ofs[0]; | |||
| { | |||
| int p1 = 0; | |||
| int p2 = 0; | |||
| int gap = w * dilation_h - kernel_w * dilation_w; | |||
| for (int i = 0; i < kernel_h; i++) | |||
| { | |||
| for (int j = 0; j < kernel_w; j++) | |||
| { | |||
| space_ofs[p1] = p2; | |||
| p1++; | |||
| p2 += dilation_w; | |||
| } | |||
| p2 += gap; | |||
| } | |||
| } | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < outch; p++) | |||
| { | |||
| int* outptr = top_blob.channel(p); | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| { | |||
| int sum = 0; | |||
| // const signed char* kptr = weight_data_int8.channel(p); | |||
| const signed char* kptr = (const signed char*)weight_data_int8 + maxk * channels * p; | |||
| // channels | |||
| for (int q = 0; q < channels; q++) | |||
| { | |||
| const Mat m = bottom_blob.channel(q); | |||
| const signed char* sptr = m.row<signed char>(i * stride_h) + j * stride_w; | |||
| for (int k = 0; k < maxk; k++) | |||
| { | |||
| signed char val = sptr[space_ofs[k]]; | |||
| signed char w = kptr[k]; | |||
| sum += val * w; | |||
| } | |||
| kptr += maxk; | |||
| } | |||
| outptr[j] = sum; | |||
| } | |||
| outptr += outw; | |||
| } | |||
| } | |||
| } | |||
| @@ -1,82 +0,0 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| static void convolution_pack1to4_int8_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_int8, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int maxk = kernel_w * kernel_h; | |||
| // kernel offsets | |||
| std::vector<int> _space_ofs(maxk); | |||
| int* space_ofs = &_space_ofs[0]; | |||
| { | |||
| int p1 = 0; | |||
| int p2 = 0; | |||
| int gap = w * dilation_h - kernel_w * dilation_w; | |||
| for (int i = 0; i < kernel_h; i++) | |||
| { | |||
| for (int j = 0; j < kernel_w; j++) | |||
| { | |||
| space_ofs[p1] = p2; | |||
| p1++; | |||
| p2 += dilation_w; | |||
| } | |||
| p2 += gap; | |||
| } | |||
| } | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < outch; p++) | |||
| { | |||
| int* outptr = top_blob.channel(p); | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| { | |||
| int32x4_t _sum0 = vdupq_n_s32(0); | |||
| const signed char* kptr = weight_data_int8.channel(p); | |||
| // channels | |||
| for (int q = 0; q < channels; q++) | |||
| { | |||
| const Mat m = bottom_blob.channel(q); | |||
| const signed char* sptr = m.row<const signed char>(i * stride_h) + j * stride_w; | |||
| for (int k = 0; k < maxk; k++) | |||
| { | |||
| int8x8_t _val = vdup_n_s8(sptr[space_ofs[k]]); | |||
| int8x8_t _w = vld1_s8(kptr); | |||
| int16x8_t _s0 = vmull_s8(_val, _w); | |||
| _sum0 = vaddw_s16(_sum0, vget_low_s16(_s0)); | |||
| kptr += 4; | |||
| } | |||
| } | |||
| vst1q_s32(outptr + j * 4, _sum0); | |||
| } | |||
| outptr += outw * 4; | |||
| } | |||
| } | |||
| } | |||
| @@ -1,94 +0,0 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| static void convolution_pack8to1_int8_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_int8, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int maxk = kernel_w * kernel_h; | |||
| // kernel offsets | |||
| std::vector<int> _space_ofs(maxk); | |||
| int* space_ofs = &_space_ofs[0]; | |||
| { | |||
| int p1 = 0; | |||
| int p2 = 0; | |||
| int gap = w * dilation_h - kernel_w * dilation_w; | |||
| for (int i = 0; i < kernel_h; i++) | |||
| { | |||
| for (int j = 0; j < kernel_w; j++) | |||
| { | |||
| space_ofs[p1] = p2; | |||
| p1++; | |||
| p2 += dilation_w; | |||
| } | |||
| p2 += gap; | |||
| } | |||
| } | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < outch; p++) | |||
| { | |||
| int* outptr = top_blob.channel(p); | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| { | |||
| int32x4_t _sum0 = vdupq_n_s32(0); | |||
| int32x4_t _sum1 = vdupq_n_s32(0); | |||
| const signed char* kptr = weight_data_int8.channel(p); | |||
| // channels | |||
| for (int q = 0; q < channels; q++) | |||
| { | |||
| const Mat m = bottom_blob.channel(q); | |||
| const signed char* sptr = m.row<const signed char>(i * stride_h) + j * stride_w * 8; | |||
| for (int k = 0; k < maxk; k++) | |||
| { | |||
| int8x8_t _val = vld1_s8(sptr + space_ofs[k] * 8); | |||
| int8x8_t _w = vld1_s8(kptr); | |||
| int16x8_t _s8 = vmull_s8(_val, _w); | |||
| _sum0 = vaddw_s16(_sum0, vget_low_s16(_s8)); | |||
| _sum1 = vaddw_s16(_sum1, vget_high_s16(_s8)); | |||
| kptr += 8; | |||
| } | |||
| } | |||
| int32x4_t _sum = vaddq_s32(_sum0, _sum1); | |||
| #if __aarch64__ | |||
| int sum = vaddvq_s32(_sum); // dot | |||
| #else | |||
| int32x2_t _ss = vadd_s32(vget_low_s32(_sum), vget_high_s32(_sum)); | |||
| _ss = vpadd_s32(_ss, _ss); | |||
| int sum = vget_lane_s32(_ss, 0); | |||
| #endif | |||
| outptr[j] = sum; | |||
| } | |||
| outptr += outw; | |||
| } | |||
| } | |||
| } | |||
| @@ -1,100 +0,0 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| static void convolution_pack8to4_int8_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_int8, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int maxk = kernel_w * kernel_h; | |||
| // kernel offsets | |||
| std::vector<int> _space_ofs(maxk); | |||
| int* space_ofs = &_space_ofs[0]; | |||
| { | |||
| int p1 = 0; | |||
| int p2 = 0; | |||
| int gap = w * dilation_h - kernel_w * dilation_w; | |||
| for (int i = 0; i < kernel_h; i++) | |||
| { | |||
| for (int j = 0; j < kernel_w; j++) | |||
| { | |||
| space_ofs[p1] = p2; | |||
| p1++; | |||
| p2 += dilation_w; | |||
| } | |||
| p2 += gap; | |||
| } | |||
| } | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < outch; p++) | |||
| { | |||
| int* outptr = top_blob.channel(p); | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| { | |||
| int32x4_t _sum01 = vdupq_n_s32(0); | |||
| int32x4_t _sum23 = vdupq_n_s32(0); | |||
| const signed char* kptr = weight_data_int8.channel(p); | |||
| // channels | |||
| for (int q = 0; q < channels; q++) | |||
| { | |||
| const Mat m = bottom_blob.channel(q); | |||
| const signed char* sptr = m.row<signed char>(i * stride_h) + j * stride_w * 8; | |||
| for (int k = 0; k < maxk; k++) | |||
| { | |||
| int8x8_t _val = vld1_s8(sptr + space_ofs[k] * 8); | |||
| int8x8_t _w0 = vld1_s8(kptr); | |||
| int8x8_t _w1 = vld1_s8(kptr + 8); | |||
| int8x8_t _w2 = vld1_s8(kptr + 16); | |||
| int8x8_t _w3 = vld1_s8(kptr + 24); | |||
| int16x8_t _wv0 = vmull_s8(_val, _w0); | |||
| int16x8_t _wv1 = vmull_s8(_val, _w1); | |||
| int16x8_t _wv2 = vmull_s8(_val, _w2); | |||
| int16x8_t _wv3 = vmull_s8(_val, _w3); | |||
| int16x4_t _wv00 = vpadd_s16(vget_low_s16(_wv0), vget_high_s16(_wv0)); | |||
| int16x4_t _wv11 = vpadd_s16(vget_low_s16(_wv1), vget_high_s16(_wv1)); | |||
| int16x4_t _wv22 = vpadd_s16(vget_low_s16(_wv2), vget_high_s16(_wv2)); | |||
| int16x4_t _wv33 = vpadd_s16(vget_low_s16(_wv3), vget_high_s16(_wv3)); | |||
| _sum01 = vpadalq_s16(_sum01, vcombine_s16(_wv00, _wv11)); | |||
| _sum23 = vpadalq_s16(_sum23, vcombine_s16(_wv22, _wv33)); | |||
| kptr += 32; | |||
| } | |||
| } | |||
| int32x4_t _sum0 = vcombine_s32(vpadd_s32(vget_low_s32(_sum01), vget_high_s32(_sum01)), vpadd_s32(vget_low_s32(_sum23), vget_high_s32(_sum23))); | |||
| vst1q_s32(outptr + j * 4, _sum0); | |||
| } | |||
| outptr += outw * 4; | |||
| } | |||
| } | |||
| } | |||
| @@ -214,6 +214,27 @@ static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int | |||
| } | |||
| } | |||
| { | |||
| ncnn::Option opt; | |||
| opt.num_threads = 1; | |||
| opt.use_packing_layout = false; | |||
| opt.use_fp16_packed = false; | |||
| opt.use_fp16_storage = false; | |||
| opt.use_fp16_arithmetic = false; | |||
| opt.use_bf16_storage = false; | |||
| opt.use_shader_pack8 = false; | |||
| opt.use_image_storage = false; | |||
| opt.use_sgemm_convolution = false; | |||
| opt.use_winograd_convolution = false; | |||
| ret = test_layer_opt<ncnn::Convolution>("Convolution", pd, weights, opt, a, requant ? 1.0f : 0.001f, 0, flag); | |||
| if (ret != 0) | |||
| { | |||
| fprintf(stderr, "test_convolution_int8 failed w=%d h=%d c=%d outch=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d requant=%d act=%d actparams=[%f,%f]\n", w, h, c, outch, kernel, dilation, stride, pad, bias, requant, activation_type, activation_params[0], activation_params[1]); | |||
| return ret; | |||
| } | |||
| } | |||
| { | |||
| ncnn::Option opt; | |||
| opt.num_threads = 1; | |||