diff --git a/.github/workflows/linux-aarch64-cpu-gcc.yml b/.github/workflows/linux-aarch64-cpu-gcc.yml index f49c785a6..5e537eeb8 100644 --- a/.github/workflows/linux-aarch64-cpu-gcc.yml +++ b/.github/workflows/linux-aarch64-cpu-gcc.yml @@ -43,17 +43,28 @@ jobs: sudo apt-get update sudo apt-get install g++-aarch64-linux-gnu - - name: configure - run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. - name: build - run: cmake --build build -j 2 - + run: | + mkdir build && cd build + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j 2 - name: test run: | export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH cd build TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. + cmake --build . -j 2 + - name: test-noint8 + run: | + export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH + cd build-noint8 + TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 + linux-gcc-arm82: runs-on: ubuntu-20.04 steps: @@ -90,13 +101,24 @@ jobs: sudo apt-get update sudo apt-get install g++-aarch64-linux-gnu - - name: configure - run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. 
- name: build - run: cmake --build build -j 2 - + run: | + mkdir build && cd build + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j 2 - name: test run: | export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH cd build TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 + + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. + cmake --build . -j 2 + - name: test-noint8 + run: | + export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH + cd build-noint8 + TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 diff --git a/.github/workflows/linux-arm-cpu-gcc.yml b/.github/workflows/linux-arm-cpu-gcc.yml index 703593274..71c637a12 100644 --- a/.github/workflows/linux-arm-cpu-gcc.yml +++ b/.github/workflows/linux-arm-cpu-gcc.yml @@ -43,17 +43,28 @@ jobs: sudo apt-get update sudo apt-get install g++-arm-linux-gnueabi - - name: configure - run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. - name: build - run: cmake --build build -j 2 - + run: | + mkdir build && cd build + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . 
-j 2 - name: test run: | export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH cd build TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2 + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. + cmake --build . -j 2 + - name: test-noint8 + run: | + export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH + cd build-noint8 + TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2 + linux-gcc-armhf: runs-on: ubuntu-20.04 steps: @@ -90,13 +101,24 @@ jobs: sudo apt-get update sudo apt-get install g++-arm-linux-gnueabihf - - name: configure - run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. - name: build - run: cmake --build build -j 2 - + run: | + mkdir build && cd build + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j 2 - name: test run: | export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH cd build TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2 + + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. + cmake --build . 
-j 2 + - name: test-noint8 + run: | + export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH + cd build-noint8 + TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2 diff --git a/.github/workflows/linux-x64-cpu-clang.yml b/.github/workflows/linux-x64-cpu-clang.yml index 2466a45bb..b4a746645 100644 --- a/.github/workflows/linux-x64-cpu-clang.yml +++ b/.github/workflows/linux-x64-cpu-clang.yml @@ -47,6 +47,16 @@ jobs: cmake --build . -j 2 - name: test-avx2 run: cd build-avx2 && ctest --output-on-failure -j 2 + - name: build-noint8 + env: + CC: clang + CXX: clang++ + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j 2 + - name: test-noint8 + run: cd build-noint8 && ctest --output-on-failure -j 2 linux-clang-simplestl: runs-on: ubuntu-latest diff --git a/.github/workflows/linux-x64-cpu-gcc.yml b/.github/workflows/linux-x64-cpu-gcc.yml index 6e153f63d..96230aaab 100644 --- a/.github/workflows/linux-x64-cpu-gcc.yml +++ b/.github/workflows/linux-x64-cpu-gcc.yml @@ -38,6 +38,13 @@ jobs: cmake --build . -j 2 - name: test-avx2 run: cd build-avx2 && ctest --output-on-failure -j 2 + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j 2 + - name: test-noint8 + run: cd build-noint8 && ctest --output-on-failure -j 2 linux-gcc-cpp03-nostdio-nostring-simplestl: runs-on: ubuntu-16.04 diff --git a/.github/workflows/linux-x86-cpu-clang.yml b/.github/workflows/linux-x86-cpu-clang.yml index 8e9512a9c..991c5fd04 100644 --- a/.github/workflows/linux-x86-cpu-clang.yml +++ b/.github/workflows/linux-x86-cpu-clang.yml @@ -37,3 +37,13 @@ jobs: mkdir build-shared && cd build-shared cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON .. cmake --build . 
-j 2 + - name: build-noint8 + env: + CC: clang + CXX: clang++ + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF .. + cmake --build . -j 2 + - name: test-noint8 + run: cd build-noint8 && ctest --output-on-failure -j 2 diff --git a/.github/workflows/linux-x86-cpu-gcc.yml b/.github/workflows/linux-x86-cpu-gcc.yml index e604fe445..9742f5759 100644 --- a/.github/workflows/linux-x86-cpu-gcc.yml +++ b/.github/workflows/linux-x86-cpu-gcc.yml @@ -31,3 +31,10 @@ jobs: mkdir build-shared && cd build-shared cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON .. cmake --build . -j 2 + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF .. + cmake --build . -j 2 + - name: test-noint8 + run: cd build-noint8 && ctest --output-on-failure -j 2 diff --git a/CMakeLists.txt b/CMakeLists.txt index 505809ece..c5eac12c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,7 @@ option(NCNN_BUILD_TESTS "build tests" OFF) option(NCNN_COVERAGE "build for coverage" OFF) option(NCNN_BUILD_BENCHMARK "build benchmark" ON) option(NCNN_PYTHON "build python api" OFF) +option(NCNN_INT8 "int8 inference" ON) if(ANDROID OR IOS OR NCNN_SIMPLESTL OR CMAKE_CROSSCOMPILING) option(NCNN_DISABLE_RTTI "disable rtti" ON) diff --git a/docs/how-to-use-and-FAQ/build-minimal-library.md b/docs/how-to-use-and-FAQ/build-minimal-library.md index 841e15d8a..f8a8255b7 100644 --- a/docs/how-to-use-and-FAQ/build-minimal-library.md +++ b/docs/how-to-use-and-FAQ/build-minimal-library.md @@ -39,6 +39,15 @@ cmake -DNCNN_STRING=OFF .. 
Read more [here](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md#input-and-output). +### disable NCNN_INT8 + +``` +cmake -DNCNN_INT8=OFF .. +``` + +* Cannot use quantized int8 inference. + + ### drop pixel rotate and affine functions ``` diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp index c0311cce9..95ca29533 100644 --- a/src/layer/arm/convolution_arm.cpp +++ b/src/layer/arm/convolution_arm.cpp @@ -29,19 +29,21 @@ namespace ncnn { #include "convolution_bf16s.h" #include "convolution_sgemm.h" -#include "convolution_sgemm_int8.h" #include "convolution_1x1.h" #include "convolution_1x1_bf16s.h" -#include "convolution_1x1_int8.h" #include "convolution_2x2.h" #include "convolution_3x3.h" -#include "convolution_3x3_int8.h" #include "convolution_4x4.h" #include "convolution_5x5.h" #include "convolution_7x7.h" +#if NCNN_INT8 +#include "convolution_sgemm_int8.h" +#include "convolution_1x1_int8.h" +#include "convolution_3x3_int8.h" #include "convolution_int8.h" +#endif // NCNN_INT8 #if __ARM_NEON #include "convolution_pack4.h" @@ -67,6 +69,7 @@ namespace ncnn { #include "convolution_7x7_pack1to4.h" #include "convolution_7x7_pack1to4_bf16s.h" +#if NCNN_INT8 #include "convolution_pack8_int8.h" #include "convolution_pack1to8_int8.h" #include "convolution_pack8to1_int8.h" @@ -80,6 +83,7 @@ namespace ncnn { #include "convolution_3x3_pack1to8_int8.h" #include "convolution_7x7_pack1to8_int8.h" #include "convolution_3x3_pack8to1_int8.h" +#endif // NCNN_INT8 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #include "convolution_fp16s.h" @@ -169,10 +173,12 @@ int Convolution_arm::create_pipeline(const Option& opt) activation->create_pipeline(opt); } +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return create_pipeline_int8_arm(opt); } +#endif #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC if (opt.use_fp16_storage) @@ -418,10 +424,12 @@ int Convolution_arm::destroy_pipeline(const 
Option& opt) int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8_arm(bottom_blob, top_blob, opt); } +#endif if (bottom_blob.dims != 3) { @@ -1767,6 +1775,7 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const return 0; } +#if NCNN_INT8 int Convolution_arm::create_pipeline_int8_arm(const Option& opt) { const int maxk = kernel_w * kernel_h; @@ -2263,6 +2272,7 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con return 0; } +#endif // NCNN_INT8 int Convolution_arm::forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { diff --git a/src/layer/arm/convolution_arm.h b/src/layer/arm/convolution_arm.h index ae49752e4..e0eca6a08 100644 --- a/src/layer/arm/convolution_arm.h +++ b/src/layer/arm/convolution_arm.h @@ -37,8 +37,10 @@ protected: #endif int create_pipeline_bf16s(const Option& opt); int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#if NCNN_INT8 int create_pipeline_int8_arm(const Option& opt); int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif int forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; public: @@ -67,11 +69,13 @@ public: // bf16 Mat weight_data_bf16; +#if NCNN_INT8 // int8 Mat weight_data_int8; // Mat weight_3x3s2_data_int8; std::vector<Mat> weight_3x3_winograd23_data_int8; +#endif }; } // namespace ncnn diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp index 2b393b481..4f13f1ccb 100644 --- a/src/layer/arm/convolutiondepthwise_arm.cpp +++ b/src/layer/arm/convolutiondepthwise_arm.cpp @@ -26,16 +26,21 @@ namespace ncnn { #include "convolutiondepthwise_3x3.h" -#include "convolutiondepthwise_3x3_int8.h" #include "convolutiondepthwise_5x5.h" +#if NCNN_INT8 +#include 
"convolutiondepthwise_3x3_int8.h" +#endif // NCNN_INT8 + #if __ARM_NEON #include "convolutiondepthwise_3x3_pack4.h" #include "convolutiondepthwise_3x3_pack4_bf16s.h" #include "convolutiondepthwise_5x5_pack4.h" #include "convolutiondepthwise_5x5_pack4_bf16s.h" +#if NCNN_INT8 #include "convolutiondepthwise_3x3_pack8_int8.h" +#endif // NCNN_INT8 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #include "convolutiondepthwise_3x3_fp16s.h" @@ -104,10 +109,12 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) activation->create_pipeline(opt); } +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return create_pipeline_int8_arm(opt); } +#endif const int maxk = kernel_w * kernel_h; int channels = (weight_data_size / group) / maxk / (num_output / group) * group; @@ -269,6 +276,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) weights[0] = weight_data_g; weights[1] = bias_data_g; +#if NCNN_INT8 if (int8_scale_term) { Mat weight_data_int8_scales_g(num_output_g); @@ -280,6 +288,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) { weights[4] = top_blob_int8_scales.range(g, 1); } +#endif op->load_model(ModelBinFromMatArray(weights)); } @@ -288,6 +297,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) ncnn::Mat weights[4]; weights[0] = weight_data_g; +#if NCNN_INT8 if (int8_scale_term) { Mat weight_data_int8_scales_g(num_output_g); @@ -299,6 +309,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) { weights[3] = top_blob_int8_scales.range(g, 1); } +#endif op->load_model(ModelBinFromMatArray(weights)); } @@ -332,13 +343,12 @@ int ConvolutionDepthWise_arm::destroy_pipeline(const Option& opt) int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { - // convolv with NxN kernel - // value = value + bias - +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return 
forward_int8_arm(bottom_blob, top_blob, opt); } +#endif int elembits = bottom_blob.elembits(); @@ -1447,6 +1457,7 @@ int ConvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blo return 0; } +#if NCNN_INT8 int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt) { const int maxk = kernel_w * kernel_h; @@ -1981,5 +1992,6 @@ int ConvolutionDepthWise_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_ return 0; } +#endif // NCNN_INT8 } // namespace ncnn diff --git a/src/layer/arm/convolutiondepthwise_arm.h b/src/layer/arm/convolutiondepthwise_arm.h index 2d754d71c..2cff01cb9 100644 --- a/src/layer/arm/convolutiondepthwise_arm.h +++ b/src/layer/arm/convolutiondepthwise_arm.h @@ -36,8 +36,10 @@ protected: int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; #endif int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#if NCNN_INT8 int create_pipeline_int8_arm(const Option& opt); int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif public: Layer* activation; @@ -54,8 +56,10 @@ public: Mat weight_data_bf16; Mat weight_data_pack4_bf16; +#if NCNN_INT8 // int8 Mat weight_data_int8; +#endif }; } // namespace ncnn diff --git a/src/layer/arm/innerproduct_arm.cpp b/src/layer/arm/innerproduct_arm.cpp index a363ae597..840bd7afa 100644 --- a/src/layer/arm/innerproduct_arm.cpp +++ b/src/layer/arm/innerproduct_arm.cpp @@ -55,10 +55,12 @@ int InnerProduct_arm::create_pipeline(const Option& opt) } #endif // __ARM_NEON +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return create_pipeline_int8_arm(opt); } +#endif #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC if (opt.use_fp16_storage) @@ -94,260 +96,14 @@ int InnerProduct_arm::destroy_pipeline(const Option& opt) return 0; } -int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt) -{ - if (activation_type == 1) - { - activation = 
ncnn::create_layer(ncnn::LayerType::ReLU); - - ncnn::ParamDict pd; - activation->load_param(pd); - } - - const int num_input = weight_data_size / num_output; - - int out_elempack = 1; - - if (opt.use_packing_layout) - { - out_elempack = num_output % 8 == 0 ? 8 : 1; - } - - // src = inch-outch - // dst = pb-inch-outch/pb - { - Mat weight_data_r2 = weight_data.reshape(num_input, num_output); - - weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack); - - for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack) - { - signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack); - - for (int p = 0; p < num_input; p++) - { - for (int j = 0; j < out_elempack; j++) - { - *g0++ = weight_data_r2.row<signed char>(q + j)[p]; - } - } - } - } - - // // convert fp32 to int8 - // if (weight_data_int8_scales.empty()) - // { - // return 0; - // } - // #if __aarch64__ - // // first reorder Matrix A before MatMul - // const int n = num_output; - // const int k = weight_data.total() / n; - // weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator); - // - // int8_t* b = weight_data; - // int8_t* sb = weight_data_int8; - // reorder_a(b, sb, n, k, k); - // - // // pre-built scales - // scales_in.create(num_output, 4u, opt.blob_allocator); - // for (int i = 0; i < num_output; ++i) - // { - // if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6) - // { - // scales_in[i] = 0.f; - // } - // else - // { - // scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]); - // } - // } - // #endif - return 0; -} - -int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const -{ - const int num_input = weight_data_size / num_output; - - if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1) - { - // gemm - Mat bottom_blob_unpacked; - Option opt_unpack = opt; - opt_unpack.blob_allocator = opt.workspace_allocator; - 
convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack); - - return forward_int8(bottom_blob_unpacked, top_blob, opt); - } - - int elembits = bottom_blob.elembits(); - - Mat bottom_blob_int8 = bottom_blob; - if (elembits != 8) - { - Option opt_q = opt; - opt_q.blob_allocator = opt.workspace_allocator; - quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q); - } - - Mat bottom_blob_int8_flattened = bottom_blob_int8; - if (bottom_blob_int8.dims != 1) - { - Option opt_flatten = opt; - opt_flatten.blob_allocator = opt.workspace_allocator; - flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten); - } - - // int elempack = bottom_blob_int8_flattened.elempack; - - int out_elempack = 1; - if (opt.use_packing_layout) - { - out_elempack = num_output % 8 == 0 ? 8 : 1; - } - - top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator); - if (top_blob.empty()) - return -100; - - Mat top_blob_int32; - top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator); - if (top_blob_int32.empty()) - return -100; - -#if __ARM_NEON - if (out_elempack == 8) - { - // num_output - #pragma omp parallel for num_threads(opt.num_threads) - for (int p = 0; p < num_output / out_elempack; p++) - { - int32x4_t _sum0 = vdupq_n_s32(0); - int32x4_t _sum1 = vdupq_n_s32(0); - - const signed char* kptr = weight_data_int8.row<const signed char>(p); - const signed char* sptr = bottom_blob_int8_flattened; - - int i = 0; - for (; i < num_input; i++) - { - int8x8_t _val = vdup_n_s8(sptr[0]); - - int8x8_t _w = vld1_s8(kptr); - - int16x8_t _s0 = vmull_s8(_val, _w); - _sum0 = vaddw_s16(_sum0, vget_low_s16(_s0)); - _sum1 = vaddw_s16(_sum1, vget_high_s16(_s0)); - - sptr += 1; - kptr += 8; - } - - int* outptr = (int*)top_blob_int32; - vst1q_s32(outptr + p * 8, _sum0); - vst1q_s32(outptr + p * 8 + 4, _sum1); - } - } -#endif // __ARM_NEON - - if (out_elempack == 1) - { - // 
num_output - #pragma omp parallel for num_threads(opt.num_threads) - for (int p = 0; p < num_output / out_elempack; p++) - { - int sum = 0; - - const signed char* kptr = weight_data_int8.row<const signed char>(p); - const signed char* sptr = bottom_blob_int8_flattened; - - int i = 0; - for (; i < num_input; i++) - { - signed char val = sptr[0]; - - signed char w = kptr[0]; - - sum += val * w; - - sptr += 1; - kptr += 1; - } - - int* outptr = (int*)top_blob_int32; - outptr[p] = sum; - } - } - - Mat scale_data(num_output); - for (int p = 0; p < num_output; p++) - { - // dequantize - float scale_in; - if (weight_data_int8_scales[p] == 0) - scale_in = 0; - else - scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]); - - scale_data[p] = scale_in; - } - - dequantize_from_int32(top_blob_int32, top_blob, scale_data, bias_data, opt); - - if (activation) - { - activation->forward_inplace(top_blob, opt); - } - - return 0; - - // #if __aarch64__ - // const int w = bottom_blob_tm.w; - // const int h = bottom_blob_tm.h; - // - // const int m = 1; - // const int k = bottom_blob_tm.c * w * h; - // Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator); - // { - // reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k); - // } - // - // Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator); - // int32_t* pc = top_blob_tm; - // const int8_t* pa = bottom_blob_reorder; - // const int8_t* pb = weight_data_int8; - // int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt); - // - // float* outptr = top_blob; - // - // // dequant.fused.relu int32_t to float - // for (int p = 0; p < num_output; ++p) - // { - // float sumfp32 = pc[p] * scales_in[p]; - // if (bias_term) - // { - // sumfp32 += bias_data[p]; - // } - // if (1 == activation_type) - // { - // sumfp32 = std::max(0.f, sumfp32); - // } - // - // outptr[p] = sumfp32; - // } - // return 0; - // #else - // return InnerProduct::forward_int8(bottom_blob, top_blob, opt); - // 
#endif -} - int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8_arm(bottom_blob, top_blob, opt); } +#endif int elembits = bottom_blob.elembits(); @@ -2140,4 +1896,254 @@ int InnerProduct_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const return 0; } +#if NCNN_INT8 +int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt) +{ + if (activation_type == 1) + { + activation = ncnn::create_layer(ncnn::LayerType::ReLU); + + ncnn::ParamDict pd; + activation->load_param(pd); + } + + const int num_input = weight_data_size / num_output; + + int out_elempack = 1; + + if (opt.use_packing_layout) + { + out_elempack = num_output % 8 == 0 ? 8 : 1; + } + + // src = inch-outch + // dst = pb-inch-outch/pb + { + Mat weight_data_r2 = weight_data.reshape(num_input, num_output); + + weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack); + + for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack) + { + signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack); + + for (int p = 0; p < num_input; p++) + { + for (int j = 0; j < out_elempack; j++) + { + *g0++ = weight_data_r2.row<signed char>(q + j)[p]; + } + } + } + } + + // // convert fp32 to int8 + // if (weight_data_int8_scales.empty()) + // { + // return 0; + // } + // #if __aarch64__ + // // first reorder Matrix A before MatMul + // const int n = num_output; + // const int k = weight_data.total() / n; + // weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator); + // + // int8_t* b = weight_data; + // int8_t* sb = weight_data_int8; + // reorder_a(b, sb, n, k, k); + // + // // pre-built scales + // scales_in.create(num_output, 4u, opt.blob_allocator); + // for (int i = 0; i < num_output; ++i) + // { + // if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6) + // { + // scales_in[i] = 0.f; + // } + // else + // 
{ + // scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]); + // } + // } + // #endif + return 0; +} + +int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const +{ + const int num_input = weight_data_size / num_output; + + if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1) + { + // gemm + Mat bottom_blob_unpacked; + Option opt_unpack = opt; + opt_unpack.blob_allocator = opt.workspace_allocator; + convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack); + + return forward_int8(bottom_blob_unpacked, top_blob, opt); + } + + int elembits = bottom_blob.elembits(); + + Mat bottom_blob_int8 = bottom_blob; + if (elembits != 8) + { + Option opt_q = opt; + opt_q.blob_allocator = opt.workspace_allocator; + quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q); + } + + Mat bottom_blob_int8_flattened = bottom_blob_int8; + if (bottom_blob_int8.dims != 1) + { + Option opt_flatten = opt; + opt_flatten.blob_allocator = opt.workspace_allocator; + flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten); + } + + // int elempack = bottom_blob_int8_flattened.elempack; + + int out_elempack = 1; + if (opt.use_packing_layout) + { + out_elempack = num_output % 8 == 0 ? 
8 : 1; + } + + top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + Mat top_blob_int32; + top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator); + if (top_blob_int32.empty()) + return -100; + +#if __ARM_NEON + if (out_elempack == 8) + { + // num_output + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < num_output / out_elempack; p++) + { + int32x4_t _sum0 = vdupq_n_s32(0); + int32x4_t _sum1 = vdupq_n_s32(0); + + const signed char* kptr = weight_data_int8.row<const signed char>(p); + const signed char* sptr = bottom_blob_int8_flattened; + + int i = 0; + for (; i < num_input; i++) + { + int8x8_t _val = vdup_n_s8(sptr[0]); + + int8x8_t _w = vld1_s8(kptr); + + int16x8_t _s0 = vmull_s8(_val, _w); + _sum0 = vaddw_s16(_sum0, vget_low_s16(_s0)); + _sum1 = vaddw_s16(_sum1, vget_high_s16(_s0)); + + sptr += 1; + kptr += 8; + } + + int* outptr = (int*)top_blob_int32; + vst1q_s32(outptr + p * 8, _sum0); + vst1q_s32(outptr + p * 8 + 4, _sum1); + } + } +#endif // __ARM_NEON + + if (out_elempack == 1) + { + // num_output + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < num_output / out_elempack; p++) + { + int sum = 0; + + const signed char* kptr = weight_data_int8.row<const signed char>(p); + const signed char* sptr = bottom_blob_int8_flattened; + + int i = 0; + for (; i < num_input; i++) + { + signed char val = sptr[0]; + + signed char w = kptr[0]; + + sum += val * w; + + sptr += 1; + kptr += 1; + } + + int* outptr = (int*)top_blob_int32; + outptr[p] = sum; + } + } + + Mat scale_data(num_output); + for (int p = 0; p < num_output; p++) + { + // dequantize + float scale_in; + if (weight_data_int8_scales[p] == 0) + scale_in = 0; + else + scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]); + + scale_data[p] = scale_in; + } + + dequantize_from_int32(top_blob_int32, top_blob, 
scale_data, bias_data, opt); + + if (activation) + { + activation->forward_inplace(top_blob, opt); + } + + return 0; + + // #if __aarch64__ + // const int w = bottom_blob_tm.w; + // const int h = bottom_blob_tm.h; + // + // const int m = 1; + // const int k = bottom_blob_tm.c * w * h; + // Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator); + // { + // reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k); + // } + // + // Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator); + // int32_t* pc = top_blob_tm; + // const int8_t* pa = bottom_blob_reorder; + // const int8_t* pb = weight_data_int8; + // int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt); + // + // float* outptr = top_blob; + // + // // dequant.fused.relu int32_t to float + // for (int p = 0; p < num_output; ++p) + // { + // float sumfp32 = pc[p] * scales_in[p]; + // if (bias_term) + // { + // sumfp32 += bias_data[p]; + // } + // if (1 == activation_type) + // { + // sumfp32 = std::max(0.f, sumfp32); + // } + // + // outptr[p] = sumfp32; + // } + // return 0; + // #else + // return InnerProduct::forward_int8(bottom_blob, top_blob, opt); + // #endif +} +#endif // NCNN_INT8 + } // namespace ncnn diff --git a/src/layer/arm/innerproduct_arm.h b/src/layer/arm/innerproduct_arm.h index 17107c0ca..4cdc600bb 100644 --- a/src/layer/arm/innerproduct_arm.h +++ b/src/layer/arm/innerproduct_arm.h @@ -39,9 +39,10 @@ protected: #endif int create_pipeline_bf16s(const Option& opt); int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; - +#if NCNN_INT8 int create_pipeline_int8_arm(const Option& opt); int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif public: Layer* flatten; @@ -54,9 +55,11 @@ public: // bf16 Mat weight_data_bf16; +#if NCNN_INT8 // int8 Mat weight_data_int8; Mat scales_in; +#endif }; } // namespace ncnn diff --git a/src/layer/convolution.cpp b/src/layer/convolution.cpp 
index c674de62f..ff610843a 100644 --- a/src/layer/convolution.cpp +++ b/src/layer/convolution.cpp @@ -47,7 +47,12 @@ int Convolution::load_param(const ParamDict& pd) if (int8_scale_term) { +#if NCNN_INT8 support_int8_storage = true; +#else + NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference"); + return -1; +#endif } return 0; @@ -66,6 +71,7 @@ int Convolution::load_model(const ModelBin& mb) return -100; } +#if NCNN_INT8 if (int8_scale_term) { weight_data_int8_scales = mb.load(num_output, 1); @@ -76,12 +82,14 @@ int Convolution::load_model(const ModelBin& mb) { top_blob_int8_scales = mb.load(1, 1); } +#endif // NCNN_INT8 return 0; } int Convolution::create_pipeline(const Option& opt) { +#if NCNN_INT8 // runtime quantize the weight data if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term) { @@ -101,6 +109,7 @@ int Convolution::create_pipeline(const Option& opt) weight_data = weight_data_int8.reshape(weight_data_size); } +#endif // NCNN_INT8 return 0; } @@ -110,10 +119,12 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op // convolv with NxN kernel // value = value + bias +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8(bottom_blob, top_blob, opt); } +#endif // flattened blob, implement as InnerProduct if (bottom_blob.dims == 1 && kernel_w == 1 && kernel_h == 1) @@ -140,11 +151,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op weights[0] = weight_data; weights[1] = bias_data; +#if NCNN_INT8 if (int8_scale_term) { weights[2] = weight_data_int8_scales; weights[3] = bottom_blob_int8_scales; } +#endif op->load_model(ModelBinFromMatArray(weights)); @@ -327,6 +340,7 @@ void Convolution::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered } } +#if NCNN_INT8 static inline signed char float2int8(float v) { int int32 = static_cast<int>(round(v)); @@ -492,5 +506,6 @@ int 
Convolution::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Optio return 0; } +#endif // NCNN_INT8 } // namespace ncnn diff --git a/src/layer/convolution.h b/src/layer/convolution.h index d3f90d708..f3458248a 100644 --- a/src/layer/convolution.h +++ b/src/layer/convolution.h @@ -35,7 +35,9 @@ public: protected: void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const; +#if NCNN_INT8 int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif public: // param @@ -65,9 +67,11 @@ public: Mat weight_data; Mat bias_data; +#if NCNN_INT8 Mat weight_data_int8_scales; Mat bottom_blob_int8_scales; Mat top_blob_int8_scales; +#endif // implementation type, 0 means do not use auto pack model int impl_type; diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp index a274cdb70..3f7d0e3d8 100644 --- a/src/layer/convolutiondepthwise.cpp +++ b/src/layer/convolutiondepthwise.cpp @@ -53,7 +53,12 @@ int ConvolutionDepthWise::load_param(const ParamDict& pd) if (int8_scale_term) { +#if NCNN_INT8 support_int8_storage = true; +#else + NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference"); + return -1; +#endif } return 0; @@ -72,6 +77,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb) return -100; } +#if NCNN_INT8 if (int8_scale_term == 1 || int8_scale_term == 101) { weight_data_int8_scales = mb.load(group, 1); @@ -104,12 +110,14 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb) top_blob_int8_scales = Mat(group); top_blob_int8_scales.fill(top_blob_int8_scale); } +#endif // NCNN_INT8 return 0; } int ConvolutionDepthWise::create_pipeline(const Option& opt) { +#if NCNN_INT8 // runtime quantize the weight data if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term) { @@ -133,6 +141,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt) weight_data = int8_weight_data; } +#endif // NCNN_INT8 return 0; } @@ 
-142,10 +151,12 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O // convolv with NxN kernel // value = value + bias +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8(bottom_blob, top_blob, opt); } +#endif int w = bottom_blob.w; int h = bottom_blob.h; @@ -403,6 +414,7 @@ void ConvolutionDepthWise::make_padding(const Mat& bottom_blob, Mat& bottom_blob } } +#if NCNN_INT8 static inline signed char float2int8(float v) { int int32 = static_cast(round(v)); @@ -694,5 +706,6 @@ int ConvolutionDepthWise::forward_int8(const Mat& bottom_blob, Mat& top_blob, co return 0; } +#endif // NCNN_INT8 } // namespace ncnn diff --git a/src/layer/convolutiondepthwise.h b/src/layer/convolutiondepthwise.h index c32e3203f..07cadd19a 100644 --- a/src/layer/convolutiondepthwise.h +++ b/src/layer/convolutiondepthwise.h @@ -35,7 +35,9 @@ public: protected: void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const; +#if NCNN_INT8 int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif public: // param @@ -66,9 +68,11 @@ public: Mat weight_data; Mat bias_data; +#if NCNN_INT8 Mat weight_data_int8_scales; Mat bottom_blob_int8_scales; Mat top_blob_int8_scales; +#endif }; } // namespace ncnn diff --git a/src/layer/innerproduct.cpp b/src/layer/innerproduct.cpp index b236317c3..e54752947 100644 --- a/src/layer/innerproduct.cpp +++ b/src/layer/innerproduct.cpp @@ -35,7 +35,12 @@ int InnerProduct::load_param(const ParamDict& pd) if (int8_scale_term) { +#if NCNN_INT8 support_int8_storage = true; +#else + NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference"); + return -1; +#endif } return 0; @@ -54,17 +59,20 @@ int InnerProduct::load_model(const ModelBin& mb) return -100; } +#if NCNN_INT8 if (int8_scale_term) { weight_data_int8_scales = mb.load(num_output, 1); bottom_blob_int8_scales = mb.load(1, 1); } +#endif // NCNN_INT8 return 0; 
} int InnerProduct::create_pipeline(const Option& opt) { +#if NCNN_INT8 // runtime quantize the weight data if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term) { @@ -81,16 +89,19 @@ int InnerProduct::create_pipeline(const Option& opt) weight_data = weight_data_int8.reshape(weight_data_size); } +#endif // NCNN_INT8 return 0; } int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8(bottom_blob, top_blob, opt); } +#endif const int num_input = weight_data_size / num_output; @@ -218,6 +229,7 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& o return 0; } +#if NCNN_INT8 int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { const int num_input = weight_data_size / num_output; @@ -332,5 +344,6 @@ int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Opti return 0; } +#endif // NCNN_INT8 } // namespace ncnn diff --git a/src/layer/innerproduct.h b/src/layer/innerproduct.h index 91f5cb9f3..1f9b3fdc0 100644 --- a/src/layer/innerproduct.h +++ b/src/layer/innerproduct.h @@ -33,7 +33,9 @@ public: virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; protected: +#if NCNN_INT8 int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif public: // param @@ -52,8 +54,10 @@ public: Mat weight_data; Mat bias_data; +#if NCNN_INT8 Mat weight_data_int8_scales; Mat bottom_blob_int8_scales; +#endif }; } // namespace ncnn diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp index a8612005f..48775bace 100644 --- a/src/layer/x86/convolution_x86.cpp +++ b/src/layer/x86/convolution_x86.cpp @@ -35,18 +35,21 @@ namespace ncnn { #include "convolution_5x5.h" #include "convolution_7x7.h" +#if NCNN_INT8 #include "convolution_sgemm_int8.h" #include 
"convolution_1x1_int8.h" #include "convolution_3x3_int8.h" - #include "convolution_int8.h" +#endif // NCNN_INT8 #if __SSE2__ #include "convolution_1x1_pack4.h" +#if NCNN_INT8 #include "convolution_pack8_int8.h" #include "convolution_pack1to8_int8.h" #include "convolution_pack8to1_int8.h" +#endif // NCNN_INT8 #if __AVX__ #include "convolution_3x3_pack1to8.h" #include "convolution_3x3_pack8to1.h" @@ -118,10 +121,12 @@ int Convolution_x86::create_pipeline(const Option& opt) activation->create_pipeline(opt); } +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return create_pipeline_int8_x86(opt); } +#endif int kernel_size = kernel_w * kernel_h; int num_input = weight_data_size / kernel_size / num_output; @@ -311,10 +316,12 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option // convolv with NxN kernel // value = value + bias +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8_x86(bottom_blob, top_blob, opt); } +#endif if (bottom_blob.dims != 3) { @@ -1058,6 +1065,7 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option return 0; } +#if NCNN_INT8 int Convolution_x86::create_pipeline_int8_x86(const Option& opt) { const int maxk = kernel_w * kernel_h; @@ -1410,6 +1418,7 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con return 0; } +#endif // NCNN_INT8 int Convolution_x86::forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { diff --git a/src/layer/x86/convolution_x86.h b/src/layer/x86/convolution_x86.h index 561118d9c..326a98cb0 100644 --- a/src/layer/x86/convolution_x86.h +++ b/src/layer/x86/convolution_x86.h @@ -30,8 +30,10 @@ public: virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; protected: +#if NCNN_INT8 int create_pipeline_int8_x86(const Option& opt); int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& 
opt) const; +#endif int forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; public: @@ -47,9 +49,11 @@ public: Mat weight_3x3_winograd64_data_pack8; +#if NCNN_INT8 // int8 Mat weight_data_int8; Mat weight_3x3_winograd23_data_int8; +#endif }; } // namespace ncnn diff --git a/src/layer/x86/convolutiondepthwise_x86.cpp b/src/layer/x86/convolutiondepthwise_x86.cpp index cb7f8b4ac..a43ce0cc0 100644 --- a/src/layer/x86/convolutiondepthwise_x86.cpp +++ b/src/layer/x86/convolutiondepthwise_x86.cpp @@ -36,7 +36,9 @@ namespace ncnn { #endif #endif // __SSE2__ #include "convolutiondepthwise_3x3.h" +#if NCNN_INT8 #include "convolutiondepthwise_3x3_int8.h" +#endif // NCNN_INT8 ConvolutionDepthWise_x86::ConvolutionDepthWise_x86() { @@ -102,10 +104,12 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) activation->create_pipeline(opt); } +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return create_pipeline_int8_x86(opt); } +#endif const int maxk = kernel_w * kernel_h; int channels = (weight_data_size / group) / maxk / (num_output / group) * group; @@ -235,6 +239,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) weights[0] = weight_data_g; weights[1] = bias_data_g; +#if NCNN_INT8 if (int8_scale_term) { Mat weight_data_int8_scales_g(num_output_g); @@ -246,6 +251,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) { weights[4] = top_blob_int8_scales.range(g, 1); } +#endif op->load_model(ModelBinFromMatArray(weights)); } @@ -254,6 +260,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) ncnn::Mat weights[4]; weights[0] = weight_data_g; +#if NCNN_INT8 if (int8_scale_term) { Mat weight_data_int8_scales_g(num_output_g); @@ -265,6 +272,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) { weights[3] = top_blob_int8_scales.range(g, 1); } +#endif op->load_model(ModelBinFromMatArray(weights)); } @@ -298,13 +306,12 @@ int 
ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt) int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { - // convolv with NxN kernel - // value = value + bias - +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8_x86(bottom_blob, top_blob, opt); } +#endif int w = bottom_blob.w; int h = bottom_blob.h; @@ -628,6 +635,7 @@ int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, con return 0; } +#if NCNN_INT8 int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt) { const int maxk = kernel_w * kernel_h; @@ -1061,5 +1069,6 @@ int ConvolutionDepthWise_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_ return 0; } +#endif // NCNN_INT8 } // namespace ncnn diff --git a/src/layer/x86/convolutiondepthwise_x86.h b/src/layer/x86/convolutiondepthwise_x86.h index cf7a27b83..3f6cdca56 100644 --- a/src/layer/x86/convolutiondepthwise_x86.h +++ b/src/layer/x86/convolutiondepthwise_x86.h @@ -31,8 +31,10 @@ public: protected: int create_group_ops(const Option& opt); +#if NCNN_INT8 int create_pipeline_int8_x86(const Option& opt); int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif public: Layer* activation; @@ -41,8 +43,10 @@ public: // packing Mat weight_data_packed; +#if NCNN_INT8 // int8 Mat weight_data_int8; +#endif }; } // namespace ncnn diff --git a/src/layer/x86/innerproduct_x86.cpp b/src/layer/x86/innerproduct_x86.cpp index 007605cc6..ee17d7de0 100644 --- a/src/layer/x86/innerproduct_x86.cpp +++ b/src/layer/x86/innerproduct_x86.cpp @@ -54,10 +54,12 @@ int InnerProduct_x86::create_pipeline(const Option& opt) flatten->create_pipeline(opt); } +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return create_pipeline_int8_x86(opt); } +#endif const int num_input = weight_data_size / num_output; @@ -124,10 +126,12 @@ int 
InnerProduct_x86::destroy_pipeline(const Option& opt) int InnerProduct_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { +#if NCNN_INT8 if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) { return forward_int8_x86(bottom_blob, top_blob, opt); } +#endif const int num_input = weight_data_size / num_output; @@ -1694,6 +1698,7 @@ int InnerProduct_x86::forward_fp16(const Mat& bottom_blob, Mat& top_blob, const } #endif // __AVX__ +#if NCNN_INT8 int InnerProduct_x86::create_pipeline_int8_x86(const Option& opt) { if (activation_type == 1) @@ -1883,5 +1888,6 @@ int InnerProduct_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, co return 0; } +#endif // NCNN_INT8 } // namespace ncnn diff --git a/src/layer/x86/innerproduct_x86.h b/src/layer/x86/innerproduct_x86.h index d3afcfb87..2610dfe50 100644 --- a/src/layer/x86/innerproduct_x86.h +++ b/src/layer/x86/innerproduct_x86.h @@ -34,9 +34,10 @@ public: protected: int forward_fp16(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; - +#if NCNN_INT8 int create_pipeline_int8_x86(const Option& opt); int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +#endif public: Layer* flatten; @@ -47,9 +48,11 @@ public: // fp16 weight data Mat weight_data_fp16; +#if NCNN_INT8 // int8 Mat weight_data_int8; Mat scales_in; +#endif }; } // namespace ncnn diff --git a/src/platform.h.in b/src/platform.h.in index 81e215755..44d775aa2 100644 --- a/src/platform.h.in +++ b/src/platform.h.in @@ -30,6 +30,7 @@ #cmakedefine01 NCNN_RUNTIME_CPU #cmakedefine01 NCNN_AVX2 #cmakedefine01 NCNN_ARM82 +#cmakedefine01 NCNN_INT8 #cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@" diff --git a/tests/test_convolution.cpp b/tests/test_convolution.cpp index fa242b7b1..766a086f7 100644 --- a/tests/test_convolution.cpp +++ b/tests/test_convolution.cpp @@ -171,6 +171,7 @@ static int test_convolution_2() || test_convolution_vec(64, 128, 1, 1, 1, 0, 0); } +#if NCNN_INT8 
static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, bool requant = false) { ncnn::Mat a = RandomMat(w, h, c); @@ -298,12 +299,20 @@ static int test_convolution_1() || test_convolution_int8(6, 7, 64, 64, 3, 1, 2, 0, 1) || test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0); } +#endif // NCNN_INT8 int main() { SRAND(7767517); + +#if NCNN_INT8 return 0 || test_convolution_0() || test_convolution_1() || test_convolution_2(); +#else + return 0 + || test_convolution_0() + || test_convolution_2(); +#endif } diff --git a/tests/test_convolutiondepthwise.cpp b/tests/test_convolutiondepthwise.cpp index a2f2146c0..8dce9ea50 100644 --- a/tests/test_convolutiondepthwise.cpp +++ b/tests/test_convolutiondepthwise.cpp @@ -125,6 +125,7 @@ static int test_convolutiondepthwise_0() return 0; } +#if NCNN_INT8 static int test_convolutiondepthwise_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int group, bool requant = false) { ncnn::Mat a = RandomMat(w, h, c); @@ -251,10 +252,15 @@ static int test_convolutiondepthwise_1() return 0; } +#endif // NCNN_INT8 int main() { SRAND(7767517); +#if NCNN_INT8 return test_convolutiondepthwise_0() || test_convolutiondepthwise_1(); +#else + return test_convolutiondepthwise_0(); +#endif } diff --git a/tests/test_innerproduct.cpp b/tests/test_innerproduct.cpp index 3d32d85c3..a66040923 100644 --- a/tests/test_innerproduct.cpp +++ b/tests/test_innerproduct.cpp @@ -87,6 +87,7 @@ static int test_innerproduct_2() || test_innerproduct(RandomMat(24), 32, 1); } +#if NCNN_INT8 static int test_innerproduct_int8(const ncnn::Mat& a, int outch, int bias) { ncnn::ParamDict pd; @@ -145,6 +146,7 @@ static int test_innerproduct_3() || test_innerproduct_int8(RandomMat(7, 2, 16), 4, 1) || test_innerproduct_int8(RandomMat(6, 3, 16), 16, 1); } +#endif // NCNN_INT8 static int test_innerproduct_gemm(const ncnn::Mat& a, int outch, int bias) { @@ -193,6 +195,7 
@@ static int test_innerproduct_4() || test_innerproduct_gemm(RandomMat(12, 16), 7, 1); } +#if NCNN_INT8 static int test_innerproduct_gemm_int8(const ncnn::Mat& a, int outch, int bias) { ncnn::ParamDict pd; @@ -242,11 +245,13 @@ static int test_innerproduct_5() || test_innerproduct_gemm_int8(RandomMat(6, 16), 16, 0) || test_innerproduct_gemm_int8(RandomMat(12, 16), 7, 1); } +#endif // NCNN_INT8 int main() { SRAND(7767517); +#if NCNN_INT8 return 0 || test_innerproduct_0() || test_innerproduct_1() @@ -254,4 +259,11 @@ int main() || test_innerproduct_3() || test_innerproduct_4() || test_innerproduct_5(); +#else + return 0 + || test_innerproduct_0() + || test_innerproduct_1() + || test_innerproduct_2() + || test_innerproduct_4(); +#endif } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index d7d1a8769..a255ac719 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -12,7 +12,11 @@ add_subdirectory(caffe) add_subdirectory(mxnet) add_subdirectory(onnx) add_subdirectory(darknet) -add_subdirectory(quantize) +if(NCNN_INT8) + add_subdirectory(quantize) +else() + message(WARNING "NCNN_INT8 disabled, quantize tools won't be built") +endif() add_executable(ncnn2mem ncnn2mem.cpp) target_link_libraries(ncnn2mem PRIVATE ncnn) diff --git a/tools/ncnnoptimize.cpp b/tools/ncnnoptimize.cpp index 153e5d374..778c3aacf 100644 --- a/tools/ncnnoptimize.cpp +++ b/tools/ncnnoptimize.cpp @@ -2673,8 +2673,10 @@ int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling() innerproduct->weight_data = convolution->weight_data; innerproduct->bias_data = convolution->bias_data; +#if NCNN_INT8 innerproduct->weight_data_int8_scales = convolution->weight_data_int8_scales; innerproduct->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales; +#endif innerproduct->activation_type = convolution->activation_type; innerproduct->activation_params = convolution->activation_params; @@ -2739,8 +2741,10 @@ int 
NetOptimize::replace_convolution_with_innerproduct_after_innerproduct() innerproduct2->weight_data = convolution->weight_data; innerproduct2->bias_data = convolution->bias_data; - innerproduct->weight_data_int8_scales = convolution->weight_data_int8_scales; - innerproduct->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales; +#if NCNN_INT8 + innerproduct2->weight_data_int8_scales = convolution->weight_data_int8_scales; + innerproduct2->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales; +#endif innerproduct2->activation_type = convolution->activation_type; innerproduct2->activation_params = convolution->activation_params;