| @@ -43,17 +43,28 @@ jobs: | |||
| sudo apt-get update | |||
| sudo apt-get install g++-aarch64-linux-gnu | |||
| - name: configure | |||
| run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| - name: build | |||
| run: cmake --build build -j 2 | |||
| run: | | |||
| mkdir build && cd build | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake --build . -j 2 | |||
| - name: test | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build | |||
| TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 | |||
| # noint8 variant: -DNCNN_INT8=OFF is required, otherwise this step just repeats the default build | |||
| - name: build-noint8 | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build-noint8 | |||
| TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 | |||
| linux-gcc-arm82: | |||
| runs-on: ubuntu-20.04 | |||
| steps: | |||
| @@ -90,13 +101,24 @@ jobs: | |||
| sudo apt-get update | |||
| sudo apt-get install g++-aarch64-linux-gnu | |||
| - name: configure | |||
| run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| - name: build | |||
| run: cmake --build build -j 2 | |||
| run: | | |||
| mkdir build && cd build | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake --build . -j 2 | |||
| - name: test | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build | |||
| TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 | |||
| - name: build-noint8 | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build-noint8 | |||
| TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 | |||
| @@ -43,17 +43,28 @@ jobs: | |||
| sudo apt-get update | |||
| sudo apt-get install g++-arm-linux-gnueabi | |||
| - name: configure | |||
| run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| - name: build | |||
| run: cmake --build build -j 2 | |||
| run: | | |||
| mkdir build && cd build | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake --build . -j 2 | |||
| - name: test | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build | |||
| TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2 | |||
| # noint8 variant: -DNCNN_INT8=OFF is required, otherwise this step just repeats the default build | |||
| - name: build-noint8 | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build-noint8 | |||
| TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2 | |||
| linux-gcc-armhf: | |||
| runs-on: ubuntu-20.04 | |||
| steps: | |||
| @@ -90,13 +101,24 @@ jobs: | |||
| sudo apt-get update | |||
| sudo apt-get install g++-arm-linux-gnueabihf | |||
| - name: configure | |||
| run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| - name: build | |||
| run: cmake --build build -j 2 | |||
| run: | | |||
| mkdir build && cd build | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake --build . -j 2 | |||
| - name: test | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build | |||
| TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2 | |||
| - name: build-noint8 | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build-noint8 | |||
| TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2 | |||
| @@ -47,6 +47,16 @@ jobs: | |||
| cmake --build . -j 2 | |||
| - name: test-avx2 | |||
| run: cd build-avx2 && ctest --output-on-failure -j 2 | |||
| - name: build-noint8 | |||
| env: | |||
| CC: clang | |||
| CXX: clang++ | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: cd build-noint8 && ctest --output-on-failure -j 2 | |||
| linux-clang-simplestl: | |||
| runs-on: ubuntu-latest | |||
| @@ -38,6 +38,13 @@ jobs: | |||
| cmake --build . -j 2 | |||
| - name: test-avx2 | |||
| run: cd build-avx2 && ctest --output-on-failure -j 2 | |||
| - name: build-noint8 | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: cd build-noint8 && ctest --output-on-failure -j 2 | |||
| linux-gcc-cpp03-nostdio-nostring-simplestl: | |||
| runs-on: ubuntu-16.04 | |||
| @@ -37,3 +37,13 @@ jobs: | |||
| mkdir build-shared && cd build-shared | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON .. | |||
| cmake --build . -j 2 | |||
| - name: build-noint8 | |||
| env: | |||
| CC: clang | |||
| CXX: clang++ | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: cd build-noint8 && ctest --output-on-failure -j 2 | |||
| @@ -31,3 +31,10 @@ jobs: | |||
| mkdir build-shared && cd build-shared | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON .. | |||
| cmake --build . -j 2 | |||
| - name: build-noint8 | |||
| run: | | |||
| mkdir build-noint8 && cd build-noint8 | |||
| cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF .. | |||
| cmake --build . -j 2 | |||
| - name: test-noint8 | |||
| run: cd build-noint8 && ctest --output-on-failure -j 2 | |||
| @@ -79,6 +79,7 @@ option(NCNN_BUILD_TESTS "build tests" OFF) | |||
| option(NCNN_COVERAGE "build for coverage" OFF) | |||
| option(NCNN_BUILD_BENCHMARK "build benchmark" ON) | |||
| option(NCNN_PYTHON "build python api" OFF) | |||
| option(NCNN_INT8 "int8 inference" ON) | |||
| if(ANDROID OR IOS OR NCNN_SIMPLESTL OR CMAKE_CROSSCOMPILING) | |||
| option(NCNN_DISABLE_RTTI "disable rtti" ON) | |||
| @@ -39,6 +39,15 @@ cmake -DNCNN_STRING=OFF .. | |||
| Read more [here](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md#input-and-output). | |||
| ### disable NCNN_INT8 | |||
| ``` | |||
| cmake -DNCNN_INT8=OFF .. | |||
| ``` | |||
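| * Cannot use quantized int8 inference; layers that carry int8 scales (e.g. Convolution) fail to load with an error asking to rebuild with NCNN_INT8 enabled. | |||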
| ### drop pixel rotate and affine functions | |||
| ``` | |||
| @@ -29,19 +29,21 @@ namespace ncnn { | |||
| #include "convolution_bf16s.h" | |||
| #include "convolution_sgemm.h" | |||
| #include "convolution_sgemm_int8.h" | |||
| #include "convolution_1x1.h" | |||
| #include "convolution_1x1_bf16s.h" | |||
| #include "convolution_1x1_int8.h" | |||
| #include "convolution_2x2.h" | |||
| #include "convolution_3x3.h" | |||
| #include "convolution_3x3_int8.h" | |||
| #include "convolution_4x4.h" | |||
| #include "convolution_5x5.h" | |||
| #include "convolution_7x7.h" | |||
| #if NCNN_INT8 | |||
| #include "convolution_sgemm_int8.h" | |||
| #include "convolution_1x1_int8.h" | |||
| #include "convolution_3x3_int8.h" | |||
| #include "convolution_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #if __ARM_NEON | |||
| #include "convolution_pack4.h" | |||
| @@ -67,6 +69,7 @@ namespace ncnn { | |||
| #include "convolution_7x7_pack1to4.h" | |||
| #include "convolution_7x7_pack1to4_bf16s.h" | |||
| #if NCNN_INT8 | |||
| #include "convolution_pack8_int8.h" | |||
| #include "convolution_pack1to8_int8.h" | |||
| #include "convolution_pack8to1_int8.h" | |||
| @@ -80,6 +83,7 @@ namespace ncnn { | |||
| #include "convolution_3x3_pack1to8_int8.h" | |||
| #include "convolution_7x7_pack1to8_int8.h" | |||
| #include "convolution_3x3_pack8to1_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
| #include "convolution_fp16s.h" | |||
| @@ -169,10 +173,12 @@ int Convolution_arm::create_pipeline(const Option& opt) | |||
| activation->create_pipeline(opt); | |||
| } | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return create_pipeline_int8_arm(opt); | |||
| } | |||
| #endif | |||
| #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
| if (opt.use_fp16_storage) | |||
| @@ -418,10 +424,12 @@ int Convolution_arm::destroy_pipeline(const Option& opt) | |||
| int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8_arm(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| if (bottom_blob.dims != 3) | |||
| { | |||
| @@ -1767,6 +1775,7 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const | |||
| return 0; | |||
| } | |||
| #if NCNN_INT8 | |||
| int Convolution_arm::create_pipeline_int8_arm(const Option& opt) | |||
| { | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -2263,6 +2272,7 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| int Convolution_arm::forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| @@ -37,8 +37,10 @@ protected: | |||
| #endif | |||
| int create_pipeline_bf16s(const Option& opt); | |||
| int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #if NCNN_INT8 | |||
| int create_pipeline_int8_arm(const Option& opt); | |||
| int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| int forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| public: | |||
| @@ -67,11 +69,13 @@ public: | |||
| // bf16 | |||
| Mat weight_data_bf16; | |||
| #if NCNN_INT8 | |||
| // int8 | |||
| Mat weight_data_int8; | |||
| // Mat weight_3x3s2_data_int8; | |||
| std::vector<Mat> weight_3x3_winograd23_data_int8; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -26,16 +26,21 @@ | |||
| namespace ncnn { | |||
| #include "convolutiondepthwise_3x3.h" | |||
| #include "convolutiondepthwise_3x3_int8.h" | |||
| #include "convolutiondepthwise_5x5.h" | |||
| #if NCNN_INT8 | |||
| #include "convolutiondepthwise_3x3_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #if __ARM_NEON | |||
| #include "convolutiondepthwise_3x3_pack4.h" | |||
| #include "convolutiondepthwise_3x3_pack4_bf16s.h" | |||
| #include "convolutiondepthwise_5x5_pack4.h" | |||
| #include "convolutiondepthwise_5x5_pack4_bf16s.h" | |||
| #if NCNN_INT8 | |||
| #include "convolutiondepthwise_3x3_pack8_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
| #include "convolutiondepthwise_3x3_fp16s.h" | |||
| @@ -104,10 +109,12 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| activation->create_pipeline(opt); | |||
| } | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return create_pipeline_int8_arm(opt); | |||
| } | |||
| #endif | |||
| const int maxk = kernel_w * kernel_h; | |||
| int channels = (weight_data_size / group) / maxk / (num_output / group) * group; | |||
| @@ -269,6 +276,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) | |||
| weights[0] = weight_data_g; | |||
| weights[1] = bias_data_g; | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term) | |||
| { | |||
| Mat weight_data_int8_scales_g(num_output_g); | |||
| @@ -280,6 +288,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) | |||
| { | |||
| weights[4] = top_blob_int8_scales.range(g, 1); | |||
| } | |||
| #endif | |||
| op->load_model(ModelBinFromMatArray(weights)); | |||
| } | |||
| @@ -288,6 +297,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) | |||
| ncnn::Mat weights[4]; | |||
| weights[0] = weight_data_g; | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term) | |||
| { | |||
| Mat weight_data_int8_scales_g(num_output_g); | |||
| @@ -299,6 +309,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt) | |||
| { | |||
| weights[3] = top_blob_int8_scales.range(g, 1); | |||
| } | |||
| #endif | |||
| op->load_model(ModelBinFromMatArray(weights)); | |||
| } | |||
| @@ -332,13 +343,12 @@ int ConvolutionDepthWise_arm::destroy_pipeline(const Option& opt) | |||
| int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| // convolv with NxN kernel | |||
| // value = value + bias | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8_arm(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| int elembits = bottom_blob.elembits(); | |||
| @@ -1447,6 +1457,7 @@ int ConvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blo | |||
| return 0; | |||
| } | |||
| #if NCNN_INT8 | |||
| int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt) | |||
| { | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -1981,5 +1992,6 @@ int ConvolutionDepthWise_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_ | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } // namespace ncnn | |||
| @@ -36,8 +36,10 @@ protected: | |||
| int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #if NCNN_INT8 | |||
| int create_pipeline_int8_arm(const Option& opt); | |||
| int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| public: | |||
| Layer* activation; | |||
| @@ -54,8 +56,10 @@ public: | |||
| Mat weight_data_bf16; | |||
| Mat weight_data_pack4_bf16; | |||
| #if NCNN_INT8 | |||
| // int8 | |||
| Mat weight_data_int8; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -55,10 +55,12 @@ int InnerProduct_arm::create_pipeline(const Option& opt) | |||
| } | |||
| #endif // __ARM_NEON | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return create_pipeline_int8_arm(opt); | |||
| } | |||
| #endif | |||
| #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
| if (opt.use_fp16_storage) | |||
| @@ -94,260 +96,14 @@ int InnerProduct_arm::destroy_pipeline(const Option& opt) | |||
| return 0; | |||
| } | |||
| // Prepares the int8 inner-product path. | |||
| // Creates a ReLU activation layer when activation_type == 1, then repacks the | |||
| // signed-char weights from weight_data into weight_data_int8 so that the | |||
| // out_elempack weights sharing one input index are stored contiguously. | |||
| // opt: only use_packing_layout is read here, to choose out_elempack (8 or 1). | |||
| // Returns 0 on every path. | |||
| int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt) | |||
| { | |||
| if (activation_type == 1) | |||
| { | |||
| // activation_type 1: create a ReLU layer, loaded with a default-constructed ParamDict | |||
| activation = ncnn::create_layer(ncnn::LayerType::ReLU); | |||
| ncnn::ParamDict pd; | |||
| activation->load_param(pd); | |||
| } | |||
| // number of inputs per output, derived from the flat weight count | |||
| const int num_input = weight_data_size / num_output; | |||
| // pack 8 outputs together only when packing is requested and num_output is a multiple of 8 | |||
| int out_elempack = 1; | |||
| if (opt.use_packing_layout) | |||
| { | |||
| out_elempack = num_output % 8 == 0 ? 8 : 1; | |||
| } | |||
| // src = inch-outch | |||
| // dst = pb-inch-outch/pb | |||
| { | |||
| // view the weights as num_output rows of num_input values | |||
| Mat weight_data_r2 = weight_data.reshape(num_input, num_output); | |||
| // NOTE(review): the result of create() is not checked before the rows are written | |||
| weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack); | |||
| for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack) | |||
| { | |||
| // g0 walks one destination row that interleaves out_elempack source rows | |||
| signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack); | |||
| for (int p = 0; p < num_input; p++) | |||
| { | |||
| for (int j = 0; j < out_elempack; j++) | |||
| { | |||
| *g0++ = weight_data_r2.row<signed char>(q + j)[p]; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| // The commented-out block below is a disabled aarch64 reorder / pre-built scales path; it is not compiled. | |||
| // // convert fp32 to int8 | |||
| // if (weight_data_int8_scales.empty()) | |||
| // { | |||
| // return 0; | |||
| // } | |||
| // #if __aarch64__ | |||
| // // first reorder Matrix A before MatMul | |||
| // const int n = num_output; | |||
| // const int k = weight_data.total() / n; | |||
| // weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator); | |||
| // | |||
| // int8_t* b = weight_data; | |||
| // int8_t* sb = weight_data_int8; | |||
| // reorder_a(b, sb, n, k, k); | |||
| // | |||
| // // pre-built scales | |||
| // scales_in.create(num_output, 4u, opt.blob_allocator); | |||
| // for (int i = 0; i < num_output; ++i) | |||
| // { | |||
| // if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6) | |||
| // { | |||
| // scales_in[i] = 0.f; | |||
| // } | |||
| // else | |||
| // { | |||
| // scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]); | |||
| // } | |||
| // } | |||
| // #endif | |||
| return 0; | |||
| } | |||
| // Int8 inner-product forward pass. | |||
| // A 2-D input whose width equals num_input and that has more than one row | |||
| // (h * elempack > 1) is unpacked to elempack 1 and handed to forward_int8. | |||
| // Otherwise the input is quantized to int8 if it is not already 8-bit, | |||
| // flattened, multiplied against weight_data_int8 with int32 accumulation, | |||
| // dequantized into top_blob using per-output scales and bias_data, and passed | |||
| // through activation when one is set. | |||
| // Returns 0 on success, -100 when top_blob or the int32 scratch blob is empty | |||
| // after create(); the gemm branch returns whatever forward_int8 returns. | |||
| int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| const int num_input = weight_data_size / num_output; | |||
| if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1) | |||
| { | |||
| // gemm | |||
| // unpack into workspace memory, then delegate (presumably to the base InnerProduct implementation - confirm) | |||
| Mat bottom_blob_unpacked; | |||
| Option opt_unpack = opt; | |||
| opt_unpack.blob_allocator = opt.workspace_allocator; | |||
| convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack); | |||
| return forward_int8(bottom_blob_unpacked, top_blob, opt); | |||
| } | |||
| int elembits = bottom_blob.elembits(); | |||
| // quantize only when the input is not already 8-bit; the temporary lives in the workspace allocator | |||
| Mat bottom_blob_int8 = bottom_blob; | |||
| if (elembits != 8) | |||
| { | |||
| Option opt_q = opt; | |||
| opt_q.blob_allocator = opt.workspace_allocator; | |||
| quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q); | |||
| } | |||
| // the dot-product loops below index the input linearly, so flatten anything that is not 1-D | |||
| Mat bottom_blob_int8_flattened = bottom_blob_int8; | |||
| if (bottom_blob_int8.dims != 1) | |||
| { | |||
| Option opt_flatten = opt; | |||
| opt_flatten.blob_allocator = opt.workspace_allocator; | |||
| flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten); | |||
| } | |||
| // int elempack = bottom_blob_int8_flattened.elempack; | |||
| // NOTE(review): out_elempack can become 8 even without __ARM_NEON; then neither loop below | |||
| // runs and top_blob_int32 is dequantized without being written - confirm packing is never | |||
| // requested on non-NEON builds. | |||
| int out_elempack = 1; | |||
| if (opt.use_packing_layout) | |||
| { | |||
| out_elempack = num_output % 8 == 0 ? 8 : 1; | |||
| } | |||
| top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator); | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| // int32 accumulators with the same layout as top_blob, allocated from the workspace | |||
| Mat top_blob_int32; | |||
| top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator); | |||
| if (top_blob_int32.empty()) | |||
| return -100; | |||
| #if __ARM_NEON | |||
| if (out_elempack == 8) | |||
| { | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < num_output / out_elempack; p++) | |||
| { | |||
| // two int32x4 accumulators cover the 8 packed outputs of row p | |||
| int32x4_t _sum0 = vdupq_n_s32(0); | |||
| int32x4_t _sum1 = vdupq_n_s32(0); | |||
| const signed char* kptr = weight_data_int8.row<const signed char>(p); | |||
| const signed char* sptr = bottom_blob_int8_flattened; | |||
| int i = 0; | |||
| for (; i < num_input; i++) | |||
| { | |||
| // broadcast one input byte, widen-multiply with 8 weights, widen-add into the int32 sums | |||
| int8x8_t _val = vdup_n_s8(sptr[0]); | |||
| int8x8_t _w = vld1_s8(kptr); | |||
| int16x8_t _s0 = vmull_s8(_val, _w); | |||
| _sum0 = vaddw_s16(_sum0, vget_low_s16(_s0)); | |||
| _sum1 = vaddw_s16(_sum1, vget_high_s16(_s0)); | |||
| sptr += 1; | |||
| kptr += 8; | |||
| } | |||
| int* outptr = (int*)top_blob_int32; | |||
| vst1q_s32(outptr + p * 8, _sum0); | |||
| vst1q_s32(outptr + p * 8 + 4, _sum1); | |||
| } | |||
| } | |||
| #endif // __ARM_NEON | |||
| if (out_elempack == 1) | |||
| { | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < num_output / out_elempack; p++) | |||
| { | |||
| // scalar dot product of the input with weight row p | |||
| int sum = 0; | |||
| const signed char* kptr = weight_data_int8.row<const signed char>(p); | |||
| const signed char* sptr = bottom_blob_int8_flattened; | |||
| int i = 0; | |||
| for (; i < num_input; i++) | |||
| { | |||
| signed char val = sptr[0]; | |||
| signed char w = kptr[0]; | |||
| sum += val * w; | |||
| sptr += 1; | |||
| kptr += 1; | |||
| } | |||
| int* outptr = (int*)top_blob_int32; | |||
| outptr[p] = sum; | |||
| } | |||
| } | |||
| // per-output dequantize factor: 1 / (input scale * weight scale), forced to 0 when the weight scale is 0 | |||
| Mat scale_data(num_output); | |||
| for (int p = 0; p < num_output; p++) | |||
| { | |||
| // dequantize | |||
| float scale_in; | |||
| if (weight_data_int8_scales[p] == 0) | |||
| scale_in = 0; | |||
| else | |||
| scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]); | |||
| scale_data[p] = scale_in; | |||
| } | |||
| dequantize_from_int32(top_blob_int32, top_blob, scale_data, bias_data, opt); | |||
| if (activation) | |||
| { | |||
| activation->forward_inplace(top_blob, opt); | |||
| } | |||
| return 0; | |||
| // Everything below is a disabled aarch64 int8kernel path; it is commented out and follows the return. | |||
| // #if __aarch64__ | |||
| // const int w = bottom_blob_tm.w; | |||
| // const int h = bottom_blob_tm.h; | |||
| // | |||
| // const int m = 1; | |||
| // const int k = bottom_blob_tm.c * w * h; | |||
| // Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator); | |||
| // { | |||
| // reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k); | |||
| // } | |||
| // | |||
| // Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator); | |||
| // int32_t* pc = top_blob_tm; | |||
| // const int8_t* pa = bottom_blob_reorder; | |||
| // const int8_t* pb = weight_data_int8; | |||
| // int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt); | |||
| // | |||
| // float* outptr = top_blob; | |||
| // | |||
| // // dequant.fused.relu int32_t to float | |||
| // for (int p = 0; p < num_output; ++p) | |||
| // { | |||
| // float sumfp32 = pc[p] * scales_in[p]; | |||
| // if (bias_term) | |||
| // { | |||
| // sumfp32 += bias_data[p]; | |||
| // } | |||
| // if (1 == activation_type) | |||
| // { | |||
| // sumfp32 = std::max(0.f, sumfp32); | |||
| // } | |||
| // | |||
| // outptr[p] = sumfp32; | |||
| // } | |||
| // return 0; | |||
| // #else | |||
| // return InnerProduct::forward_int8(bottom_blob, top_blob, opt); | |||
| // #endif | |||
| } | |||
| int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8_arm(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| int elembits = bottom_blob.elembits(); | |||
| @@ -2140,4 +1896,254 @@ int InnerProduct_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const | |||
| return 0; | |||
| } | |||
| #if NCNN_INT8 | |||
| // Prepares the int8 inner-product path. | |||
| // Creates a ReLU activation layer when activation_type == 1, then repacks the | |||
| // signed-char weights from weight_data into weight_data_int8 so that the | |||
| // out_elempack weights sharing one input index are stored contiguously. | |||
| // opt: only use_packing_layout is read here, to choose out_elempack (8 or 1). | |||
| // Returns 0 on every path. | |||
| int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt) | |||
| { | |||
| if (activation_type == 1) | |||
| { | |||
| // activation_type 1: create a ReLU layer, loaded with a default-constructed ParamDict | |||
| activation = ncnn::create_layer(ncnn::LayerType::ReLU); | |||
| ncnn::ParamDict pd; | |||
| activation->load_param(pd); | |||
| } | |||
| // number of inputs per output, derived from the flat weight count | |||
| const int num_input = weight_data_size / num_output; | |||
| // pack 8 outputs together only when packing is requested and num_output is a multiple of 8 | |||
| int out_elempack = 1; | |||
| if (opt.use_packing_layout) | |||
| { | |||
| out_elempack = num_output % 8 == 0 ? 8 : 1; | |||
| } | |||
| // src = inch-outch | |||
| // dst = pb-inch-outch/pb | |||
| { | |||
| // view the weights as num_output rows of num_input values | |||
| Mat weight_data_r2 = weight_data.reshape(num_input, num_output); | |||
| // NOTE(review): the result of create() is not checked before the rows are written | |||
| weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack); | |||
| for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack) | |||
| { | |||
| // g0 walks one destination row that interleaves out_elempack source rows | |||
| signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack); | |||
| for (int p = 0; p < num_input; p++) | |||
| { | |||
| for (int j = 0; j < out_elempack; j++) | |||
| { | |||
| *g0++ = weight_data_r2.row<signed char>(q + j)[p]; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| // The commented-out block below is a disabled aarch64 reorder / pre-built scales path; it is not compiled. | |||
| // // convert fp32 to int8 | |||
| // if (weight_data_int8_scales.empty()) | |||
| // { | |||
| // return 0; | |||
| // } | |||
| // #if __aarch64__ | |||
| // // first reorder Matrix A before MatMul | |||
| // const int n = num_output; | |||
| // const int k = weight_data.total() / n; | |||
| // weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator); | |||
| // | |||
| // int8_t* b = weight_data; | |||
| // int8_t* sb = weight_data_int8; | |||
| // reorder_a(b, sb, n, k, k); | |||
| // | |||
| // // pre-built scales | |||
| // scales_in.create(num_output, 4u, opt.blob_allocator); | |||
| // for (int i = 0; i < num_output; ++i) | |||
| // { | |||
| // if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6) | |||
| // { | |||
| // scales_in[i] = 0.f; | |||
| // } | |||
| // else | |||
| // { | |||
| // scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]); | |||
| // } | |||
| // } | |||
| // #endif | |||
| return 0; | |||
| } | |||
| // Int8 inner-product forward pass. | |||
| // A 2-D input whose width equals num_input and that has more than one row | |||
| // (h * elempack > 1) is unpacked to elempack 1 and handed to forward_int8. | |||
| // Otherwise the input is quantized to int8 if it is not already 8-bit, | |||
| // flattened, multiplied against weight_data_int8 with int32 accumulation, | |||
| // dequantized into top_blob using per-output scales and bias_data, and passed | |||
| // through activation when one is set. | |||
| // Returns 0 on success, -100 when top_blob or the int32 scratch blob is empty | |||
| // after create(); the gemm branch returns whatever forward_int8 returns. | |||
| int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| const int num_input = weight_data_size / num_output; | |||
| if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1) | |||
| { | |||
| // gemm | |||
| // unpack into workspace memory, then delegate (presumably to the base InnerProduct implementation - confirm) | |||
| Mat bottom_blob_unpacked; | |||
| Option opt_unpack = opt; | |||
| opt_unpack.blob_allocator = opt.workspace_allocator; | |||
| convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack); | |||
| return forward_int8(bottom_blob_unpacked, top_blob, opt); | |||
| } | |||
| int elembits = bottom_blob.elembits(); | |||
| // quantize only when the input is not already 8-bit; the temporary lives in the workspace allocator | |||
| Mat bottom_blob_int8 = bottom_blob; | |||
| if (elembits != 8) | |||
| { | |||
| Option opt_q = opt; | |||
| opt_q.blob_allocator = opt.workspace_allocator; | |||
| quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q); | |||
| } | |||
| // the dot-product loops below index the input linearly, so flatten anything that is not 1-D | |||
| Mat bottom_blob_int8_flattened = bottom_blob_int8; | |||
| if (bottom_blob_int8.dims != 1) | |||
| { | |||
| Option opt_flatten = opt; | |||
| opt_flatten.blob_allocator = opt.workspace_allocator; | |||
| flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten); | |||
| } | |||
| // int elempack = bottom_blob_int8_flattened.elempack; | |||
| // NOTE(review): out_elempack can become 8 even without __ARM_NEON; then neither loop below | |||
| // runs and top_blob_int32 is dequantized without being written - confirm packing is never | |||
| // requested on non-NEON builds. | |||
| int out_elempack = 1; | |||
| if (opt.use_packing_layout) | |||
| { | |||
| out_elempack = num_output % 8 == 0 ? 8 : 1; | |||
| } | |||
| top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator); | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| // int32 accumulators with the same layout as top_blob, allocated from the workspace | |||
| Mat top_blob_int32; | |||
| top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator); | |||
| if (top_blob_int32.empty()) | |||
| return -100; | |||
| #if __ARM_NEON | |||
| if (out_elempack == 8) | |||
| { | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < num_output / out_elempack; p++) | |||
| { | |||
| // two int32x4 accumulators cover the 8 packed outputs of row p | |||
| int32x4_t _sum0 = vdupq_n_s32(0); | |||
| int32x4_t _sum1 = vdupq_n_s32(0); | |||
| const signed char* kptr = weight_data_int8.row<const signed char>(p); | |||
| const signed char* sptr = bottom_blob_int8_flattened; | |||
| int i = 0; | |||
| for (; i < num_input; i++) | |||
| { | |||
| // broadcast one input byte, widen-multiply with 8 weights, widen-add into the int32 sums | |||
| int8x8_t _val = vdup_n_s8(sptr[0]); | |||
| int8x8_t _w = vld1_s8(kptr); | |||
| int16x8_t _s0 = vmull_s8(_val, _w); | |||
| _sum0 = vaddw_s16(_sum0, vget_low_s16(_s0)); | |||
| _sum1 = vaddw_s16(_sum1, vget_high_s16(_s0)); | |||
| sptr += 1; | |||
| kptr += 8; | |||
| } | |||
| int* outptr = (int*)top_blob_int32; | |||
| vst1q_s32(outptr + p * 8, _sum0); | |||
| vst1q_s32(outptr + p * 8 + 4, _sum1); | |||
| } | |||
| } | |||
| #endif // __ARM_NEON | |||
| if (out_elempack == 1) | |||
| { | |||
| // num_output | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int p = 0; p < num_output / out_elempack; p++) | |||
| { | |||
| // scalar dot product of the input with weight row p | |||
| int sum = 0; | |||
| const signed char* kptr = weight_data_int8.row<const signed char>(p); | |||
| const signed char* sptr = bottom_blob_int8_flattened; | |||
| int i = 0; | |||
| for (; i < num_input; i++) | |||
| { | |||
| signed char val = sptr[0]; | |||
| signed char w = kptr[0]; | |||
| sum += val * w; | |||
| sptr += 1; | |||
| kptr += 1; | |||
| } | |||
| int* outptr = (int*)top_blob_int32; | |||
| outptr[p] = sum; | |||
| } | |||
| } | |||
| // per-output dequantize factor: 1 / (input scale * weight scale), forced to 0 when the weight scale is 0 | |||
| Mat scale_data(num_output); | |||
| for (int p = 0; p < num_output; p++) | |||
| { | |||
| // dequantize | |||
| float scale_in; | |||
| if (weight_data_int8_scales[p] == 0) | |||
| scale_in = 0; | |||
| else | |||
| scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]); | |||
| scale_data[p] = scale_in; | |||
| } | |||
| dequantize_from_int32(top_blob_int32, top_blob, scale_data, bias_data, opt); | |||
| if (activation) | |||
| { | |||
| activation->forward_inplace(top_blob, opt); | |||
| } | |||
| return 0; | |||
| // Everything below is a disabled aarch64 int8kernel path; it is commented out and follows the return. | |||
| // #if __aarch64__ | |||
| // const int w = bottom_blob_tm.w; | |||
| // const int h = bottom_blob_tm.h; | |||
| // | |||
| // const int m = 1; | |||
| // const int k = bottom_blob_tm.c * w * h; | |||
| // Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator); | |||
| // { | |||
| // reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k); | |||
| // } | |||
| // | |||
| // Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator); | |||
| // int32_t* pc = top_blob_tm; | |||
| // const int8_t* pa = bottom_blob_reorder; | |||
| // const int8_t* pb = weight_data_int8; | |||
| // int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt); | |||
| // | |||
| // float* outptr = top_blob; | |||
| // | |||
| // // dequant.fused.relu int32_t to float | |||
| // for (int p = 0; p < num_output; ++p) | |||
| // { | |||
| // float sumfp32 = pc[p] * scales_in[p]; | |||
| // if (bias_term) | |||
| // { | |||
| // sumfp32 += bias_data[p]; | |||
| // } | |||
| // if (1 == activation_type) | |||
| // { | |||
| // sumfp32 = std::max(0.f, sumfp32); | |||
| // } | |||
| // | |||
| // outptr[p] = sumfp32; | |||
| // } | |||
| // return 0; | |||
| // #else | |||
| // return InnerProduct::forward_int8(bottom_blob, top_blob, opt); | |||
| // #endif | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } // namespace ncnn | |||
| @@ -39,9 +39,10 @@ protected: | |||
| #endif | |||
| int create_pipeline_bf16s(const Option& opt); | |||
| int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #if NCNN_INT8 | |||
| int create_pipeline_int8_arm(const Option& opt); | |||
| int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| public: | |||
| Layer* flatten; | |||
| @@ -54,9 +55,11 @@ public: | |||
| // bf16 | |||
| Mat weight_data_bf16; | |||
| #if NCNN_INT8 | |||
| // int8 | |||
| Mat weight_data_int8; | |||
| Mat scales_in; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -47,7 +47,12 @@ int Convolution::load_param(const ParamDict& pd) | |||
| if (int8_scale_term) | |||
| { | |||
| #if NCNN_INT8 | |||
| support_int8_storage = true; | |||
| #else | |||
| NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference"); | |||
| return -1; | |||
| #endif | |||
| } | |||
| return 0; | |||
| @@ -66,6 +71,7 @@ int Convolution::load_model(const ModelBin& mb) | |||
| return -100; | |||
| } | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term) | |||
| { | |||
| weight_data_int8_scales = mb.load(num_output, 1); | |||
| @@ -76,12 +82,14 @@ int Convolution::load_model(const ModelBin& mb) | |||
| { | |||
| top_blob_int8_scales = mb.load(1, 1); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| return 0; | |||
| } | |||
| int Convolution::create_pipeline(const Option& opt) | |||
| { | |||
| #if NCNN_INT8 | |||
| // runtime quantize the weight data | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term) | |||
| { | |||
| @@ -101,6 +109,7 @@ int Convolution::create_pipeline(const Option& opt) | |||
| weight_data = weight_data_int8.reshape(weight_data_size); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| return 0; | |||
| } | |||
| @@ -110,10 +119,12 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op | |||
| // convolv with NxN kernel | |||
| // value = value + bias | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| // flattened blob, implement as InnerProduct | |||
| if (bottom_blob.dims == 1 && kernel_w == 1 && kernel_h == 1) | |||
| @@ -140,11 +151,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op | |||
| weights[0] = weight_data; | |||
| weights[1] = bias_data; | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term) | |||
| { | |||
| weights[2] = weight_data_int8_scales; | |||
| weights[3] = bottom_blob_int8_scales; | |||
| } | |||
| #endif | |||
| op->load_model(ModelBinFromMatArray(weights)); | |||
| @@ -327,6 +340,7 @@ void Convolution::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered | |||
| } | |||
| } | |||
| #if NCNN_INT8 | |||
| static inline signed char float2int8(float v) | |||
| { | |||
| int int32 = static_cast<int>(round(v)); | |||
| @@ -492,5 +506,6 @@ int Convolution::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Optio | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } // namespace ncnn | |||
| @@ -35,7 +35,9 @@ public: | |||
| protected: | |||
| void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const; | |||
| #if NCNN_INT8 | |||
| int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| public: | |||
| // param | |||
| @@ -65,9 +67,11 @@ public: | |||
| Mat weight_data; | |||
| Mat bias_data; | |||
| #if NCNN_INT8 | |||
| Mat weight_data_int8_scales; | |||
| Mat bottom_blob_int8_scales; | |||
| Mat top_blob_int8_scales; | |||
| #endif | |||
| // implementation type, 0 means do not use auto pack model | |||
| int impl_type; | |||
| @@ -53,7 +53,12 @@ int ConvolutionDepthWise::load_param(const ParamDict& pd) | |||
| if (int8_scale_term) | |||
| { | |||
| #if NCNN_INT8 | |||
| support_int8_storage = true; | |||
| #else | |||
| NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference"); | |||
| return -1; | |||
| #endif | |||
| } | |||
| return 0; | |||
| @@ -72,6 +77,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb) | |||
| return -100; | |||
| } | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term == 1 || int8_scale_term == 101) | |||
| { | |||
| weight_data_int8_scales = mb.load(group, 1); | |||
| @@ -104,12 +110,14 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb) | |||
| top_blob_int8_scales = Mat(group); | |||
| top_blob_int8_scales.fill(top_blob_int8_scale); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| return 0; | |||
| } | |||
| int ConvolutionDepthWise::create_pipeline(const Option& opt) | |||
| { | |||
| #if NCNN_INT8 | |||
| // runtime quantize the weight data | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term) | |||
| { | |||
| @@ -133,6 +141,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt) | |||
| weight_data = int8_weight_data; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| return 0; | |||
| } | |||
| @@ -142,10 +151,12 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O | |||
| // convolv with NxN kernel | |||
| // value = value + bias | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| @@ -403,6 +414,7 @@ void ConvolutionDepthWise::make_padding(const Mat& bottom_blob, Mat& bottom_blob | |||
| } | |||
| } | |||
| #if NCNN_INT8 | |||
| static inline signed char float2int8(float v) | |||
| { | |||
| int int32 = static_cast<int>(round(v)); | |||
| @@ -694,5 +706,6 @@ int ConvolutionDepthWise::forward_int8(const Mat& bottom_blob, Mat& top_blob, co | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } // namespace ncnn | |||
| @@ -35,7 +35,9 @@ public: | |||
| protected: | |||
| void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const; | |||
| #if NCNN_INT8 | |||
| int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| public: | |||
| // param | |||
| @@ -66,9 +68,11 @@ public: | |||
| Mat weight_data; | |||
| Mat bias_data; | |||
| #if NCNN_INT8 | |||
| Mat weight_data_int8_scales; | |||
| Mat bottom_blob_int8_scales; | |||
| Mat top_blob_int8_scales; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -35,7 +35,12 @@ int InnerProduct::load_param(const ParamDict& pd) | |||
| if (int8_scale_term) | |||
| { | |||
| #if NCNN_INT8 | |||
| support_int8_storage = true; | |||
| #else | |||
| NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference"); | |||
| return -1; | |||
| #endif | |||
| } | |||
| return 0; | |||
| @@ -54,17 +59,20 @@ int InnerProduct::load_model(const ModelBin& mb) | |||
| return -100; | |||
| } | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term) | |||
| { | |||
| weight_data_int8_scales = mb.load(num_output, 1); | |||
| bottom_blob_int8_scales = mb.load(1, 1); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| return 0; | |||
| } | |||
| int InnerProduct::create_pipeline(const Option& opt) | |||
| { | |||
| #if NCNN_INT8 | |||
| // runtime quantize the weight data | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term) | |||
| { | |||
| @@ -81,16 +89,19 @@ int InnerProduct::create_pipeline(const Option& opt) | |||
| weight_data = weight_data_int8.reshape(weight_data_size); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| return 0; | |||
| } | |||
| int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| const int num_input = weight_data_size / num_output; | |||
| @@ -218,6 +229,7 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& o | |||
| return 0; | |||
| } | |||
| #if NCNN_INT8 | |||
| int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| const int num_input = weight_data_size / num_output; | |||
| @@ -332,5 +344,6 @@ int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Opti | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } // namespace ncnn | |||
| @@ -33,7 +33,9 @@ public: | |||
| virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| protected: | |||
| #if NCNN_INT8 | |||
| int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| public: | |||
| // param | |||
| @@ -52,8 +54,10 @@ public: | |||
| Mat weight_data; | |||
| Mat bias_data; | |||
| #if NCNN_INT8 | |||
| Mat weight_data_int8_scales; | |||
| Mat bottom_blob_int8_scales; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -35,18 +35,21 @@ namespace ncnn { | |||
| #include "convolution_5x5.h" | |||
| #include "convolution_7x7.h" | |||
| #if NCNN_INT8 | |||
| #include "convolution_sgemm_int8.h" | |||
| #include "convolution_1x1_int8.h" | |||
| #include "convolution_3x3_int8.h" | |||
| #include "convolution_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #if __SSE2__ | |||
| #include "convolution_1x1_pack4.h" | |||
| #if NCNN_INT8 | |||
| #include "convolution_pack8_int8.h" | |||
| #include "convolution_pack1to8_int8.h" | |||
| #include "convolution_pack8to1_int8.h" | |||
| #endif // NCNN_INT8 | |||
| #if __AVX__ | |||
| #include "convolution_3x3_pack1to8.h" | |||
| #include "convolution_3x3_pack8to1.h" | |||
| @@ -118,10 +121,12 @@ int Convolution_x86::create_pipeline(const Option& opt) | |||
| activation->create_pipeline(opt); | |||
| } | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return create_pipeline_int8_x86(opt); | |||
| } | |||
| #endif | |||
| int kernel_size = kernel_w * kernel_h; | |||
| int num_input = weight_data_size / kernel_size / num_output; | |||
| @@ -311,10 +316,12 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| // convolv with NxN kernel | |||
| // value = value + bias | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8_x86(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| if (bottom_blob.dims != 3) | |||
| { | |||
| @@ -1058,6 +1065,7 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| return 0; | |||
| } | |||
| #if NCNN_INT8 | |||
| int Convolution_x86::create_pipeline_int8_x86(const Option& opt) | |||
| { | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -1410,6 +1418,7 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| int Convolution_x86::forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| @@ -30,8 +30,10 @@ public: | |||
| virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| protected: | |||
| #if NCNN_INT8 | |||
| int create_pipeline_int8_x86(const Option& opt); | |||
| int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| int forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| public: | |||
| @@ -47,9 +49,11 @@ public: | |||
| Mat weight_3x3_winograd64_data_pack8; | |||
| #if NCNN_INT8 | |||
| // int8 | |||
| Mat weight_data_int8; | |||
| Mat weight_3x3_winograd23_data_int8; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -36,7 +36,9 @@ namespace ncnn { | |||
| #endif | |||
| #endif // __SSE2__ | |||
| #include "convolutiondepthwise_3x3.h" | |||
| #if NCNN_INT8 | |||
| #include "convolutiondepthwise_3x3_int8.h" | |||
| #endif // NCNN_INT8 | |||
| ConvolutionDepthWise_x86::ConvolutionDepthWise_x86() | |||
| { | |||
| @@ -102,10 +104,12 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) | |||
| activation->create_pipeline(opt); | |||
| } | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return create_pipeline_int8_x86(opt); | |||
| } | |||
| #endif | |||
| const int maxk = kernel_w * kernel_h; | |||
| int channels = (weight_data_size / group) / maxk / (num_output / group) * group; | |||
| @@ -235,6 +239,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) | |||
| weights[0] = weight_data_g; | |||
| weights[1] = bias_data_g; | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term) | |||
| { | |||
| Mat weight_data_int8_scales_g(num_output_g); | |||
| @@ -246,6 +251,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) | |||
| { | |||
| weights[4] = top_blob_int8_scales.range(g, 1); | |||
| } | |||
| #endif | |||
| op->load_model(ModelBinFromMatArray(weights)); | |||
| } | |||
| @@ -254,6 +260,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) | |||
| ncnn::Mat weights[4]; | |||
| weights[0] = weight_data_g; | |||
| #if NCNN_INT8 | |||
| if (int8_scale_term) | |||
| { | |||
| Mat weight_data_int8_scales_g(num_output_g); | |||
| @@ -265,6 +272,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt) | |||
| { | |||
| weights[3] = top_blob_int8_scales.range(g, 1); | |||
| } | |||
| #endif | |||
| op->load_model(ModelBinFromMatArray(weights)); | |||
| } | |||
| @@ -298,13 +306,12 @@ int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt) | |||
| int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| // convolv with NxN kernel | |||
| // value = value + bias | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8_x86(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| @@ -628,6 +635,7 @@ int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, con | |||
| return 0; | |||
| } | |||
| #if NCNN_INT8 | |||
| int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt) | |||
| { | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -1061,5 +1069,6 @@ int ConvolutionDepthWise_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_ | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } // namespace ncnn | |||
| @@ -31,8 +31,10 @@ public: | |||
| protected: | |||
| int create_group_ops(const Option& opt); | |||
| #if NCNN_INT8 | |||
| int create_pipeline_int8_x86(const Option& opt); | |||
| int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| public: | |||
| Layer* activation; | |||
| @@ -41,8 +43,10 @@ public: | |||
| // packing | |||
| Mat weight_data_packed; | |||
| #if NCNN_INT8 | |||
| // int8 | |||
| Mat weight_data_int8; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -54,10 +54,12 @@ int InnerProduct_x86::create_pipeline(const Option& opt) | |||
| flatten->create_pipeline(opt); | |||
| } | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return create_pipeline_int8_x86(opt); | |||
| } | |||
| #endif | |||
| const int num_input = weight_data_size / num_output; | |||
| @@ -124,10 +126,12 @@ int InnerProduct_x86::destroy_pipeline(const Option& opt) | |||
| int InnerProduct_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||
| { | |||
| #if NCNN_INT8 | |||
| if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u) | |||
| { | |||
| return forward_int8_x86(bottom_blob, top_blob, opt); | |||
| } | |||
| #endif | |||
| const int num_input = weight_data_size / num_output; | |||
| @@ -1694,6 +1698,7 @@ int InnerProduct_x86::forward_fp16(const Mat& bottom_blob, Mat& top_blob, const | |||
| } | |||
| #endif // __AVX__ | |||
| #if NCNN_INT8 | |||
| int InnerProduct_x86::create_pipeline_int8_x86(const Option& opt) | |||
| { | |||
| if (activation_type == 1) | |||
| @@ -1883,5 +1888,6 @@ int InnerProduct_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, co | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } // namespace ncnn | |||
| @@ -34,9 +34,10 @@ public: | |||
| protected: | |||
| int forward_fp16(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #if NCNN_INT8 | |||
| int create_pipeline_int8_x86(const Option& opt); | |||
| int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; | |||
| #endif | |||
| public: | |||
| Layer* flatten; | |||
| @@ -47,9 +48,11 @@ public: | |||
| // fp16 weight data | |||
| Mat weight_data_fp16; | |||
| #if NCNN_INT8 | |||
| // int8 | |||
| Mat weight_data_int8; | |||
| Mat scales_in; | |||
| #endif | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -30,6 +30,7 @@ | |||
| #cmakedefine01 NCNN_RUNTIME_CPU | |||
| #cmakedefine01 NCNN_AVX2 | |||
| #cmakedefine01 NCNN_ARM82 | |||
| #cmakedefine01 NCNN_INT8 | |||
| #cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@" | |||
| @@ -171,6 +171,7 @@ static int test_convolution_2() | |||
| || test_convolution_vec(64, 128, 1, 1, 1, 0, 0); | |||
| } | |||
| #if NCNN_INT8 | |||
| static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, bool requant = false) | |||
| { | |||
| ncnn::Mat a = RandomMat(w, h, c); | |||
| @@ -298,12 +299,20 @@ static int test_convolution_1() | |||
| || test_convolution_int8(6, 7, 64, 64, 3, 1, 2, 0, 1) | |||
| || test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| // Test driver: runs the convolution test groups in order and stops at the | |||
| // first group that reports failure. Exit status is 0 on success, 1 otherwise. | |||
| int main() | |||
| { | |||
| SRAND(7767517); | |||
| if (test_convolution_0() != 0) | |||
| return 1; | |||
| #if NCNN_INT8 | |||
| // test_convolution_1 only exists when the int8 tests are compiled in | |||
| if (test_convolution_1() != 0) | |||
| return 1; | |||
| #endif | |||
| return test_convolution_2() != 0 ? 1 : 0; | |||
| } | |||
| @@ -125,6 +125,7 @@ static int test_convolutiondepthwise_0() | |||
| return 0; | |||
| } | |||
| #if NCNN_INT8 | |||
| static int test_convolutiondepthwise_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int group, bool requant = false) | |||
| { | |||
| ncnn::Mat a = RandomMat(w, h, c); | |||
| @@ -251,10 +252,15 @@ static int test_convolutiondepthwise_1() | |||
| return 0; | |||
| } | |||
| #endif // NCNN_INT8 | |||
| // Test driver for the depthwise convolution tests. | |||
| int main() | |||
| { | |||
| SRAND(7767517); | |||
| const int fp32_result = test_convolutiondepthwise_0(); | |||
| #if NCNN_INT8 | |||
| // the int8 group runs only after the first group passed | |||
| if (fp32_result != 0) | |||
| return 1; | |||
| return test_convolutiondepthwise_1() != 0 ? 1 : 0; | |||
| #else | |||
| // without int8 support the first group's status is returned unchanged | |||
| return fp32_result; | |||
| #endif | |||
| } | |||
| @@ -87,6 +87,7 @@ static int test_innerproduct_2() | |||
| || test_innerproduct(RandomMat(24), 32, 1); | |||
| } | |||
| #if NCNN_INT8 | |||
| static int test_innerproduct_int8(const ncnn::Mat& a, int outch, int bias) | |||
| { | |||
| ncnn::ParamDict pd; | |||
| @@ -145,6 +146,7 @@ static int test_innerproduct_3() | |||
| || test_innerproduct_int8(RandomMat(7, 2, 16), 4, 1) | |||
| || test_innerproduct_int8(RandomMat(6, 3, 16), 16, 1); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| static int test_innerproduct_gemm(const ncnn::Mat& a, int outch, int bias) | |||
| { | |||
| @@ -193,6 +195,7 @@ static int test_innerproduct_4() | |||
| || test_innerproduct_gemm(RandomMat(12, 16), 7, 1); | |||
| } | |||
| #if NCNN_INT8 | |||
| static int test_innerproduct_gemm_int8(const ncnn::Mat& a, int outch, int bias) | |||
| { | |||
| ncnn::ParamDict pd; | |||
| @@ -242,11 +245,13 @@ static int test_innerproduct_5() | |||
| || test_innerproduct_gemm_int8(RandomMat(6, 16), 16, 0) | |||
| || test_innerproduct_gemm_int8(RandomMat(12, 16), 7, 1); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| int main() | |||
| { | |||
| SRAND(7767517); | |||
| #if NCNN_INT8 | |||
| return 0 | |||
| || test_innerproduct_0() | |||
| || test_innerproduct_1() | |||
| @@ -254,4 +259,11 @@ int main() | |||
| || test_innerproduct_3() | |||
| || test_innerproduct_4() | |||
| || test_innerproduct_5(); | |||
| #else | |||
| return 0 | |||
| || test_innerproduct_0() | |||
| || test_innerproduct_1() | |||
| || test_innerproduct_2() | |||
| || test_innerproduct_4(); | |||
| #endif | |||
| } | |||
| @@ -12,7 +12,11 @@ add_subdirectory(caffe) | |||
| add_subdirectory(mxnet) | |||
| add_subdirectory(onnx) | |||
| add_subdirectory(darknet) | |||
| add_subdirectory(quantize) | |||
| if(NCNN_INT8) | |||
| add_subdirectory(quantize) | |||
| else() | |||
| message(WARNING "NCNN_INT8 disabled, quantize tools won't be built") | |||
| endif() | |||
| add_executable(ncnn2mem ncnn2mem.cpp) | |||
| target_link_libraries(ncnn2mem PRIVATE ncnn) | |||
| @@ -2673,8 +2673,10 @@ int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling() | |||
| innerproduct->weight_data = convolution->weight_data; | |||
| innerproduct->bias_data = convolution->bias_data; | |||
| #if NCNN_INT8 | |||
| innerproduct->weight_data_int8_scales = convolution->weight_data_int8_scales; | |||
| innerproduct->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales; | |||
| #endif | |||
| innerproduct->activation_type = convolution->activation_type; | |||
| innerproduct->activation_params = convolution->activation_params; | |||
| @@ -2739,8 +2741,10 @@ int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct() | |||
| innerproduct2->weight_data = convolution->weight_data; | |||
| innerproduct2->bias_data = convolution->bias_data; | |||
| #if NCNN_INT8 | |||
| // the int8 scales go to the replacement layer (innerproduct2), the same | |||
| // target that receives weight_data and bias_data from the convolution | |||
| innerproduct2->weight_data_int8_scales = convolution->weight_data_int8_scales; | |||
| innerproduct2->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales; | |||
| #endif | |||
| innerproduct2->activation_type = convolution->activation_type; | |||
| innerproduct2->activation_params = convolution->activation_params; | |||