Browse Source

cmake option NCNN_INT8 (#2839)

tags/20210507
nihui GitHub 5 years ago
parent
commit
7e1aaa5828
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 529 additions and 278 deletions
  1. +30
    -8
      .github/workflows/linux-aarch64-cpu-gcc.yml
  2. +30
    -8
      .github/workflows/linux-arm-cpu-gcc.yml
  3. +10
    -0
      .github/workflows/linux-x64-cpu-clang.yml
  4. +7
    -0
      .github/workflows/linux-x64-cpu-gcc.yml
  5. +10
    -0
      .github/workflows/linux-x86-cpu-clang.yml
  6. +7
    -0
      .github/workflows/linux-x86-cpu-gcc.yml
  7. +1
    -0
      CMakeLists.txt
  8. +9
    -0
      docs/how-to-use-and-FAQ/build-minimal-library.md
  9. +13
    -3
      src/layer/arm/convolution_arm.cpp
  10. +4
    -0
      src/layer/arm/convolution_arm.h
  11. +16
    -4
      src/layer/arm/convolutiondepthwise_arm.cpp
  12. +4
    -0
      src/layer/arm/convolutiondepthwise_arm.h
  13. +254
    -248
      src/layer/arm/innerproduct_arm.cpp
  14. +4
    -1
      src/layer/arm/innerproduct_arm.h
  15. +15
    -0
      src/layer/convolution.cpp
  16. +4
    -0
      src/layer/convolution.h
  17. +13
    -0
      src/layer/convolutiondepthwise.cpp
  18. +4
    -0
      src/layer/convolutiondepthwise.h
  19. +13
    -0
      src/layer/innerproduct.cpp
  20. +4
    -0
      src/layer/innerproduct.h
  21. +10
    -1
      src/layer/x86/convolution_x86.cpp
  22. +4
    -0
      src/layer/x86/convolution_x86.h
  23. +12
    -3
      src/layer/x86/convolutiondepthwise_x86.cpp
  24. +4
    -0
      src/layer/x86/convolutiondepthwise_x86.h
  25. +6
    -0
      src/layer/x86/innerproduct_x86.cpp
  26. +4
    -1
      src/layer/x86/innerproduct_x86.h
  27. +1
    -0
      src/platform.h.in
  28. +9
    -0
      tests/test_convolution.cpp
  29. +6
    -0
      tests/test_convolutiondepthwise.cpp
  30. +12
    -0
      tests/test_innerproduct.cpp
  31. +5
    -1
      tools/CMakeLists.txt
  32. +4
    -0
      tools/ncnnoptimize.cpp

+ 30
- 8
.github/workflows/linux-aarch64-cpu-gcc.yml View File

@@ -43,17 +43,28 @@ jobs:
sudo apt-get update
sudo apt-get install g++-aarch64-linux-gnu

- name: configure
run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2

run: |
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2

- name: build-noint8
run: |
# -DNCNN_INT8=OFF is what distinguishes this configuration from the default build above
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
cmake --build . -j 2
- name: test-noint8
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build-noint8
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2

linux-gcc-arm82:
runs-on: ubuntu-20.04
steps:
@@ -90,13 +101,24 @@ jobs:
sudo apt-get update
sudo apt-get install g++-aarch64-linux-gnu

- name: configure
run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2

run: |
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2

- name: build-noint8
run: |
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
cmake --build . -j 2
- name: test-noint8
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build-noint8
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2

+ 30
- 8
.github/workflows/linux-arm-cpu-gcc.yml View File

@@ -43,17 +43,28 @@ jobs:
sudo apt-get update
sudo apt-get install g++-arm-linux-gnueabi

- name: configure
run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2

run: |
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2

- name: build-noint8
run: |
# -DNCNN_INT8=OFF is what distinguishes this configuration from the default build above
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
cmake --build . -j 2
- name: test-noint8
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build-noint8
TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2

linux-gcc-armhf:
runs-on: ubuntu-20.04
steps:
@@ -90,13 +101,24 @@ jobs:
sudo apt-get update
sudo apt-get install g++-arm-linux-gnueabihf

- name: configure
run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2

run: |
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2

- name: build-noint8
run: |
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
cmake --build . -j 2
- name: test-noint8
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build-noint8
TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2

+ 10
- 0
.github/workflows/linux-x64-cpu-clang.yml View File

@@ -47,6 +47,16 @@ jobs:
cmake --build . -j 2
- name: test-avx2
run: cd build-avx2 && ctest --output-on-failure -j 2
- name: build-noint8
env:
CC: clang
CXX: clang++
run: |
mkdir build-noint8 && cd build-noint8
cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test-noint8
run: cd build-noint8 && ctest --output-on-failure -j 2

linux-clang-simplestl:
runs-on: ubuntu-latest


+ 7
- 0
.github/workflows/linux-x64-cpu-gcc.yml View File

@@ -38,6 +38,13 @@ jobs:
cmake --build . -j 2
- name: test-avx2
run: cd build-avx2 && ctest --output-on-failure -j 2
- name: build-noint8
run: |
mkdir build-noint8 && cd build-noint8
cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test-noint8
run: cd build-noint8 && ctest --output-on-failure -j 2

linux-gcc-cpp03-nostdio-nostring-simplestl:
runs-on: ubuntu-16.04


+ 10
- 0
.github/workflows/linux-x86-cpu-clang.yml View File

@@ -37,3 +37,13 @@ jobs:
mkdir build-shared && cd build-shared
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON ..
cmake --build . -j 2
- name: build-noint8
env:
CC: clang
CXX: clang++
run: |
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF ..
cmake --build . -j 2
- name: test-noint8
run: cd build-noint8 && ctest --output-on-failure -j 2

+ 7
- 0
.github/workflows/linux-x86-cpu-gcc.yml View File

@@ -31,3 +31,10 @@ jobs:
mkdir build-shared && cd build-shared
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON ..
cmake --build . -j 2
- name: build-noint8
run: |
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF ..
cmake --build . -j 2
- name: test-noint8
run: cd build-noint8 && ctest --output-on-failure -j 2

+ 1
- 0
CMakeLists.txt View File

@@ -79,6 +79,7 @@ option(NCNN_BUILD_TESTS "build tests" OFF)
option(NCNN_COVERAGE "build for coverage" OFF)
option(NCNN_BUILD_BENCHMARK "build benchmark" ON)
option(NCNN_PYTHON "build python api" OFF)
# Compile the int8 quantized inference code paths; layer sources guard them with
# `#if NCNN_INT8`. When OFF, layers whose params set int8_scale_term log an error
# and fail to load.
option(NCNN_INT8 "int8 inference" ON)

if(ANDROID OR IOS OR NCNN_SIMPLESTL OR CMAKE_CROSSCOMPILING)
option(NCNN_DISABLE_RTTI "disable rtti" ON)


+ 9
- 0
docs/how-to-use-and-FAQ/build-minimal-library.md View File

@@ -39,6 +39,15 @@ cmake -DNCNN_STRING=OFF ..

Read more [here](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md#input-and-output).

### disable NCNN_INT8

```
cmake -DNCNN_INT8=OFF ..
```

* Quantized int8 inference is unavailable: loading a layer that has int8 scale terms logs an error and fails.


### drop pixel rotate and affine functions

```


+ 13
- 3
src/layer/arm/convolution_arm.cpp View File

@@ -29,19 +29,21 @@ namespace ncnn {

#include "convolution_bf16s.h"
#include "convolution_sgemm.h"
#include "convolution_sgemm_int8.h"

#include "convolution_1x1.h"
#include "convolution_1x1_bf16s.h"
#include "convolution_1x1_int8.h"
#include "convolution_2x2.h"
#include "convolution_3x3.h"
#include "convolution_3x3_int8.h"
#include "convolution_4x4.h"
#include "convolution_5x5.h"
#include "convolution_7x7.h"

#if NCNN_INT8
#include "convolution_sgemm_int8.h"
#include "convolution_1x1_int8.h"
#include "convolution_3x3_int8.h"
#include "convolution_int8.h"
#endif // NCNN_INT8

#if __ARM_NEON
#include "convolution_pack4.h"
@@ -67,6 +69,7 @@ namespace ncnn {
#include "convolution_7x7_pack1to4.h"
#include "convolution_7x7_pack1to4_bf16s.h"

#if NCNN_INT8
#include "convolution_pack8_int8.h"
#include "convolution_pack1to8_int8.h"
#include "convolution_pack8to1_int8.h"
@@ -80,6 +83,7 @@ namespace ncnn {
#include "convolution_3x3_pack1to8_int8.h"
#include "convolution_7x7_pack1to8_int8.h"
#include "convolution_3x3_pack8to1_int8.h"
#endif // NCNN_INT8

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#include "convolution_fp16s.h"
@@ -169,10 +173,12 @@ int Convolution_arm::create_pipeline(const Option& opt)
activation->create_pipeline(opt);
}

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return create_pipeline_int8_arm(opt);
}
#endif

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
if (opt.use_fp16_storage)
@@ -418,10 +424,12 @@ int Convolution_arm::destroy_pipeline(const Option& opt)

int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8_arm(bottom_blob, top_blob, opt);
}
#endif

if (bottom_blob.dims != 3)
{
@@ -1767,6 +1775,7 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const
return 0;
}

#if NCNN_INT8
int Convolution_arm::create_pipeline_int8_arm(const Option& opt)
{
const int maxk = kernel_w * kernel_h;
@@ -2263,6 +2272,7 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con

return 0;
}
#endif // NCNN_INT8

int Convolution_arm::forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{


+ 4
- 0
src/layer/arm/convolution_arm.h View File

@@ -37,8 +37,10 @@ protected:
#endif
int create_pipeline_bf16s(const Option& opt);
int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#if NCNN_INT8
int create_pipeline_int8_arm(const Option& opt);
int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif
int forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

public:
@@ -67,11 +69,13 @@ public:
// bf16
Mat weight_data_bf16;

#if NCNN_INT8
// int8
Mat weight_data_int8;

// Mat weight_3x3s2_data_int8;
std::vector<Mat> weight_3x3_winograd23_data_int8;
#endif
};

} // namespace ncnn


+ 16
- 4
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -26,16 +26,21 @@
namespace ncnn {

#include "convolutiondepthwise_3x3.h"
#include "convolutiondepthwise_3x3_int8.h"
#include "convolutiondepthwise_5x5.h"

#if NCNN_INT8
#include "convolutiondepthwise_3x3_int8.h"
#endif // NCNN_INT8

#if __ARM_NEON
#include "convolutiondepthwise_3x3_pack4.h"
#include "convolutiondepthwise_3x3_pack4_bf16s.h"
#include "convolutiondepthwise_5x5_pack4.h"
#include "convolutiondepthwise_5x5_pack4_bf16s.h"

#if NCNN_INT8
#include "convolutiondepthwise_3x3_pack8_int8.h"
#endif // NCNN_INT8

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#include "convolutiondepthwise_3x3_fp16s.h"
@@ -104,10 +109,12 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
activation->create_pipeline(opt);
}

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return create_pipeline_int8_arm(opt);
}
#endif

const int maxk = kernel_w * kernel_h;
int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
@@ -269,6 +276,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
weights[0] = weight_data_g;
weights[1] = bias_data_g;

#if NCNN_INT8
if (int8_scale_term)
{
Mat weight_data_int8_scales_g(num_output_g);
@@ -280,6 +288,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
{
weights[4] = top_blob_int8_scales.range(g, 1);
}
#endif

op->load_model(ModelBinFromMatArray(weights));
}
@@ -288,6 +297,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
ncnn::Mat weights[4];
weights[0] = weight_data_g;

#if NCNN_INT8
if (int8_scale_term)
{
Mat weight_data_int8_scales_g(num_output_g);
@@ -299,6 +309,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
{
weights[3] = top_blob_int8_scales.range(g, 1);
}
#endif

op->load_model(ModelBinFromMatArray(weights));
}
@@ -332,13 +343,12 @@ int ConvolutionDepthWise_arm::destroy_pipeline(const Option& opt)

int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
// convolv with NxN kernel
// value = value + bias

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8_arm(bottom_blob, top_blob, opt);
}
#endif

int elembits = bottom_blob.elembits();

@@ -1447,6 +1457,7 @@ int ConvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blo
return 0;
}

#if NCNN_INT8
int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt)
{
const int maxk = kernel_w * kernel_h;
@@ -1981,5 +1992,6 @@ int ConvolutionDepthWise_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_

return 0;
}
#endif // NCNN_INT8

} // namespace ncnn

+ 4
- 0
src/layer/arm/convolutiondepthwise_arm.h View File

@@ -36,8 +36,10 @@ protected:
int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif
int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#if NCNN_INT8
int create_pipeline_int8_arm(const Option& opt);
int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif

public:
Layer* activation;
@@ -54,8 +56,10 @@ public:
Mat weight_data_bf16;
Mat weight_data_pack4_bf16;

#if NCNN_INT8
// int8
Mat weight_data_int8;
#endif
};

} // namespace ncnn


+ 254
- 248
src/layer/arm/innerproduct_arm.cpp View File

@@ -55,10 +55,12 @@ int InnerProduct_arm::create_pipeline(const Option& opt)
}
#endif // __ARM_NEON

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return create_pipeline_int8_arm(opt);
}
#endif

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
if (opt.use_fp16_storage)
@@ -94,260 +96,14 @@ int InnerProduct_arm::destroy_pipeline(const Option& opt)
return 0;
}

int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt)
{
if (activation_type == 1)
{
activation = ncnn::create_layer(ncnn::LayerType::ReLU);

ncnn::ParamDict pd;
activation->load_param(pd);
}

const int num_input = weight_data_size / num_output;

int out_elempack = 1;

if (opt.use_packing_layout)
{
out_elempack = num_output % 8 == 0 ? 8 : 1;
}

// src = inch-outch
// dst = pb-inch-outch/pb
{
Mat weight_data_r2 = weight_data.reshape(num_input, num_output);

weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack);

for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack)
{
signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack);

for (int p = 0; p < num_input; p++)
{
for (int j = 0; j < out_elempack; j++)
{
*g0++ = weight_data_r2.row<signed char>(q + j)[p];
}
}
}
}

// // convert fp32 to int8
// if (weight_data_int8_scales.empty())
// {
// return 0;
// }
// #if __aarch64__
// // first reorder Matrix A before MatMul
// const int n = num_output;
// const int k = weight_data.total() / n;
// weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator);
//
// int8_t* b = weight_data;
// int8_t* sb = weight_data_int8;
// reorder_a(b, sb, n, k, k);
//
// // pre-built scales
// scales_in.create(num_output, 4u, opt.blob_allocator);
// for (int i = 0; i < num_output; ++i)
// {
// if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6)
// {
// scales_in[i] = 0.f;
// }
// else
// {
// scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]);
// }
// }
// #endif
return 0;
}

int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
const int num_input = weight_data_size / num_output;

if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1)
{
// gemm
Mat bottom_blob_unpacked;
Option opt_unpack = opt;
opt_unpack.blob_allocator = opt.workspace_allocator;
convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack);

return forward_int8(bottom_blob_unpacked, top_blob, opt);
}

int elembits = bottom_blob.elembits();

Mat bottom_blob_int8 = bottom_blob;
if (elembits != 8)
{
Option opt_q = opt;
opt_q.blob_allocator = opt.workspace_allocator;
quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q);
}

Mat bottom_blob_int8_flattened = bottom_blob_int8;
if (bottom_blob_int8.dims != 1)
{
Option opt_flatten = opt;
opt_flatten.blob_allocator = opt.workspace_allocator;
flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten);
}

// int elempack = bottom_blob_int8_flattened.elempack;

int out_elempack = 1;
if (opt.use_packing_layout)
{
out_elempack = num_output % 8 == 0 ? 8 : 1;
}

top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

Mat top_blob_int32;
top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator);
if (top_blob_int32.empty())
return -100;

#if __ARM_NEON
if (out_elempack == 8)
{
// num_output
#pragma omp parallel for num_threads(opt.num_threads)
for (int p = 0; p < num_output / out_elempack; p++)
{
int32x4_t _sum0 = vdupq_n_s32(0);
int32x4_t _sum1 = vdupq_n_s32(0);

const signed char* kptr = weight_data_int8.row<const signed char>(p);
const signed char* sptr = bottom_blob_int8_flattened;

int i = 0;
for (; i < num_input; i++)
{
int8x8_t _val = vdup_n_s8(sptr[0]);

int8x8_t _w = vld1_s8(kptr);

int16x8_t _s0 = vmull_s8(_val, _w);
_sum0 = vaddw_s16(_sum0, vget_low_s16(_s0));
_sum1 = vaddw_s16(_sum1, vget_high_s16(_s0));

sptr += 1;
kptr += 8;
}

int* outptr = (int*)top_blob_int32;
vst1q_s32(outptr + p * 8, _sum0);
vst1q_s32(outptr + p * 8 + 4, _sum1);
}
}
#endif // __ARM_NEON

if (out_elempack == 1)
{
// num_output
#pragma omp parallel for num_threads(opt.num_threads)
for (int p = 0; p < num_output / out_elempack; p++)
{
int sum = 0;

const signed char* kptr = weight_data_int8.row<const signed char>(p);
const signed char* sptr = bottom_blob_int8_flattened;

int i = 0;
for (; i < num_input; i++)
{
signed char val = sptr[0];

signed char w = kptr[0];

sum += val * w;

sptr += 1;
kptr += 1;
}

int* outptr = (int*)top_blob_int32;
outptr[p] = sum;
}
}

Mat scale_data(num_output);
for (int p = 0; p < num_output; p++)
{
// dequantize
float scale_in;
if (weight_data_int8_scales[p] == 0)
scale_in = 0;
else
scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]);

scale_data[p] = scale_in;
}

dequantize_from_int32(top_blob_int32, top_blob, scale_data, bias_data, opt);

if (activation)
{
activation->forward_inplace(top_blob, opt);
}

return 0;

// #if __aarch64__
// const int w = bottom_blob_tm.w;
// const int h = bottom_blob_tm.h;
//
// const int m = 1;
// const int k = bottom_blob_tm.c * w * h;
// Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator);
// {
// reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k);
// }
//
// Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator);
// int32_t* pc = top_blob_tm;
// const int8_t* pa = bottom_blob_reorder;
// const int8_t* pb = weight_data_int8;
// int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt);
//
// float* outptr = top_blob;
//
// // dequant.fused.relu int32_t to float
// for (int p = 0; p < num_output; ++p)
// {
// float sumfp32 = pc[p] * scales_in[p];
// if (bias_term)
// {
// sumfp32 += bias_data[p];
// }
// if (1 == activation_type)
// {
// sumfp32 = std::max(0.f, sumfp32);
// }
//
// outptr[p] = sumfp32;
// }
// return 0;
// #else
// return InnerProduct::forward_int8(bottom_blob, top_blob, opt);
// #endif
}

int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8_arm(bottom_blob, top_blob, opt);
}
#endif

int elembits = bottom_blob.elembits();

@@ -2140,4 +1896,254 @@ int InnerProduct_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const
return 0;
}

#if NCNN_INT8
int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt)
{
if (activation_type == 1)
{
activation = ncnn::create_layer(ncnn::LayerType::ReLU);

ncnn::ParamDict pd;
activation->load_param(pd);
}

const int num_input = weight_data_size / num_output;

int out_elempack = 1;

if (opt.use_packing_layout)
{
out_elempack = num_output % 8 == 0 ? 8 : 1;
}

// src = inch-outch
// dst = pb-inch-outch/pb
{
Mat weight_data_r2 = weight_data.reshape(num_input, num_output);

weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack);

for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack)
{
signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack);

for (int p = 0; p < num_input; p++)
{
for (int j = 0; j < out_elempack; j++)
{
*g0++ = weight_data_r2.row<signed char>(q + j)[p];
}
}
}
}

// // convert fp32 to int8
// if (weight_data_int8_scales.empty())
// {
// return 0;
// }
// #if __aarch64__
// // first reorder Matrix A before MatMul
// const int n = num_output;
// const int k = weight_data.total() / n;
// weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator);
//
// int8_t* b = weight_data;
// int8_t* sb = weight_data_int8;
// reorder_a(b, sb, n, k, k);
//
// // pre-built scales
// scales_in.create(num_output, 4u, opt.blob_allocator);
// for (int i = 0; i < num_output; ++i)
// {
// if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6)
// {
// scales_in[i] = 0.f;
// }
// else
// {
// scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]);
// }
// }
// #endif
return 0;
}

int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
const int num_input = weight_data_size / num_output;

if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1)
{
// gemm
Mat bottom_blob_unpacked;
Option opt_unpack = opt;
opt_unpack.blob_allocator = opt.workspace_allocator;
convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack);

return forward_int8(bottom_blob_unpacked, top_blob, opt);
}

int elembits = bottom_blob.elembits();

Mat bottom_blob_int8 = bottom_blob;
if (elembits != 8)
{
Option opt_q = opt;
opt_q.blob_allocator = opt.workspace_allocator;
quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q);
}

Mat bottom_blob_int8_flattened = bottom_blob_int8;
if (bottom_blob_int8.dims != 1)
{
Option opt_flatten = opt;
opt_flatten.blob_allocator = opt.workspace_allocator;
flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten);
}

// int elempack = bottom_blob_int8_flattened.elempack;

int out_elempack = 1;
if (opt.use_packing_layout)
{
out_elempack = num_output % 8 == 0 ? 8 : 1;
}

top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

Mat top_blob_int32;
top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator);
if (top_blob_int32.empty())
return -100;

#if __ARM_NEON
if (out_elempack == 8)
{
// num_output
#pragma omp parallel for num_threads(opt.num_threads)
for (int p = 0; p < num_output / out_elempack; p++)
{
int32x4_t _sum0 = vdupq_n_s32(0);
int32x4_t _sum1 = vdupq_n_s32(0);

const signed char* kptr = weight_data_int8.row<const signed char>(p);
const signed char* sptr = bottom_blob_int8_flattened;

int i = 0;
for (; i < num_input; i++)
{
int8x8_t _val = vdup_n_s8(sptr[0]);

int8x8_t _w = vld1_s8(kptr);

int16x8_t _s0 = vmull_s8(_val, _w);
_sum0 = vaddw_s16(_sum0, vget_low_s16(_s0));
_sum1 = vaddw_s16(_sum1, vget_high_s16(_s0));

sptr += 1;
kptr += 8;
}

int* outptr = (int*)top_blob_int32;
vst1q_s32(outptr + p * 8, _sum0);
vst1q_s32(outptr + p * 8 + 4, _sum1);
}
}
#endif // __ARM_NEON

if (out_elempack == 1)
{
// num_output
#pragma omp parallel for num_threads(opt.num_threads)
for (int p = 0; p < num_output / out_elempack; p++)
{
int sum = 0;

const signed char* kptr = weight_data_int8.row<const signed char>(p);
const signed char* sptr = bottom_blob_int8_flattened;

int i = 0;
for (; i < num_input; i++)
{
signed char val = sptr[0];

signed char w = kptr[0];

sum += val * w;

sptr += 1;
kptr += 1;
}

int* outptr = (int*)top_blob_int32;
outptr[p] = sum;
}
}

Mat scale_data(num_output);
for (int p = 0; p < num_output; p++)
{
// dequantize
float scale_in;
if (weight_data_int8_scales[p] == 0)
scale_in = 0;
else
scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]);

scale_data[p] = scale_in;
}

dequantize_from_int32(top_blob_int32, top_blob, scale_data, bias_data, opt);

if (activation)
{
activation->forward_inplace(top_blob, opt);
}

return 0;

// #if __aarch64__
// const int w = bottom_blob_tm.w;
// const int h = bottom_blob_tm.h;
//
// const int m = 1;
// const int k = bottom_blob_tm.c * w * h;
// Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator);
// {
// reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k);
// }
//
// Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator);
// int32_t* pc = top_blob_tm;
// const int8_t* pa = bottom_blob_reorder;
// const int8_t* pb = weight_data_int8;
// int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt);
//
// float* outptr = top_blob;
//
// // dequant.fused.relu int32_t to float
// for (int p = 0; p < num_output; ++p)
// {
// float sumfp32 = pc[p] * scales_in[p];
// if (bias_term)
// {
// sumfp32 += bias_data[p];
// }
// if (1 == activation_type)
// {
// sumfp32 = std::max(0.f, sumfp32);
// }
//
// outptr[p] = sumfp32;
// }
// return 0;
// #else
// return InnerProduct::forward_int8(bottom_blob, top_blob, opt);
// #endif
}
#endif // NCNN_INT8

} // namespace ncnn

+ 4
- 1
src/layer/arm/innerproduct_arm.h View File

@@ -39,9 +39,10 @@ protected:
#endif
int create_pipeline_bf16s(const Option& opt);
int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#if NCNN_INT8
int create_pipeline_int8_arm(const Option& opt);
int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif

public:
Layer* flatten;
@@ -54,9 +55,11 @@ public:
// bf16
Mat weight_data_bf16;

#if NCNN_INT8
// int8
Mat weight_data_int8;
Mat scales_in;
#endif
};

} // namespace ncnn


+ 15
- 0
src/layer/convolution.cpp View File

@@ -47,7 +47,12 @@ int Convolution::load_param(const ParamDict& pd)

if (int8_scale_term)
{
#if NCNN_INT8
support_int8_storage = true;
#else
NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference");
return -1;
#endif
}

return 0;
@@ -66,6 +71,7 @@ int Convolution::load_model(const ModelBin& mb)
return -100;
}

#if NCNN_INT8
if (int8_scale_term)
{
weight_data_int8_scales = mb.load(num_output, 1);
@@ -76,12 +82,14 @@ int Convolution::load_model(const ModelBin& mb)
{
top_blob_int8_scales = mb.load(1, 1);
}
#endif // NCNN_INT8

return 0;
}

int Convolution::create_pipeline(const Option& opt)
{
#if NCNN_INT8
// runtime quantize the weight data
if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term)
{
@@ -101,6 +109,7 @@ int Convolution::create_pipeline(const Option& opt)

weight_data = weight_data_int8.reshape(weight_data_size);
}
#endif // NCNN_INT8

return 0;
}
@@ -110,10 +119,12 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
// convolv with NxN kernel
// value = value + bias

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8(bottom_blob, top_blob, opt);
}
#endif

// flattened blob, implement as InnerProduct
if (bottom_blob.dims == 1 && kernel_w == 1 && kernel_h == 1)
@@ -140,11 +151,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
weights[0] = weight_data;
weights[1] = bias_data;

#if NCNN_INT8
if (int8_scale_term)
{
weights[2] = weight_data_int8_scales;
weights[3] = bottom_blob_int8_scales;
}
#endif

op->load_model(ModelBinFromMatArray(weights));

@@ -327,6 +340,7 @@ void Convolution::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered
}
}

#if NCNN_INT8
static inline signed char float2int8(float v)
{
int int32 = static_cast<int>(round(v));
@@ -492,5 +506,6 @@ int Convolution::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Optio

return 0;
}
#endif // NCNN_INT8

} // namespace ncnn

+ 4
- 0
src/layer/convolution.h View File

@@ -35,7 +35,9 @@ public:
protected:
void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const;

#if NCNN_INT8
int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif

public:
// param
@@ -65,9 +67,11 @@ public:
Mat weight_data;
Mat bias_data;

#if NCNN_INT8
Mat weight_data_int8_scales;
Mat bottom_blob_int8_scales;
Mat top_blob_int8_scales;
#endif

// implementation type, 0 means do not use auto pack model
int impl_type;


+ 13
- 0
src/layer/convolutiondepthwise.cpp View File

@@ -53,7 +53,12 @@ int ConvolutionDepthWise::load_param(const ParamDict& pd)

if (int8_scale_term)
{
#if NCNN_INT8
support_int8_storage = true;
#else
NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference");
return -1;
#endif
}

return 0;
@@ -72,6 +77,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
return -100;
}

#if NCNN_INT8
if (int8_scale_term == 1 || int8_scale_term == 101)
{
weight_data_int8_scales = mb.load(group, 1);
@@ -104,12 +110,14 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
top_blob_int8_scales = Mat(group);
top_blob_int8_scales.fill(top_blob_int8_scale);
}
#endif // NCNN_INT8

return 0;
}

int ConvolutionDepthWise::create_pipeline(const Option& opt)
{
#if NCNN_INT8
// runtime quantize the weight data
if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term)
{
@@ -133,6 +141,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)

weight_data = int8_weight_data;
}
#endif // NCNN_INT8

return 0;
}
@@ -142,10 +151,12 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
// convolve with NxN kernel
// value = value + bias

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8(bottom_blob, top_blob, opt);
}
#endif

int w = bottom_blob.w;
int h = bottom_blob.h;
@@ -403,6 +414,7 @@ void ConvolutionDepthWise::make_padding(const Mat& bottom_blob, Mat& bottom_blob
}
}

#if NCNN_INT8
static inline signed char float2int8(float v)
{
int int32 = static_cast<int>(round(v));
@@ -694,5 +706,6 @@ int ConvolutionDepthWise::forward_int8(const Mat& bottom_blob, Mat& top_blob, co

return 0;
}
#endif // NCNN_INT8

} // namespace ncnn

+ 4
- 0
src/layer/convolutiondepthwise.h View File

@@ -35,7 +35,9 @@ public:
protected:
void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const;

#if NCNN_INT8
int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif

public:
// param
@@ -66,9 +68,11 @@ public:
Mat weight_data;
Mat bias_data;

#if NCNN_INT8
Mat weight_data_int8_scales;
Mat bottom_blob_int8_scales;
Mat top_blob_int8_scales;
#endif
};

} // namespace ncnn


+ 13
- 0
src/layer/innerproduct.cpp View File

@@ -35,7 +35,12 @@ int InnerProduct::load_param(const ParamDict& pd)

if (int8_scale_term)
{
#if NCNN_INT8
support_int8_storage = true;
#else
NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference");
return -1;
#endif
}

return 0;
@@ -54,17 +59,20 @@ int InnerProduct::load_model(const ModelBin& mb)
return -100;
}

#if NCNN_INT8
if (int8_scale_term)
{
weight_data_int8_scales = mb.load(num_output, 1);
bottom_blob_int8_scales = mb.load(1, 1);
}
#endif // NCNN_INT8

return 0;
}

int InnerProduct::create_pipeline(const Option& opt)
{
#if NCNN_INT8
// runtime quantize the weight data
if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term)
{
@@ -81,16 +89,19 @@ int InnerProduct::create_pipeline(const Option& opt)

weight_data = weight_data_int8.reshape(weight_data_size);
}
#endif // NCNN_INT8

return 0;
}

int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8(bottom_blob, top_blob, opt);
}
#endif

const int num_input = weight_data_size / num_output;

@@ -218,6 +229,7 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& o
return 0;
}

#if NCNN_INT8
int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
const int num_input = weight_data_size / num_output;
@@ -332,5 +344,6 @@ int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Opti

return 0;
}
#endif // NCNN_INT8

} // namespace ncnn

+ 4
- 0
src/layer/innerproduct.h View File

@@ -33,7 +33,9 @@ public:
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

protected:
#if NCNN_INT8
int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif

public:
// param
@@ -52,8 +54,10 @@ public:
Mat weight_data;
Mat bias_data;

#if NCNN_INT8
Mat weight_data_int8_scales;
Mat bottom_blob_int8_scales;
#endif
};

} // namespace ncnn


+ 10
- 1
src/layer/x86/convolution_x86.cpp View File

@@ -35,18 +35,21 @@ namespace ncnn {
#include "convolution_5x5.h"
#include "convolution_7x7.h"

#if NCNN_INT8
#include "convolution_sgemm_int8.h"
#include "convolution_1x1_int8.h"
#include "convolution_3x3_int8.h"

#include "convolution_int8.h"
#endif // NCNN_INT8

#if __SSE2__
#include "convolution_1x1_pack4.h"

#if NCNN_INT8
#include "convolution_pack8_int8.h"
#include "convolution_pack1to8_int8.h"
#include "convolution_pack8to1_int8.h"
#endif // NCNN_INT8
#if __AVX__
#include "convolution_3x3_pack1to8.h"
#include "convolution_3x3_pack8to1.h"
@@ -118,10 +121,12 @@ int Convolution_x86::create_pipeline(const Option& opt)
activation->create_pipeline(opt);
}

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return create_pipeline_int8_x86(opt);
}
#endif

int kernel_size = kernel_w * kernel_h;
int num_input = weight_data_size / kernel_size / num_output;
@@ -311,10 +316,12 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option
// convolve with NxN kernel
// value = value + bias

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8_x86(bottom_blob, top_blob, opt);
}
#endif

if (bottom_blob.dims != 3)
{
@@ -1058,6 +1065,7 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option
return 0;
}

#if NCNN_INT8
int Convolution_x86::create_pipeline_int8_x86(const Option& opt)
{
const int maxk = kernel_w * kernel_h;
@@ -1410,6 +1418,7 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con

return 0;
}
#endif // NCNN_INT8

int Convolution_x86::forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{


+ 4
- 0
src/layer/x86/convolution_x86.h View File

@@ -30,8 +30,10 @@ public:
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

protected:
#if NCNN_INT8
int create_pipeline_int8_x86(const Option& opt);
int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif
int forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

public:
@@ -47,9 +49,11 @@ public:

Mat weight_3x3_winograd64_data_pack8;

#if NCNN_INT8
// int8
Mat weight_data_int8;
Mat weight_3x3_winograd23_data_int8;
#endif
};

} // namespace ncnn


+ 12
- 3
src/layer/x86/convolutiondepthwise_x86.cpp View File

@@ -36,7 +36,9 @@ namespace ncnn {
#endif
#endif // __SSE2__
#include "convolutiondepthwise_3x3.h"
#if NCNN_INT8
#include "convolutiondepthwise_3x3_int8.h"
#endif // NCNN_INT8

ConvolutionDepthWise_x86::ConvolutionDepthWise_x86()
{
@@ -102,10 +104,12 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
activation->create_pipeline(opt);
}

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return create_pipeline_int8_x86(opt);
}
#endif

const int maxk = kernel_w * kernel_h;
int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
@@ -235,6 +239,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
weights[0] = weight_data_g;
weights[1] = bias_data_g;

#if NCNN_INT8
if (int8_scale_term)
{
Mat weight_data_int8_scales_g(num_output_g);
@@ -246,6 +251,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
{
weights[4] = top_blob_int8_scales.range(g, 1);
}
#endif

op->load_model(ModelBinFromMatArray(weights));
}
@@ -254,6 +260,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
ncnn::Mat weights[4];
weights[0] = weight_data_g;

#if NCNN_INT8
if (int8_scale_term)
{
Mat weight_data_int8_scales_g(num_output_g);
@@ -265,6 +272,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
{
weights[3] = top_blob_int8_scales.range(g, 1);
}
#endif

op->load_model(ModelBinFromMatArray(weights));
}
@@ -298,13 +306,12 @@ int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt)

int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
// convolve with NxN kernel
// value = value + bias

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8_x86(bottom_blob, top_blob, opt);
}
#endif

int w = bottom_blob.w;
int h = bottom_blob.h;
@@ -628,6 +635,7 @@ int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, con
return 0;
}

#if NCNN_INT8
int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt)
{
const int maxk = kernel_w * kernel_h;
@@ -1061,5 +1069,6 @@ int ConvolutionDepthWise_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_

return 0;
}
#endif // NCNN_INT8

} // namespace ncnn

+ 4
- 0
src/layer/x86/convolutiondepthwise_x86.h View File

@@ -31,8 +31,10 @@ public:

protected:
int create_group_ops(const Option& opt);
#if NCNN_INT8
int create_pipeline_int8_x86(const Option& opt);
int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif

public:
Layer* activation;
@@ -41,8 +43,10 @@ public:
// packing
Mat weight_data_packed;

#if NCNN_INT8
// int8
Mat weight_data_int8;
#endif
};

} // namespace ncnn


+ 6
- 0
src/layer/x86/innerproduct_x86.cpp View File

@@ -54,10 +54,12 @@ int InnerProduct_x86::create_pipeline(const Option& opt)
flatten->create_pipeline(opt);
}

#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return create_pipeline_int8_x86(opt);
}
#endif

const int num_input = weight_data_size / num_output;

@@ -124,10 +126,12 @@ int InnerProduct_x86::destroy_pipeline(const Option& opt)

int InnerProduct_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
#if NCNN_INT8
if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
{
return forward_int8_x86(bottom_blob, top_blob, opt);
}
#endif

const int num_input = weight_data_size / num_output;

@@ -1694,6 +1698,7 @@ int InnerProduct_x86::forward_fp16(const Mat& bottom_blob, Mat& top_blob, const
}
#endif // __AVX__

#if NCNN_INT8
int InnerProduct_x86::create_pipeline_int8_x86(const Option& opt)
{
if (activation_type == 1)
@@ -1883,5 +1888,6 @@ int InnerProduct_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, co

return 0;
}
#endif // NCNN_INT8

} // namespace ncnn

+ 4
- 1
src/layer/x86/innerproduct_x86.h View File

@@ -34,9 +34,10 @@ public:

protected:
int forward_fp16(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#if NCNN_INT8
int create_pipeline_int8_x86(const Option& opt);
int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
#endif

public:
Layer* flatten;
@@ -47,9 +48,11 @@ public:
// fp16 weight data
Mat weight_data_fp16;

#if NCNN_INT8
// int8
Mat weight_data_int8;
Mat scales_in;
#endif
};

} // namespace ncnn


+ 1
- 0
src/platform.h.in View File

@@ -30,6 +30,7 @@
#cmakedefine01 NCNN_RUNTIME_CPU
#cmakedefine01 NCNN_AVX2
#cmakedefine01 NCNN_ARM82
#cmakedefine01 NCNN_INT8

#cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@"



+ 9
- 0
tests/test_convolution.cpp View File

@@ -171,6 +171,7 @@ static int test_convolution_2()
|| test_convolution_vec(64, 128, 1, 1, 1, 0, 0);
}

#if NCNN_INT8
static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, bool requant = false)
{
ncnn::Mat a = RandomMat(w, h, c);
@@ -298,12 +299,20 @@ static int test_convolution_1()
|| test_convolution_int8(6, 7, 64, 64, 3, 1, 2, 0, 1)
|| test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0);
}
#endif // NCNN_INT8

// Test driver: seeds the random generator with a fixed value, then runs the
// convolution test groups in order, stopping at the first group that reports
// a non-zero result. Returns 0 when every group returns 0, and 1 otherwise.
int main()
{
    SRAND(7767517);

    // test_convolution_1() is only defined when NCNN_INT8 is enabled, so it
    // is conditionally spliced into the middle of the short-circuit chain.
    return 0
           || test_convolution_0()
#if NCNN_INT8
           || test_convolution_1()
#endif // NCNN_INT8
           || test_convolution_2();
}

+ 6
- 0
tests/test_convolutiondepthwise.cpp View File

@@ -125,6 +125,7 @@ static int test_convolutiondepthwise_0()
return 0;
}

#if NCNN_INT8
static int test_convolutiondepthwise_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int group, bool requant = false)
{
ncnn::Mat a = RandomMat(w, h, c);
@@ -251,10 +252,15 @@ static int test_convolutiondepthwise_1()

return 0;
}
#endif // NCNN_INT8

// Test driver: seeds the random generator with a fixed value and runs the
// depthwise convolution test groups.
int main()
{
    SRAND(7767517);

#if !NCNN_INT8
    // Without NCNN_INT8 only the first group exists; its return value is
    // passed through unchanged as the exit status.
    return test_convolutiondepthwise_0();
#else
    // With NCNN_INT8 the second group runs only if the first returned 0;
    // the result is 0 when both return 0, and 1 otherwise.
    return test_convolutiondepthwise_0() || test_convolutiondepthwise_1();
#endif // !NCNN_INT8
}

+ 12
- 0
tests/test_innerproduct.cpp View File

@@ -87,6 +87,7 @@ static int test_innerproduct_2()
|| test_innerproduct(RandomMat(24), 32, 1);
}

#if NCNN_INT8
static int test_innerproduct_int8(const ncnn::Mat& a, int outch, int bias)
{
ncnn::ParamDict pd;
@@ -145,6 +146,7 @@ static int test_innerproduct_3()
|| test_innerproduct_int8(RandomMat(7, 2, 16), 4, 1)
|| test_innerproduct_int8(RandomMat(6, 3, 16), 16, 1);
}
#endif // NCNN_INT8

static int test_innerproduct_gemm(const ncnn::Mat& a, int outch, int bias)
{
@@ -193,6 +195,7 @@ static int test_innerproduct_4()
|| test_innerproduct_gemm(RandomMat(12, 16), 7, 1);
}

#if NCNN_INT8
static int test_innerproduct_gemm_int8(const ncnn::Mat& a, int outch, int bias)
{
ncnn::ParamDict pd;
@@ -242,11 +245,13 @@ static int test_innerproduct_5()
|| test_innerproduct_gemm_int8(RandomMat(6, 16), 16, 0)
|| test_innerproduct_gemm_int8(RandomMat(12, 16), 7, 1);
}
#endif // NCNN_INT8

int main()
{
SRAND(7767517);

#if NCNN_INT8
return 0
|| test_innerproduct_0()
|| test_innerproduct_1()
@@ -254,4 +259,11 @@ int main()
|| test_innerproduct_3()
|| test_innerproduct_4()
|| test_innerproduct_5();
#else
return 0
|| test_innerproduct_0()
|| test_innerproduct_1()
|| test_innerproduct_2()
|| test_innerproduct_4();
#endif
}

+ 5
- 1
tools/CMakeLists.txt View File

@@ -12,7 +12,11 @@ add_subdirectory(caffe)
add_subdirectory(mxnet)
add_subdirectory(onnx)
add_subdirectory(darknet)
# Only configure the quantize tools when NCNN_INT8 is enabled; otherwise emit
# a configure-time warning and skip them. The directory must be added exactly
# once, and only inside this guard.
if(NCNN_INT8)
    add_subdirectory(quantize)
else()
    message(WARNING "NCNN_INT8 disabled, quantize tools won't be built")
endif()

add_executable(ncnn2mem ncnn2mem.cpp)
target_link_libraries(ncnn2mem PRIVATE ncnn)


+ 4
- 0
tools/ncnnoptimize.cpp View File

@@ -2673,8 +2673,10 @@ int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling()

innerproduct->weight_data = convolution->weight_data;
innerproduct->bias_data = convolution->bias_data;
#if NCNN_INT8
innerproduct->weight_data_int8_scales = convolution->weight_data_int8_scales;
innerproduct->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales;
#endif

innerproduct->activation_type = convolution->activation_type;
innerproduct->activation_params = convolution->activation_params;
@@ -2739,8 +2741,10 @@ int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct()

innerproduct2->weight_data = convolution->weight_data;
innerproduct2->bias_data = convolution->bias_data;
#if NCNN_INT8
// The int8 scales must be copied onto the replacement layer (innerproduct2),
// the same object that receives the weights, bias and activation settings.
// Assigning them to a different layer would leave the replacement without
// scales and overwrite the other layer's own scales.
innerproduct2->weight_data_int8_scales = convolution->weight_data_int8_scales;
innerproduct2->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales;
#endif

innerproduct2->activation_type = convolution->activation_type;
innerproduct2->activation_params = convolution->activation_params;


Loading…
Cancel
Save