diff --git a/.github/workflows/linux-aarch64-cpu-gcc.yml b/.github/workflows/linux-aarch64-cpu-gcc.yml
index f49c785a6..5e537eeb8 100644
--- a/.github/workflows/linux-aarch64-cpu-gcc.yml
+++ b/.github/workflows/linux-aarch64-cpu-gcc.yml
@@ -43,17 +43,28 @@ jobs:
         sudo apt-get update
         sudo apt-get install g++-aarch64-linux-gnu
 
-    - name: configure
-      run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
     - name: build
-      run: cmake --build build -j 2
-
+      run: |
+        mkdir build && cd build
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
+        cmake --build . -j 2
     - name: test
       run: |
         export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
         cd build
         TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2
 
+    - name: build-noint8
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: |
+        export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
+        cd build-noint8
+        TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2
+
   linux-gcc-arm82:
     runs-on: ubuntu-20.04
     steps:
@@ -90,13 +101,24 @@ jobs:
         sudo apt-get update
         sudo apt-get install g++-aarch64-linux-gnu
 
-    - name: configure
-      run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
     - name: build
-      run: cmake --build build -j 2
-
+      run: |
+        mkdir build && cd build
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
+        cmake --build . -j 2
     - name: test
       run: |
         export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
         cd build
         TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2
+
+    - name: build-noint8
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: |
+        export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
+        cd build-noint8
+        TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2
diff --git a/.github/workflows/linux-arm-cpu-gcc.yml b/.github/workflows/linux-arm-cpu-gcc.yml
index 703593274..71c637a12 100644
--- a/.github/workflows/linux-arm-cpu-gcc.yml
+++ b/.github/workflows/linux-arm-cpu-gcc.yml
@@ -43,17 +43,28 @@ jobs:
         sudo apt-get update
         sudo apt-get install g++-arm-linux-gnueabi
 
-    - name: configure
-      run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
     - name: build
-      run: cmake --build build -j 2
-
+      run: |
+        mkdir build && cd build
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
+        cmake --build . -j 2
     - name: test
       run: |
         export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
         cd build
         TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2
 
+    - name: build-noint8
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: |
+        export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
+        cd build-noint8
+        TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j 2
+
   linux-gcc-armhf:
     runs-on: ubuntu-20.04
     steps:
@@ -90,13 +101,24 @@ jobs:
         sudo apt-get update
         sudo apt-get install g++-arm-linux-gnueabihf
 
-    - name: configure
-      run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
     - name: build
-      run: cmake --build build -j 2
-
+      run: |
+        mkdir build && cd build
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
+        cmake --build . -j 2
     - name: test
       run: |
         export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
         cd build
         TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2
+
+    - name: build-noint8
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: |
+        export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
+        cd build-noint8
+        TESTS_EXECUTABLE_LOADER=qemu-arm TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j 2
diff --git a/.github/workflows/linux-x64-cpu-clang.yml b/.github/workflows/linux-x64-cpu-clang.yml
index 2466a45bb..b4a746645 100644
--- a/.github/workflows/linux-x64-cpu-clang.yml
+++ b/.github/workflows/linux-x64-cpu-clang.yml
@@ -47,6 +47,16 @@ jobs:
         cmake --build . -j 2
     - name: test-avx2
       run: cd build-avx2 && ctest --output-on-failure -j 2
+    - name: build-noint8
+      env:
+        CC: clang
+        CXX: clang++
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: cd build-noint8 && ctest --output-on-failure -j 2
 
   linux-clang-simplestl:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/linux-x64-cpu-gcc.yml b/.github/workflows/linux-x64-cpu-gcc.yml
index 6e153f63d..96230aaab 100644
--- a/.github/workflows/linux-x64-cpu-gcc.yml
+++ b/.github/workflows/linux-x64-cpu-gcc.yml
@@ -38,6 +38,13 @@ jobs:
         cmake --build . -j 2
     - name: test-avx2
       run: cd build-avx2 && ctest --output-on-failure -j 2
+    - name: build-noint8
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: cd build-noint8 && ctest --output-on-failure -j 2
 
   linux-gcc-cpp03-nostdio-nostring-simplestl:
     runs-on: ubuntu-16.04
diff --git a/.github/workflows/linux-x86-cpu-clang.yml b/.github/workflows/linux-x86-cpu-clang.yml
index 8e9512a9c..991c5fd04 100644
--- a/.github/workflows/linux-x86-cpu-clang.yml
+++ b/.github/workflows/linux-x86-cpu-clang.yml
@@ -37,3 +37,13 @@ jobs:
         mkdir build-shared && cd build-shared
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON ..
         cmake --build . -j 2
+    - name: build-noint8
+      env:
+        CC: clang
+        CXX: clang++
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: cd build-noint8 && ctest --output-on-failure -j 2
diff --git a/.github/workflows/linux-x86-cpu-gcc.yml b/.github/workflows/linux-x86-cpu-gcc.yml
index e604fe445..9742f5759 100644
--- a/.github/workflows/linux-x86-cpu-gcc.yml
+++ b/.github/workflows/linux-x86-cpu-gcc.yml
@@ -31,3 +31,10 @@ jobs:
         mkdir build-shared && cd build-shared
         cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON -DNCNN_ENABLE_LTO=ON ..
         cmake --build . -j 2
+    - name: build-noint8
+      run: |
+        mkdir build-noint8 && cd build-noint8
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF ..
+        cmake --build . -j 2
+    - name: test-noint8
+      run: cd build-noint8 && ctest --output-on-failure -j 2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 505809ece..c5eac12c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,6 +79,7 @@ option(NCNN_BUILD_TESTS "build tests" OFF)
 option(NCNN_COVERAGE "build for coverage" OFF)
 option(NCNN_BUILD_BENCHMARK "build benchmark" ON)
 option(NCNN_PYTHON "build python api" OFF)
+option(NCNN_INT8 "int8 inference" ON)
 
 if(ANDROID OR IOS OR NCNN_SIMPLESTL OR CMAKE_CROSSCOMPILING)
     option(NCNN_DISABLE_RTTI "disable rtti" ON)
diff --git a/docs/how-to-use-and-FAQ/build-minimal-library.md b/docs/how-to-use-and-FAQ/build-minimal-library.md
index 841e15d8a..f8a8255b7 100644
--- a/docs/how-to-use-and-FAQ/build-minimal-library.md
+++ b/docs/how-to-use-and-FAQ/build-minimal-library.md
@@ -39,6 +39,15 @@ cmake -DNCNN_STRING=OFF ..
 
     Read more [here](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md#input-and-output).
 
+### disable NCNN_INT8
+
+```
+cmake -DNCNN_INT8=OFF ..
+```
+
+* Cannot use quantized int8 inference.
+
+
 ### drop pixel rotate and affine functions
 
 ```
diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp
index c0311cce9..95ca29533 100644
--- a/src/layer/arm/convolution_arm.cpp
+++ b/src/layer/arm/convolution_arm.cpp
@@ -29,19 +29,21 @@ namespace ncnn {
 
 #include "convolution_bf16s.h"
 #include "convolution_sgemm.h"
-#include "convolution_sgemm_int8.h"
 
 #include "convolution_1x1.h"
 #include "convolution_1x1_bf16s.h"
-#include "convolution_1x1_int8.h"
 #include "convolution_2x2.h"
 #include "convolution_3x3.h"
-#include "convolution_3x3_int8.h"
 #include "convolution_4x4.h"
 #include "convolution_5x5.h"
 #include "convolution_7x7.h"
 
+#if NCNN_INT8
+#include "convolution_sgemm_int8.h"
+#include "convolution_1x1_int8.h"
+#include "convolution_3x3_int8.h"
 #include "convolution_int8.h"
+#endif // NCNN_INT8
 
 #if __ARM_NEON
 #include "convolution_pack4.h"
@@ -67,6 +69,7 @@ namespace ncnn {
 #include "convolution_7x7_pack1to4.h"
 #include "convolution_7x7_pack1to4_bf16s.h"
 
+#if NCNN_INT8
 #include "convolution_pack8_int8.h"
 #include "convolution_pack1to8_int8.h"
 #include "convolution_pack8to1_int8.h"
@@ -80,6 +83,7 @@ namespace ncnn {
 #include "convolution_3x3_pack1to8_int8.h"
 #include "convolution_7x7_pack1to8_int8.h"
 #include "convolution_3x3_pack8to1_int8.h"
+#endif // NCNN_INT8
 
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #include "convolution_fp16s.h"
@@ -169,10 +173,12 @@ int Convolution_arm::create_pipeline(const Option& opt)
         activation->create_pipeline(opt);
     }
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return create_pipeline_int8_arm(opt);
     }
+#endif
 
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     if (opt.use_fp16_storage)
@@ -418,10 +424,12 @@ int Convolution_arm::destroy_pipeline(const Option& opt)
 
 int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8_arm(bottom_blob, top_blob, opt);
     }
+#endif
 
     if (bottom_blob.dims != 3)
     {
@@ -1767,6 +1775,7 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const
     return 0;
 }
 
+#if NCNN_INT8
 int Convolution_arm::create_pipeline_int8_arm(const Option& opt)
 {
     const int maxk = kernel_w * kernel_h;
@@ -2263,6 +2272,7 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con
 
     return 0;
 }
+#endif // NCNN_INT8
 
 int Convolution_arm::forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
diff --git a/src/layer/arm/convolution_arm.h b/src/layer/arm/convolution_arm.h
index ae49752e4..e0eca6a08 100644
--- a/src/layer/arm/convolution_arm.h
+++ b/src/layer/arm/convolution_arm.h
@@ -37,8 +37,10 @@ protected:
 #endif
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#if NCNN_INT8
     int create_pipeline_int8_arm(const Option& opt);
     int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
     int forwardDilation_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
 public:
@@ -67,11 +69,13 @@ public:
     // bf16
     Mat weight_data_bf16;
 
+#if NCNN_INT8
     // int8
     Mat weight_data_int8;
 
     //     Mat weight_3x3s2_data_int8;
     std::vector<Mat> weight_3x3_winograd23_data_int8;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp
index 2b393b481..4f13f1ccb 100644
--- a/src/layer/arm/convolutiondepthwise_arm.cpp
+++ b/src/layer/arm/convolutiondepthwise_arm.cpp
@@ -26,16 +26,21 @@
 namespace ncnn {
 
 #include "convolutiondepthwise_3x3.h"
-#include "convolutiondepthwise_3x3_int8.h"
 #include "convolutiondepthwise_5x5.h"
 
+#if NCNN_INT8
+#include "convolutiondepthwise_3x3_int8.h"
+#endif // NCNN_INT8
+
 #if __ARM_NEON
 #include "convolutiondepthwise_3x3_pack4.h"
 #include "convolutiondepthwise_3x3_pack4_bf16s.h"
 #include "convolutiondepthwise_5x5_pack4.h"
 #include "convolutiondepthwise_5x5_pack4_bf16s.h"
 
+#if NCNN_INT8
 #include "convolutiondepthwise_3x3_pack8_int8.h"
+#endif // NCNN_INT8
 
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #include "convolutiondepthwise_3x3_fp16s.h"
@@ -104,10 +109,12 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
         activation->create_pipeline(opt);
     }
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return create_pipeline_int8_arm(opt);
     }
+#endif
 
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
@@ -269,6 +276,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
             weights[0] = weight_data_g;
             weights[1] = bias_data_g;
 
+#if NCNN_INT8
             if (int8_scale_term)
             {
                 Mat weight_data_int8_scales_g(num_output_g);
@@ -280,6 +288,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
             {
                 weights[4] = top_blob_int8_scales.range(g, 1);
             }
+#endif
 
             op->load_model(ModelBinFromMatArray(weights));
         }
@@ -288,6 +297,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
             ncnn::Mat weights[4];
             weights[0] = weight_data_g;
 
+#if NCNN_INT8
             if (int8_scale_term)
             {
                 Mat weight_data_int8_scales_g(num_output_g);
@@ -299,6 +309,7 @@ int ConvolutionDepthWise_arm::create_group_ops(const Option& opt)
             {
                 weights[3] = top_blob_int8_scales.range(g, 1);
             }
+#endif
 
             op->load_model(ModelBinFromMatArray(weights));
         }
@@ -332,13 +343,12 @@ int ConvolutionDepthWise_arm::destroy_pipeline(const Option& opt)
 
 int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
-    // convolv with NxN kernel
-    // value = value + bias
-
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8_arm(bottom_blob, top_blob, opt);
     }
+#endif
 
     int elembits = bottom_blob.elembits();
 
@@ -1447,6 +1457,7 @@ int ConvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blo
     return 0;
 }
 
+#if NCNN_INT8
 int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt)
 {
     const int maxk = kernel_w * kernel_h;
@@ -1981,5 +1992,6 @@ int ConvolutionDepthWise_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_
 
     return 0;
 }
+#endif // NCNN_INT8
 
 } // namespace ncnn
diff --git a/src/layer/arm/convolutiondepthwise_arm.h b/src/layer/arm/convolutiondepthwise_arm.h
index 2d754d71c..2cff01cb9 100644
--- a/src/layer/arm/convolutiondepthwise_arm.h
+++ b/src/layer/arm/convolutiondepthwise_arm.h
@@ -36,8 +36,10 @@ protected:
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#if NCNN_INT8
     int create_pipeline_int8_arm(const Option& opt);
     int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     Layer* activation;
@@ -54,8 +56,10 @@ public:
     Mat weight_data_bf16;
     Mat weight_data_pack4_bf16;
 
+#if NCNN_INT8
     // int8
     Mat weight_data_int8;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/innerproduct_arm.cpp b/src/layer/arm/innerproduct_arm.cpp
index a363ae597..840bd7afa 100644
--- a/src/layer/arm/innerproduct_arm.cpp
+++ b/src/layer/arm/innerproduct_arm.cpp
@@ -55,10 +55,12 @@ int InnerProduct_arm::create_pipeline(const Option& opt)
     }
 #endif // __ARM_NEON
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return create_pipeline_int8_arm(opt);
     }
+#endif
 
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     if (opt.use_fp16_storage)
@@ -94,260 +96,14 @@ int InnerProduct_arm::destroy_pipeline(const Option& opt)
     return 0;
 }
 
-int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt)
-{
-    if (activation_type == 1)
-    {
-        activation = ncnn::create_layer(ncnn::LayerType::ReLU);
-
-        ncnn::ParamDict pd;
-        activation->load_param(pd);
-    }
-
-    const int num_input = weight_data_size / num_output;
-
-    int out_elempack = 1;
-
-    if (opt.use_packing_layout)
-    {
-        out_elempack = num_output % 8 == 0 ? 8 : 1;
-    }
-
-    // src = inch-outch
-    // dst = pb-inch-outch/pb
-    {
-        Mat weight_data_r2 = weight_data.reshape(num_input, num_output);
-
-        weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack);
-
-        for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack)
-        {
-            signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack);
-
-            for (int p = 0; p < num_input; p++)
-            {
-                for (int j = 0; j < out_elempack; j++)
-                {
-                    *g0++ = weight_data_r2.row<signed char>(q + j)[p];
-                }
-            }
-        }
-    }
-
-    //     // convert fp32 to int8
-    //     if (weight_data_int8_scales.empty())
-    //     {
-    //         return 0;
-    //     }
-    // #if __aarch64__
-    //     // first reorder Matrix A before MatMul
-    //     const int n = num_output;
-    //     const int k = weight_data.total() / n;
-    //     weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator);
-    //
-    //     int8_t* b = weight_data;
-    //     int8_t* sb = weight_data_int8;
-    //     reorder_a(b, sb, n, k, k);
-    //
-    //     // pre-built scales
-    //     scales_in.create(num_output, 4u, opt.blob_allocator);
-    //     for (int i = 0; i < num_output; ++i)
-    //     {
-    //         if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6)
-    //         {
-    //             scales_in[i] = 0.f;
-    //         }
-    //         else
-    //         {
-    //             scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]);
-    //         }
-    //     }
-    // #endif
-    return 0;
-}
-
-int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
-{
-    const int num_input = weight_data_size / num_output;
-
-    if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1)
-    {
-        // gemm
-        Mat bottom_blob_unpacked;
-        Option opt_unpack = opt;
-        opt_unpack.blob_allocator = opt.workspace_allocator;
-        convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack);
-
-        return forward_int8(bottom_blob_unpacked, top_blob, opt);
-    }
-
-    int elembits = bottom_blob.elembits();
-
-    Mat bottom_blob_int8 = bottom_blob;
-    if (elembits != 8)
-    {
-        Option opt_q = opt;
-        opt_q.blob_allocator = opt.workspace_allocator;
-        quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q);
-    }
-
-    Mat bottom_blob_int8_flattened = bottom_blob_int8;
-    if (bottom_blob_int8.dims != 1)
-    {
-        Option opt_flatten = opt;
-        opt_flatten.blob_allocator = opt.workspace_allocator;
-        flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten);
-    }
-
-    //     int elempack = bottom_blob_int8_flattened.elempack;
-
-    int out_elempack = 1;
-    if (opt.use_packing_layout)
-    {
-        out_elempack = num_output % 8 == 0 ? 8 : 1;
-    }
-
-    top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator);
-    if (top_blob.empty())
-        return -100;
-
-    Mat top_blob_int32;
-    top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator);
-    if (top_blob_int32.empty())
-        return -100;
-
-#if __ARM_NEON
-    if (out_elempack == 8)
-    {
-        // num_output
-        #pragma omp parallel for num_threads(opt.num_threads)
-        for (int p = 0; p < num_output / out_elempack; p++)
-        {
-            int32x4_t _sum0 = vdupq_n_s32(0);
-            int32x4_t _sum1 = vdupq_n_s32(0);
-
-            const signed char* kptr = weight_data_int8.row<const signed char>(p);
-            const signed char* sptr = bottom_blob_int8_flattened;
-
-            int i = 0;
-            for (; i < num_input; i++)
-            {
-                int8x8_t _val = vdup_n_s8(sptr[0]);
-
-                int8x8_t _w = vld1_s8(kptr);
-
-                int16x8_t _s0 = vmull_s8(_val, _w);
-                _sum0 = vaddw_s16(_sum0, vget_low_s16(_s0));
-                _sum1 = vaddw_s16(_sum1, vget_high_s16(_s0));
-
-                sptr += 1;
-                kptr += 8;
-            }
-
-            int* outptr = (int*)top_blob_int32;
-            vst1q_s32(outptr + p * 8, _sum0);
-            vst1q_s32(outptr + p * 8 + 4, _sum1);
-        }
-    }
-#endif // __ARM_NEON
-
-    if (out_elempack == 1)
-    {
-        // num_output
-        #pragma omp parallel for num_threads(opt.num_threads)
-        for (int p = 0; p < num_output / out_elempack; p++)
-        {
-            int sum = 0;
-
-            const signed char* kptr = weight_data_int8.row<const signed char>(p);
-            const signed char* sptr = bottom_blob_int8_flattened;
-
-            int i = 0;
-            for (; i < num_input; i++)
-            {
-                signed char val = sptr[0];
-
-                signed char w = kptr[0];
-
-                sum += val * w;
-
-                sptr += 1;
-                kptr += 1;
-            }
-
-            int* outptr = (int*)top_blob_int32;
-            outptr[p] = sum;
-        }
-    }
-
-    Mat scale_data(num_output);
-    for (int p = 0; p < num_output; p++)
-    {
-        // dequantize
-        float scale_in;
-        if (weight_data_int8_scales[p] == 0)
-            scale_in = 0;
-        else
-            scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]);
-
-        scale_data[p] = scale_in;
-    }
-
-    dequantize_from_int32(top_blob_int32, top_blob, scale_data, bias_data, opt);
-
-    if (activation)
-    {
-        activation->forward_inplace(top_blob, opt);
-    }
-
-    return 0;
-
-    // #if __aarch64__
-    //     const int w = bottom_blob_tm.w;
-    //     const int h = bottom_blob_tm.h;
-    //
-    //     const int m = 1;
-    //     const int k = bottom_blob_tm.c * w * h;
-    //     Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator);
-    //     {
-    //         reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k);
-    //     }
-    //
-    //     Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator);
-    //     int32_t* pc = top_blob_tm;
-    //     const int8_t* pa = bottom_blob_reorder;
-    //     const int8_t* pb = weight_data_int8;
-    //     int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt);
-    //
-    //     float* outptr = top_blob;
-    //
-    //     // dequant.fused.relu int32_t to float
-    //     for (int p = 0; p < num_output; ++p)
-    //     {
-    //         float sumfp32 = pc[p] * scales_in[p];
-    //         if (bias_term)
-    //         {
-    //             sumfp32 += bias_data[p];
-    //         }
-    //         if (1 == activation_type)
-    //         {
-    //             sumfp32 = std::max(0.f, sumfp32);
-    //         }
-    //
-    //         outptr[p] = sumfp32;
-    //     }
-    //     return 0;
-    // #else
-    //     return InnerProduct::forward_int8(bottom_blob, top_blob, opt);
-    // #endif
-}
-
 int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8_arm(bottom_blob, top_blob, opt);
     }
+#endif
 
     int elembits = bottom_blob.elembits();
 
@@ -2140,4 +1896,254 @@ int InnerProduct_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const
     return 0;
 }
 
+#if NCNN_INT8
+int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt)
+{
+    if (activation_type == 1)
+    {
+        activation = ncnn::create_layer(ncnn::LayerType::ReLU);
+
+        ncnn::ParamDict pd;
+        activation->load_param(pd);
+    }
+
+    const int num_input = weight_data_size / num_output;
+
+    int out_elempack = 1;
+
+    if (opt.use_packing_layout)
+    {
+        out_elempack = num_output % 8 == 0 ? 8 : 1;
+    }
+
+    // src = inch-outch
+    // dst = pb-inch-outch/pb
+    {
+        Mat weight_data_r2 = weight_data.reshape(num_input, num_output);
+
+        weight_data_int8.create(num_input, num_output / out_elempack, (size_t)out_elempack, out_elempack);
+
+        for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack)
+        {
+            signed char* g0 = weight_data_int8.row<signed char>(q / out_elempack);
+
+            for (int p = 0; p < num_input; p++)
+            {
+                for (int j = 0; j < out_elempack; j++)
+                {
+                    *g0++ = weight_data_r2.row<signed char>(q + j)[p];
+                }
+            }
+        }
+    }
+
+    //     // convert fp32 to int8
+    //     if (weight_data_int8_scales.empty())
+    //     {
+    //         return 0;
+    //     }
+    // #if __aarch64__
+    //     // first reorder Matrix A before MatMul
+    //     const int n = num_output;
+    //     const int k = weight_data.total() / n;
+    //     weight_data_int8.create(n * k, (size_t)1u, opt.blob_allocator);
+    //
+    //     int8_t* b = weight_data;
+    //     int8_t* sb = weight_data_int8;
+    //     reorder_a(b, sb, n, k, k);
+    //
+    //     // pre-built scales
+    //     scales_in.create(num_output, 4u, opt.blob_allocator);
+    //     for (int i = 0; i < num_output; ++i)
+    //     {
+    //         if (std::fabs(static_cast<float>(weight_data_int8_scales[i])) <= 1e-6)
+    //         {
+    //             scales_in[i] = 0.f;
+    //         }
+    //         else
+    //         {
+    //             scales_in[i] = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[i]);
+    //         }
+    //     }
+    // #endif
+    return 0;
+}
+
+int InnerProduct_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    const int num_input = weight_data_size / num_output;
+
+    if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1)
+    {
+        // gemm
+        Mat bottom_blob_unpacked;
+        Option opt_unpack = opt;
+        opt_unpack.blob_allocator = opt.workspace_allocator;
+        convert_packing(bottom_blob, bottom_blob_unpacked, 1, opt_unpack);
+
+        return forward_int8(bottom_blob_unpacked, top_blob, opt);
+    }
+
+    int elembits = bottom_blob.elembits();
+
+    Mat bottom_blob_int8 = bottom_blob;
+    if (elembits != 8)
+    {
+        Option opt_q = opt;
+        opt_q.blob_allocator = opt.workspace_allocator;
+        quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_q);
+    }
+
+    Mat bottom_blob_int8_flattened = bottom_blob_int8;
+    if (bottom_blob_int8.dims != 1)
+    {
+        Option opt_flatten = opt;
+        opt_flatten.blob_allocator = opt.workspace_allocator;
+        flatten->forward(bottom_blob_int8, bottom_blob_int8_flattened, opt_flatten);
+    }
+
+    //     int elempack = bottom_blob_int8_flattened.elempack;
+
+    int out_elempack = 1;
+    if (opt.use_packing_layout)
+    {
+        out_elempack = num_output % 8 == 0 ? 8 : 1;
+    }
+
+    top_blob.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
+    Mat top_blob_int32;
+    top_blob_int32.create(num_output / out_elempack, (size_t)(4u * out_elempack), out_elempack, opt.workspace_allocator);
+    if (top_blob_int32.empty())
+        return -100;
+
+#if __ARM_NEON
+    if (out_elempack == 8)
+    {
+        // num_output
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int p = 0; p < num_output / out_elempack; p++)
+        {
+            int32x4_t _sum0 = vdupq_n_s32(0);
+            int32x4_t _sum1 = vdupq_n_s32(0);
+
+            const signed char* kptr = weight_data_int8.row<const signed char>(p);
+            const signed char* sptr = bottom_blob_int8_flattened;
+
+            int i = 0;
+            for (; i < num_input; i++)
+            {
+                int8x8_t _val = vdup_n_s8(sptr[0]);
+
+                int8x8_t _w = vld1_s8(kptr);
+
+                int16x8_t _s0 = vmull_s8(_val, _w);
+                _sum0 = vaddw_s16(_sum0, vget_low_s16(_s0));
+                _sum1 = vaddw_s16(_sum1, vget_high_s16(_s0));
+
+                sptr += 1;
+                kptr += 8;
+            }
+
+            int* outptr = (int*)top_blob_int32;
+            vst1q_s32(outptr + p * 8, _sum0);
+            vst1q_s32(outptr + p * 8 + 4, _sum1);
+        }
+    }
+#endif // __ARM_NEON
+
+    if (out_elempack == 1)
+    {
+        // num_output
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int p = 0; p < num_output / out_elempack; p++)
+        {
+            int sum = 0;
+
+            const signed char* kptr = weight_data_int8.row<const signed char>(p);
+            const signed char* sptr = bottom_blob_int8_flattened;
+
+            int i = 0;
+            for (; i < num_input; i++)
+            {
+                signed char val = sptr[0];
+
+                signed char w = kptr[0];
+
+                sum += val * w;
+
+                sptr += 1;
+                kptr += 1;
+            }
+
+            int* outptr = (int*)top_blob_int32;
+            outptr[p] = sum;
+        }
+    }
+
+    Mat scale_data(num_output);
+    for (int p = 0; p < num_output; p++)
+    {
+        // dequantize
+        float scale_in;
+        if (weight_data_int8_scales[p] == 0)
+            scale_in = 0;
+        else
+            scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]);
+
+        scale_data[p] = scale_in;
+    }
+
+    dequantize_from_int32(top_blob_int32, top_blob, scale_data, bias_data, opt);
+
+    if (activation)
+    {
+        activation->forward_inplace(top_blob, opt);
+    }
+
+    return 0;
+
+    // #if __aarch64__
+    //     const int w = bottom_blob_tm.w;
+    //     const int h = bottom_blob_tm.h;
+    //
+    //     const int m = 1;
+    //     const int k = bottom_blob_tm.c * w * h;
+    //     Mat bottom_blob_reorder(m * k, (size_t)1u, opt.workspace_allocator);
+    //     {
+    //         reorder_a(bottom_blob_tm_flattened, bottom_blob_reorder, m, k, k);
+    //     }
+    //
+    //     Mat top_blob_tm(m * num_output, (size_t)4u, opt.workspace_allocator);
+    //     int32_t* pc = top_blob_tm;
+    //     const int8_t* pa = bottom_blob_reorder;
+    //     const int8_t* pb = weight_data_int8;
+    //     int8kernel((void*)pc, pa, pb, m, k, num_output, num_output, 0, 0, opt);
+    //
+    //     float* outptr = top_blob;
+    //
+    //     // dequant.fused.relu int32_t to float
+    //     for (int p = 0; p < num_output; ++p)
+    //     {
+    //         float sumfp32 = pc[p] * scales_in[p];
+    //         if (bias_term)
+    //         {
+    //             sumfp32 += bias_data[p];
+    //         }
+    //         if (1 == activation_type)
+    //         {
+    //             sumfp32 = std::max(0.f, sumfp32);
+    //         }
+    //
+    //         outptr[p] = sumfp32;
+    //     }
+    //     return 0;
+    // #else
+    //     return InnerProduct::forward_int8(bottom_blob, top_blob, opt);
+    // #endif
+}
+#endif // NCNN_INT8
+
 } // namespace ncnn
diff --git a/src/layer/arm/innerproduct_arm.h b/src/layer/arm/innerproduct_arm.h
index 17107c0ca..4cdc600bb 100644
--- a/src/layer/arm/innerproduct_arm.h
+++ b/src/layer/arm/innerproduct_arm.h
@@ -39,9 +39,10 @@ protected:
 #endif
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
-
+#if NCNN_INT8
     int create_pipeline_int8_arm(const Option& opt);
     int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     Layer* flatten;
@@ -54,9 +55,11 @@ public:
     // bf16
     Mat weight_data_bf16;
 
+#if NCNN_INT8
     // int8
     Mat weight_data_int8;
     Mat scales_in;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/convolution.cpp b/src/layer/convolution.cpp
index c674de62f..ff610843a 100644
--- a/src/layer/convolution.cpp
+++ b/src/layer/convolution.cpp
@@ -47,7 +47,12 @@ int Convolution::load_param(const ParamDict& pd)
 
     if (int8_scale_term)
     {
+#if NCNN_INT8
         support_int8_storage = true;
+#else
+        NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference");
+        return -1;
+#endif
     }
 
     return 0;
@@ -66,6 +71,7 @@ int Convolution::load_model(const ModelBin& mb)
             return -100;
     }
 
+#if NCNN_INT8
     if (int8_scale_term)
     {
         weight_data_int8_scales = mb.load(num_output, 1);
@@ -76,12 +82,14 @@ int Convolution::load_model(const ModelBin& mb)
     {
         top_blob_int8_scales = mb.load(1, 1);
     }
+#endif // NCNN_INT8
 
     return 0;
 }
 
 int Convolution::create_pipeline(const Option& opt)
 {
+#if NCNN_INT8
     // runtime quantize the weight data
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term)
     {
@@ -101,6 +109,7 @@ int Convolution::create_pipeline(const Option& opt)
 
         weight_data = weight_data_int8.reshape(weight_data_size);
     }
+#endif // NCNN_INT8
 
     return 0;
 }
@@ -110,10 +119,12 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
     // convolv with NxN kernel
     // value = value + bias
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8(bottom_blob, top_blob, opt);
     }
+#endif
 
     // flattened blob, implement as InnerProduct
     if (bottom_blob.dims == 1 && kernel_w == 1 && kernel_h == 1)
@@ -140,11 +151,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
             weights[0] = weight_data;
             weights[1] = bias_data;
 
+#if NCNN_INT8
             if (int8_scale_term)
             {
                 weights[2] = weight_data_int8_scales;
                 weights[3] = bottom_blob_int8_scales;
             }
+#endif
 
             op->load_model(ModelBinFromMatArray(weights));
 
@@ -327,6 +340,7 @@ void Convolution::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered
     }
 }
 
+#if NCNN_INT8
 static inline signed char float2int8(float v)
 {
     int int32 = static_cast<int>(round(v));
@@ -492,5 +506,6 @@ int Convolution::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Optio
 
     return 0;
 }
+#endif // NCNN_INT8
 
 } // namespace ncnn
diff --git a/src/layer/convolution.h b/src/layer/convolution.h
index d3f90d708..f3458248a 100644
--- a/src/layer/convolution.h
+++ b/src/layer/convolution.h
@@ -35,7 +35,9 @@ public:
 protected:
     void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const;
 
+#if NCNN_INT8
     int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     // param
@@ -65,9 +67,11 @@ public:
     Mat weight_data;
     Mat bias_data;
 
+#if NCNN_INT8
     Mat weight_data_int8_scales;
     Mat bottom_blob_int8_scales;
     Mat top_blob_int8_scales;
+#endif
 
     // implementation type, 0 means do not use auto pack model
     int impl_type;
diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp
index a274cdb70..3f7d0e3d8 100644
--- a/src/layer/convolutiondepthwise.cpp
+++ b/src/layer/convolutiondepthwise.cpp
@@ -53,7 +53,12 @@ int ConvolutionDepthWise::load_param(const ParamDict& pd)
 
     if (int8_scale_term)
     {
+#if NCNN_INT8
         support_int8_storage = true;
+#else
+        NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference");
+        return -1;
+#endif
     }
 
     return 0;
@@ -72,6 +77,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
             return -100;
     }
 
+#if NCNN_INT8
     if (int8_scale_term == 1 || int8_scale_term == 101)
     {
         weight_data_int8_scales = mb.load(group, 1);
@@ -104,12 +110,14 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
         top_blob_int8_scales = Mat(group);
         top_blob_int8_scales.fill(top_blob_int8_scale);
     }
+#endif // NCNN_INT8
 
     return 0;
 }
 
 int ConvolutionDepthWise::create_pipeline(const Option& opt)
 {
+#if NCNN_INT8
     // runtime quantize the weight data
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term)
     {
@@ -133,6 +141,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)
 
         weight_data = int8_weight_data;
     }
+#endif // NCNN_INT8
 
     return 0;
 }
@@ -142,10 +151,12 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
     // convolv with NxN kernel
     // value = value + bias
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8(bottom_blob, top_blob, opt);
     }
+#endif
 
     int w = bottom_blob.w;
     int h = bottom_blob.h;
@@ -403,6 +414,7 @@ void ConvolutionDepthWise::make_padding(const Mat& bottom_blob, Mat& bottom_blob
     }
 }
 
+#if NCNN_INT8
 static inline signed char float2int8(float v)
 {
     int int32 = static_cast<int>(round(v));
@@ -694,5 +706,6 @@ int ConvolutionDepthWise::forward_int8(const Mat& bottom_blob, Mat& top_blob, co
 
     return 0;
 }
+#endif // NCNN_INT8
 
 } // namespace ncnn
diff --git a/src/layer/convolutiondepthwise.h b/src/layer/convolutiondepthwise.h
index c32e3203f..07cadd19a 100644
--- a/src/layer/convolutiondepthwise.h
+++ b/src/layer/convolutiondepthwise.h
@@ -35,7 +35,9 @@ public:
 protected:
     void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const;
 
+#if NCNN_INT8
     int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     // param
@@ -66,9 +68,11 @@ public:
     Mat weight_data;
     Mat bias_data;
 
+#if NCNN_INT8
     Mat weight_data_int8_scales;
     Mat bottom_blob_int8_scales;
     Mat top_blob_int8_scales;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/innerproduct.cpp b/src/layer/innerproduct.cpp
index b236317c3..e54752947 100644
--- a/src/layer/innerproduct.cpp
+++ b/src/layer/innerproduct.cpp
@@ -35,7 +35,12 @@ int InnerProduct::load_param(const ParamDict& pd)
 
     if (int8_scale_term)
     {
+#if NCNN_INT8
         support_int8_storage = true;
+#else
+        NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference");
+        return -1;
+#endif
     }
 
     return 0;
@@ -54,17 +59,20 @@ int InnerProduct::load_model(const ModelBin& mb)
             return -100;
     }
 
+#if NCNN_INT8
     if (int8_scale_term)
     {
         weight_data_int8_scales = mb.load(num_output, 1);
         bottom_blob_int8_scales = mb.load(1, 1);
     }
+#endif // NCNN_INT8
 
     return 0;
 }
 
 int InnerProduct::create_pipeline(const Option& opt)
 {
+#if NCNN_INT8
     // runtime quantize the weight data
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)4u && int8_scale_term)
     {
@@ -81,16 +89,19 @@ int InnerProduct::create_pipeline(const Option& opt)
 
         weight_data = weight_data_int8.reshape(weight_data_size);
     }
+#endif // NCNN_INT8
 
     return 0;
 }
 
 int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8(bottom_blob, top_blob, opt);
     }
+#endif
 
     const int num_input = weight_data_size / num_output;
 
@@ -218,6 +229,7 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& o
     return 0;
 }
 
+#if NCNN_INT8
 int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     const int num_input = weight_data_size / num_output;
@@ -332,5 +344,6 @@ int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Opti
 
     return 0;
 }
+#endif // NCNN_INT8
 
 } // namespace ncnn
diff --git a/src/layer/innerproduct.h b/src/layer/innerproduct.h
index 91f5cb9f3..1f9b3fdc0 100644
--- a/src/layer/innerproduct.h
+++ b/src/layer/innerproduct.h
@@ -33,7 +33,9 @@ public:
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
 protected:
+#if NCNN_INT8
     int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     // param
@@ -52,8 +54,10 @@ public:
     Mat weight_data;
     Mat bias_data;
 
+#if NCNN_INT8
     Mat weight_data_int8_scales;
     Mat bottom_blob_int8_scales;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp
index a8612005f..48775bace 100644
--- a/src/layer/x86/convolution_x86.cpp
+++ b/src/layer/x86/convolution_x86.cpp
@@ -35,18 +35,21 @@ namespace ncnn {
 #include "convolution_5x5.h"
 #include "convolution_7x7.h"
 
+#if NCNN_INT8
 #include "convolution_sgemm_int8.h"
 #include "convolution_1x1_int8.h"
 #include "convolution_3x3_int8.h"
-
 #include "convolution_int8.h"
+#endif // NCNN_INT8
 
 #if __SSE2__
 #include "convolution_1x1_pack4.h"
 
+#if NCNN_INT8
 #include "convolution_pack8_int8.h"
 #include "convolution_pack1to8_int8.h"
 #include "convolution_pack8to1_int8.h"
+#endif // NCNN_INT8
 #if __AVX__
 #include "convolution_3x3_pack1to8.h"
 #include "convolution_3x3_pack8to1.h"
@@ -118,10 +121,12 @@ int Convolution_x86::create_pipeline(const Option& opt)
         activation->create_pipeline(opt);
     }
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return create_pipeline_int8_x86(opt);
     }
+#endif
 
     int kernel_size = kernel_w * kernel_h;
     int num_input = weight_data_size / kernel_size / num_output;
@@ -311,10 +316,12 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option
     // convolv with NxN kernel
     // value = value + bias
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8_x86(bottom_blob, top_blob, opt);
     }
+#endif
 
     if (bottom_blob.dims != 3)
     {
@@ -1058,6 +1065,7 @@ int Convolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option
     return 0;
 }
 
+#if NCNN_INT8
 int Convolution_x86::create_pipeline_int8_x86(const Option& opt)
 {
     const int maxk = kernel_w * kernel_h;
@@ -1410,6 +1418,7 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con
 
     return 0;
 }
+#endif // NCNN_INT8
 
 int Convolution_x86::forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
diff --git a/src/layer/x86/convolution_x86.h b/src/layer/x86/convolution_x86.h
index 561118d9c..326a98cb0 100644
--- a/src/layer/x86/convolution_x86.h
+++ b/src/layer/x86/convolution_x86.h
@@ -30,8 +30,10 @@ public:
     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
 protected:
+#if NCNN_INT8
     int create_pipeline_int8_x86(const Option& opt);
     int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
     int forwardDilation_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
 public:
@@ -47,9 +49,11 @@ public:
 
     Mat weight_3x3_winograd64_data_pack8;
 
+#if NCNN_INT8
     // int8
     Mat weight_data_int8;
     Mat weight_3x3_winograd23_data_int8;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/x86/convolutiondepthwise_x86.cpp b/src/layer/x86/convolutiondepthwise_x86.cpp
index cb7f8b4ac..a43ce0cc0 100644
--- a/src/layer/x86/convolutiondepthwise_x86.cpp
+++ b/src/layer/x86/convolutiondepthwise_x86.cpp
@@ -36,7 +36,9 @@ namespace ncnn {
 #endif
 #endif // __SSE2__
 #include "convolutiondepthwise_3x3.h"
+#if NCNN_INT8
 #include "convolutiondepthwise_3x3_int8.h"
+#endif // NCNN_INT8
 
 ConvolutionDepthWise_x86::ConvolutionDepthWise_x86()
 {
@@ -102,10 +104,12 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
         activation->create_pipeline(opt);
     }
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return create_pipeline_int8_x86(opt);
     }
+#endif
 
     const int maxk = kernel_w * kernel_h;
     int channels = (weight_data_size / group) / maxk / (num_output / group) * group;
@@ -235,6 +239,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
             weights[0] = weight_data_g;
             weights[1] = bias_data_g;
 
+#if NCNN_INT8
             if (int8_scale_term)
             {
                 Mat weight_data_int8_scales_g(num_output_g);
@@ -246,6 +251,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
             {
                 weights[4] = top_blob_int8_scales.range(g, 1);
             }
+#endif
 
             op->load_model(ModelBinFromMatArray(weights));
         }
@@ -254,6 +260,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
             ncnn::Mat weights[4];
             weights[0] = weight_data_g;
 
+#if NCNN_INT8
             if (int8_scale_term)
             {
                 Mat weight_data_int8_scales_g(num_output_g);
@@ -265,6 +272,7 @@ int ConvolutionDepthWise_x86::create_group_ops(const Option& opt)
             {
                 weights[3] = top_blob_int8_scales.range(g, 1);
             }
+#endif
 
             op->load_model(ModelBinFromMatArray(weights));
         }
@@ -298,13 +306,12 @@ int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt)
 
 int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
-    // convolv with NxN kernel
-    // value = value + bias
-
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8_x86(bottom_blob, top_blob, opt);
     }
+#endif
 
     int w = bottom_blob.w;
     int h = bottom_blob.h;
@@ -628,6 +635,7 @@ int ConvolutionDepthWise_x86::forward(const Mat& bottom_blob, Mat& top_blob, con
     return 0;
 }
 
+#if NCNN_INT8
 int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt)
 {
     const int maxk = kernel_w * kernel_h;
@@ -1061,5 +1069,6 @@ int ConvolutionDepthWise_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_
 
     return 0;
 }
+#endif // NCNN_INT8
 
 } // namespace ncnn
diff --git a/src/layer/x86/convolutiondepthwise_x86.h b/src/layer/x86/convolutiondepthwise_x86.h
index cf7a27b83..3f6cdca56 100644
--- a/src/layer/x86/convolutiondepthwise_x86.h
+++ b/src/layer/x86/convolutiondepthwise_x86.h
@@ -31,8 +31,10 @@ public:
 
 protected:
     int create_group_ops(const Option& opt);
+#if NCNN_INT8
     int create_pipeline_int8_x86(const Option& opt);
     int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     Layer* activation;
@@ -41,8 +43,10 @@ public:
     // packing
     Mat weight_data_packed;
 
+#if NCNN_INT8
     // int8
     Mat weight_data_int8;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/x86/innerproduct_x86.cpp b/src/layer/x86/innerproduct_x86.cpp
index 007605cc6..ee17d7de0 100644
--- a/src/layer/x86/innerproduct_x86.cpp
+++ b/src/layer/x86/innerproduct_x86.cpp
@@ -54,10 +54,12 @@ int InnerProduct_x86::create_pipeline(const Option& opt)
         flatten->create_pipeline(opt);
     }
 
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return create_pipeline_int8_x86(opt);
     }
+#endif
 
     const int num_input = weight_data_size / num_output;
 
@@ -124,10 +126,12 @@ int InnerProduct_x86::destroy_pipeline(const Option& opt)
 
 int InnerProduct_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
+#if NCNN_INT8
     if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
     {
         return forward_int8_x86(bottom_blob, top_blob, opt);
     }
+#endif
 
     const int num_input = weight_data_size / num_output;
 
@@ -1694,6 +1698,7 @@ int InnerProduct_x86::forward_fp16(const Mat& bottom_blob, Mat& top_blob, const
 }
 #endif // __AVX__
 
+#if NCNN_INT8
 int InnerProduct_x86::create_pipeline_int8_x86(const Option& opt)
 {
     if (activation_type == 1)
@@ -1883,5 +1888,6 @@ int InnerProduct_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, co
 
     return 0;
 }
+#endif // NCNN_INT8
 
 } // namespace ncnn
diff --git a/src/layer/x86/innerproduct_x86.h b/src/layer/x86/innerproduct_x86.h
index d3afcfb87..2610dfe50 100644
--- a/src/layer/x86/innerproduct_x86.h
+++ b/src/layer/x86/innerproduct_x86.h
@@ -34,9 +34,10 @@ public:
 
 protected:
     int forward_fp16(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
-
+#if NCNN_INT8
     int create_pipeline_int8_x86(const Option& opt);
     int forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     Layer* flatten;
@@ -47,9 +48,11 @@ public:
     // fp16 weight data
     Mat weight_data_fp16;
 
+#if NCNN_INT8
     // int8
     Mat weight_data_int8;
     Mat scales_in;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/platform.h.in b/src/platform.h.in
index 81e215755..44d775aa2 100644
--- a/src/platform.h.in
+++ b/src/platform.h.in
@@ -30,6 +30,7 @@
 #cmakedefine01 NCNN_RUNTIME_CPU
 #cmakedefine01 NCNN_AVX2
 #cmakedefine01 NCNN_ARM82
+#cmakedefine01 NCNN_INT8
 
 #cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@"
 
diff --git a/tests/test_convolution.cpp b/tests/test_convolution.cpp
index fa242b7b1..766a086f7 100644
--- a/tests/test_convolution.cpp
+++ b/tests/test_convolution.cpp
@@ -171,6 +171,7 @@ static int test_convolution_2()
            || test_convolution_vec(64, 128, 1, 1, 1, 0, 0);
 }
 
+#if NCNN_INT8
 static int test_convolution_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, bool requant = false)
 {
     ncnn::Mat a = RandomMat(w, h, c);
@@ -298,12 +299,20 @@ static int test_convolution_1()
            || test_convolution_int8(6, 7, 64, 64, 3, 1, 2, 0, 1)
            || test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0);
 }
+#endif // NCNN_INT8
 
 int main()
 {
     SRAND(7767517);
+
+#if NCNN_INT8
     return 0
            || test_convolution_0()
            || test_convolution_1()
            || test_convolution_2();
+#else // NCNN_INT8 off: test_convolution_1 and its int8 helper are compiled out
+    return 0
+           || test_convolution_0()
+           || test_convolution_2();
+#endif // NCNN_INT8
 }
diff --git a/tests/test_convolutiondepthwise.cpp b/tests/test_convolutiondepthwise.cpp
index a2f2146c0..8dce9ea50 100644
--- a/tests/test_convolutiondepthwise.cpp
+++ b/tests/test_convolutiondepthwise.cpp
@@ -125,6 +125,7 @@ static int test_convolutiondepthwise_0()
     return 0;
 }
 
+#if NCNN_INT8
 static int test_convolutiondepthwise_int8(int w, int h, int c, int outch, int kernel, int dilation, int stride, int pad, int bias, int group, bool requant = false)
 {
     ncnn::Mat a = RandomMat(w, h, c);
@@ -251,10 +252,15 @@ static int test_convolutiondepthwise_1()
 
     return 0;
 }
+#endif // NCNN_INT8
 
 int main()
 {
     SRAND(7767517);
 
+#if NCNN_INT8
     return test_convolutiondepthwise_0() || test_convolutiondepthwise_1();
+#else // NCNN_INT8 off: test_convolutiondepthwise_1 is only defined when NCNN_INT8 is on
+    return test_convolutiondepthwise_0();
+#endif // NCNN_INT8
 }
diff --git a/tests/test_innerproduct.cpp b/tests/test_innerproduct.cpp
index 3d32d85c3..a66040923 100644
--- a/tests/test_innerproduct.cpp
+++ b/tests/test_innerproduct.cpp
@@ -87,6 +87,7 @@ static int test_innerproduct_2()
            || test_innerproduct(RandomMat(24), 32, 1);
 }
 
+#if NCNN_INT8
 static int test_innerproduct_int8(const ncnn::Mat& a, int outch, int bias)
 {
     ncnn::ParamDict pd;
@@ -145,6 +146,7 @@ static int test_innerproduct_3()
            || test_innerproduct_int8(RandomMat(7, 2, 16), 4, 1)
            || test_innerproduct_int8(RandomMat(6, 3, 16), 16, 1);
 }
+#endif // NCNN_INT8
 
 static int test_innerproduct_gemm(const ncnn::Mat& a, int outch, int bias)
 {
@@ -193,6 +195,7 @@ static int test_innerproduct_4()
            || test_innerproduct_gemm(RandomMat(12, 16), 7, 1);
 }
 
+#if NCNN_INT8
 static int test_innerproduct_gemm_int8(const ncnn::Mat& a, int outch, int bias)
 {
     ncnn::ParamDict pd;
@@ -242,11 +245,13 @@ static int test_innerproduct_5()
            || test_innerproduct_gemm_int8(RandomMat(6, 16), 16, 0)
            || test_innerproduct_gemm_int8(RandomMat(12, 16), 7, 1);
 }
+#endif // NCNN_INT8
 
 int main()
 {
     SRAND(7767517);
 
+#if NCNN_INT8
     return 0
            || test_innerproduct_0()
            || test_innerproduct_1()
@@ -254,4 +259,11 @@ int main()
            || test_innerproduct_3()
            || test_innerproduct_4()
            || test_innerproduct_5();
+#else
+    return 0
+           || test_innerproduct_0()
+           || test_innerproduct_1()
+           || test_innerproduct_2()
+           || test_innerproduct_4();
+#endif
 }
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index d7d1a8769..a255ac719 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -12,7 +12,11 @@ add_subdirectory(caffe)
 add_subdirectory(mxnet)
 add_subdirectory(onnx)
 add_subdirectory(darknet)
-add_subdirectory(quantize)
+if(NCNN_INT8) # the quantize tools are only added to the build when NCNN_INT8 is on
+    add_subdirectory(quantize)
+else()
+    message(WARNING "NCNN_INT8 disabled, quantize tools won't be built")
+endif()
 
 add_executable(ncnn2mem ncnn2mem.cpp)
 target_link_libraries(ncnn2mem PRIVATE ncnn)
diff --git a/tools/ncnnoptimize.cpp b/tools/ncnnoptimize.cpp
index 153e5d374..778c3aacf 100644
--- a/tools/ncnnoptimize.cpp
+++ b/tools/ncnnoptimize.cpp
@@ -2673,8 +2673,10 @@ int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling()
 
         innerproduct->weight_data = convolution->weight_data;
         innerproduct->bias_data = convolution->bias_data;
+#if NCNN_INT8
         innerproduct->weight_data_int8_scales = convolution->weight_data_int8_scales;
         innerproduct->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales;
+#endif
 
         innerproduct->activation_type = convolution->activation_type;
         innerproduct->activation_params = convolution->activation_params;
@@ -2739,8 +2741,10 @@ int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct()
 
             innerproduct2->weight_data = convolution->weight_data;
             innerproduct2->bias_data = convolution->bias_data;
-            innerproduct->weight_data_int8_scales = convolution->weight_data_int8_scales;
-            innerproduct->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales;
+#if NCNN_INT8
+            innerproduct2->weight_data_int8_scales = convolution->weight_data_int8_scales; // scales go to innerproduct2, the layer that also receives the convolution's weights and bias here
+            innerproduct2->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales;
+#endif
 
             innerproduct2->activation_type = convolution->activation_type;
             innerproduct2->activation_params = convolution->activation_params;