diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5309c5b40..1c4a578be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,6 +81,7 @@ option(NCNN_COVERAGE "build for coverage" OFF)
 option(NCNN_BUILD_BENCHMARK "build benchmark" ON)
 option(NCNN_PYTHON "build python api" OFF)
 option(NCNN_INT8 "int8 inference" ON)
+option(NCNN_BF16 "bf16 inference" ON)
 
 if(ANDROID OR IOS OR NCNN_SIMPLESTL OR CMAKE_CROSSCOMPILING)
     option(NCNN_DISABLE_RTTI "disable rtti" ON)
diff --git a/src/layer/arm/batchnorm_arm.cpp b/src/layer/arm/batchnorm_arm.cpp
index 602befaea..d2087f22a 100644
--- a/src/layer/arm/batchnorm_arm.cpp
+++ b/src/layer/arm/batchnorm_arm.cpp
@@ -29,7 +29,9 @@ BatchNorm_arm::BatchNorm_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int BatchNorm_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -46,8 +48,10 @@ int BatchNorm_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) cons
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int dims = bottom_top_blob.dims;
     int elempack = bottom_top_blob.elempack;
@@ -660,6 +664,7 @@ int BatchNorm_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& op
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int BatchNorm_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int dims = bottom_top_blob.dims;
@@ -829,5 +834,6 @@ int BatchNorm_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/batchnorm_arm.h b/src/layer/arm/batchnorm_arm.h
index c50cd47af..f6d5f9896 100644
--- a/src/layer/arm/batchnorm_arm.h
+++ b/src/layer/arm/batchnorm_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/binaryop_arm.cpp b/src/layer/arm/binaryop_arm.cpp
index c5c098b60..d7b97ed02 100644
--- a/src/layer/arm/binaryop_arm.cpp
+++ b/src/layer/arm/binaryop_arm.cpp
@@ -33,7 +33,9 @@ BinaryOp_arm::BinaryOp_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 #if __ARM_NEON
@@ -812,8 +814,10 @@ int BinaryOp_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>
         return forward_fp16s(bottom_blobs, top_blobs, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blobs, top_blobs, opt);
+#endif
 
     const Mat& bottom_blob = bottom_blobs[0];
     const Mat& bottom_blob1 = bottom_blobs[1];
@@ -866,8 +870,10 @@ int BinaryOp_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
         return forward_inplace_fp16s(bottom_top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
 #if __ARM_NEON
     int elempack = bottom_top_blob.elempack;
@@ -3258,6 +3264,7 @@ int BinaryOp_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 #if __ARM_NEON
 template<typename Op>
 static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Option& opt)
@@ -4727,5 +4734,6 @@ int BinaryOp_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt)
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/binaryop_arm.h b/src/layer/arm/binaryop_arm.h
index 88f89638d..cc142f69b 100644
--- a/src/layer/arm/binaryop_arm.h
+++ b/src/layer/arm/binaryop_arm.h
@@ -33,8 +33,10 @@ protected:
     int forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/clip_arm.cpp b/src/layer/arm/clip_arm.cpp
index ceeac659d..1285d73a4 100644
--- a/src/layer/arm/clip_arm.cpp
+++ b/src/layer/arm/clip_arm.cpp
@@ -29,7 +29,9 @@ Clip_arm::Clip_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Clip_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -41,8 +43,10 @@ int Clip_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
         return forward_inplace_fp16s(bottom_top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -259,6 +263,7 @@ int Clip_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) con
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Clip_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -334,5 +339,6 @@ int Clip_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) con
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/clip_arm.h b/src/layer/arm/clip_arm.h
index 83ab1d6c9..73ab895f1 100644
--- a/src/layer/arm/clip_arm.h
+++ b/src/layer/arm/clip_arm.h
@@ -30,7 +30,9 @@ protected:
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/concat_arm.cpp b/src/layer/arm/concat_arm.cpp
index 794ede95c..42a505b5e 100644
--- a/src/layer/arm/concat_arm.cpp
+++ b/src/layer/arm/concat_arm.cpp
@@ -25,7 +25,9 @@ Concat_arm::Concat_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Concat_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
@@ -37,8 +39,10 @@ int Concat_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>&
         return forward_bf16s_fp16s(bottom_blobs, top_blobs, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blobs, top_blobs, opt);
+#endif
 
     int dims = bottom_blobs[0].dims;
     int positive_axis = axis < 0 ? dims + axis : axis;
diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp
index 2aed43bad..b8a2917c2 100644
--- a/src/layer/arm/convolution_arm.cpp
+++ b/src/layer/arm/convolution_arm.cpp
@@ -27,17 +27,20 @@
 
 namespace ncnn {
 
-#include "convolution_bf16s.h"
 #include "convolution_sgemm.h"
 
 #include "convolution_1x1.h"
-#include "convolution_1x1_bf16s.h"
 #include "convolution_2x2.h"
 #include "convolution_3x3.h"
 #include "convolution_4x4.h"
 #include "convolution_5x5.h"
 #include "convolution_7x7.h"
 
+#if NCNN_BF16
+#include "convolution_bf16s.h"
+#include "convolution_1x1_bf16s.h"
+#endif // NCNN_BF16
+
 #if NCNN_INT8
 #include "convolution_sgemm_int8.h"
 #include "convolution_1x1_int8.h"
@@ -47,27 +50,30 @@ namespace ncnn {
 
 #if __ARM_NEON
 #include "convolution_pack4.h"
-#include "convolution_pack4_bf16s.h"
 #include "convolution_pack1to4.h"
-#include "convolution_pack1to4_bf16s.h"
 #include "convolution_pack4to1.h"
-#include "convolution_pack4to1_bf16s.h"
 #include "convolution_sgemm_pack4.h"
-#include "convolution_sgemm_pack4_bf16s.h"
 #include "convolution_1x1_pack4.h"
-#include "convolution_1x1_pack4_bf16s.h"
 #include "convolution_1x1_pack4to1.h"
-#include "convolution_1x1_pack4to1_bf16s.h"
 #include "convolution_3x3_pack1to4.h"
-#include "convolution_3x3_pack1to4_bf16s.h"
 #include "convolution_3x3_pack4.h"
-#include "convolution_3x3_pack4_bf16s.h"
 #include "convolution_3x3_pack4to1.h"
-#include "convolution_3x3_pack4to1_bf16s.h"
 #include "convolution_5x5_pack4.h"
-#include "convolution_5x5_pack4_bf16s.h"
 #include "convolution_7x7_pack1to4.h"
+
+#if NCNN_BF16
+#include "convolution_pack4_bf16s.h"
+#include "convolution_pack1to4_bf16s.h"
+#include "convolution_pack4to1_bf16s.h"
+#include "convolution_sgemm_pack4_bf16s.h"
+#include "convolution_1x1_pack4_bf16s.h"
+#include "convolution_1x1_pack4to1_bf16s.h"
+#include "convolution_3x3_pack1to4_bf16s.h"
+#include "convolution_3x3_pack4_bf16s.h"
+#include "convolution_3x3_pack4to1_bf16s.h"
+#include "convolution_5x5_pack4_bf16s.h"
 #include "convolution_7x7_pack1to4_bf16s.h"
+#endif // NCNN_BF16
 
 #if NCNN_INT8
 #include "convolution_pack8to4_int8.h"
@@ -122,7 +128,9 @@ Convolution_arm::Convolution_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 
     activation = 0;
     convolution_dilation1 = 0;
@@ -188,10 +196,12 @@ int Convolution_arm::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         return create_pipeline_bf16s(opt);
     }
+#endif
 
     if ((!support_packing || !opt.use_packing_layout) && !opt.use_bf16_storage && kernel_w == kernel_h && dilation_w != 1 && dilation_h == dilation_w && stride_w == 1 && stride_h == 1)
     {
@@ -449,8 +459,10 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     int w = bottom_blob.w;
     int h = bottom_blob.h;
@@ -1538,6 +1550,7 @@ int Convolution_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Convolution_arm::create_pipeline_bf16s(const Option& opt)
 {
     const int maxk = kernel_w * kernel_h;
@@ -1812,6 +1825,7 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const
 
     return 0;
 }
+#endif // NCNN_BF16
 
 #if NCNN_INT8
 int Convolution_arm::create_pipeline_int8_arm(const Option& opt)
diff --git a/src/layer/arm/convolution_arm.h b/src/layer/arm/convolution_arm.h
index e0eca6a08..18d4fd57c 100644
--- a/src/layer/arm/convolution_arm.h
+++ b/src/layer/arm/convolution_arm.h
@@ -35,8 +35,10 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 #if NCNN_INT8
     int create_pipeline_int8_arm(const Option& opt);
     int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
@@ -66,8 +68,10 @@ public:
     Mat weight_data_fp16;
     Mat bias_data_fp16;
 
+#if NCNN_BF16
     // bf16
     Mat weight_data_bf16;
+#endif
 
 #if NCNN_INT8
     // int8
diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp
index cd8124108..0f0d5a488 100644
--- a/src/layer/arm/convolutiondepthwise_arm.cpp
+++ b/src/layer/arm/convolutiondepthwise_arm.cpp
@@ -34,9 +34,12 @@ namespace ncnn {
 
 #if __ARM_NEON
 #include "convolutiondepthwise_3x3_pack4.h"
-#include "convolutiondepthwise_3x3_pack4_bf16s.h"
 #include "convolutiondepthwise_5x5_pack4.h"
+
+#if NCNN_BF16
+#include "convolutiondepthwise_3x3_pack4_bf16s.h"
 #include "convolutiondepthwise_5x5_pack4_bf16s.h"
+#endif // NCNN_BF16
 
 #if NCNN_INT8
 #include "convolutiondepthwise_3x3_pack8_int8.h"
@@ -58,7 +61,9 @@ ConvolutionDepthWise_arm::ConvolutionDepthWise_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 
     activation = 0;
 }
@@ -167,6 +172,7 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
         }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
         if (opt.use_bf16_storage)
         {
 #if __ARM_NEON
@@ -186,6 +192,7 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
 
             return 0;
         }
+#endif // NCNN_BF16
 
 #if __ARM_NEON
         // pack4
@@ -362,8 +369,10 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     int w = bottom_blob.w;
     int h = bottom_blob.h;
@@ -1158,6 +1167,7 @@ int ConvolutionDepthWise_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_bl
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int ConvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     int w = bottom_blob.w;
@@ -1456,6 +1466,7 @@ int ConvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blo
 
     return 0;
 }
+#endif // NCNN_BF16
 
 #if NCNN_INT8
 int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt)
diff --git a/src/layer/arm/convolutiondepthwise_arm.h b/src/layer/arm/convolutiondepthwise_arm.h
index 2cff01cb9..fb9bc427f 100644
--- a/src/layer/arm/convolutiondepthwise_arm.h
+++ b/src/layer/arm/convolutiondepthwise_arm.h
@@ -35,7 +35,9 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 #if NCNN_INT8
     int create_pipeline_int8_arm(const Option& opt);
     int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
@@ -52,9 +54,11 @@ public:
     Mat weight_data_fp16;
     Mat bias_data_fp16;
 
+#if NCNN_BF16
     // bf16
     Mat weight_data_bf16;
     Mat weight_data_pack4_bf16;
+#endif
 
 #if NCNN_INT8
     // int8
diff --git a/src/layer/arm/crop_arm.cpp b/src/layer/arm/crop_arm.cpp
index 1b4b521b5..cf5178bf7 100644
--- a/src/layer/arm/crop_arm.cpp
+++ b/src/layer/arm/crop_arm.cpp
@@ -29,7 +29,9 @@ Crop_arm::Crop_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 #if __ARM_NEON
diff --git a/src/layer/arm/deconvolution_arm.cpp b/src/layer/arm/deconvolution_arm.cpp
index 38cf04eff..498137dc4 100644
--- a/src/layer/arm/deconvolution_arm.cpp
+++ b/src/layer/arm/deconvolution_arm.cpp
@@ -40,7 +40,9 @@ Deconvolution_arm::Deconvolution_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 
     activation = 0;
 }
@@ -91,10 +93,12 @@ int Deconvolution_arm::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         return create_pipeline_bf16s(opt);
     }
+#endif
 
     const int maxk = kernel_w * kernel_h;
     int num_input = weight_data_size / maxk / num_output;
@@ -308,8 +312,10 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     // deconvolv with NxN kernel
     // value = value + bias
@@ -1899,6 +1905,7 @@ int Deconvolution_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, con
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Deconvolution_arm::create_pipeline_bf16s(const Option& opt)
 {
     const int maxk = kernel_w * kernel_h;
@@ -2337,5 +2344,6 @@ int Deconvolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, cons
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/deconvolution_arm.h b/src/layer/arm/deconvolution_arm.h
index b8dd38cf1..08bb004eb 100644
--- a/src/layer/arm/deconvolution_arm.h
+++ b/src/layer/arm/deconvolution_arm.h
@@ -35,8 +35,10 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     Layer* activation;
@@ -51,8 +53,10 @@ public:
     Mat weight_data_fp16;
     Mat bias_data_fp16;
 
+#if NCNN_BF16
     // bf16
     Mat weight_data_bf16;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/deconvolutiondepthwise_arm.cpp b/src/layer/arm/deconvolutiondepthwise_arm.cpp
index 987c54c2d..99a33a1a0 100644
--- a/src/layer/arm/deconvolutiondepthwise_arm.cpp
+++ b/src/layer/arm/deconvolutiondepthwise_arm.cpp
@@ -33,7 +33,9 @@ DeconvolutionDepthWise_arm::DeconvolutionDepthWise_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
@@ -101,6 +103,7 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
         }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
         if (opt.use_bf16_storage)
         {
 #if __ARM_NEON
@@ -120,6 +123,7 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
 
             return 0;
         }
+#endif // NCNN_BF16
 
 #if __ARM_NEON
         // pack4
@@ -228,8 +232,10 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     // convolv with NxN kernel
     // value = value + bias
@@ -986,6 +992,7 @@ int DeconvolutionDepthWise_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int DeconvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     int w = bottom_blob.w;
@@ -1224,5 +1231,6 @@ int DeconvolutionDepthWise_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_b
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/deconvolutiondepthwise_arm.h b/src/layer/arm/deconvolutiondepthwise_arm.h
index ba22418a1..9ef46009a 100644
--- a/src/layer/arm/deconvolutiondepthwise_arm.h
+++ b/src/layer/arm/deconvolutiondepthwise_arm.h
@@ -34,7 +34,9 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 
 public:
     std::vector<ncnn::Layer*> group_ops;
@@ -47,8 +49,10 @@ public:
     Mat weight_data_fp16;
     Mat bias_data_fp16;
 
+#if NCNN_BF16
     // bf16
     Mat weight_data_bf16;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/dequantize_arm.cpp b/src/layer/arm/dequantize_arm.cpp
index db4a01755..ea345ded9 100644
--- a/src/layer/arm/dequantize_arm.cpp
+++ b/src/layer/arm/dequantize_arm.cpp
@@ -30,7 +30,9 @@ Dequantize_arm::Dequantize_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Dequantize_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
@@ -47,8 +49,10 @@ int Dequantize_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     int dims = bottom_blob.dims;
     int elempack = bottom_blob.elempack;
@@ -2285,6 +2289,7 @@ int Dequantize_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     int dims = bottom_blob.dims;
@@ -3038,5 +3043,6 @@ int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const O
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/dequantize_arm.h b/src/layer/arm/dequantize_arm.h
index 0b0014fd6..8949f3956 100644
--- a/src/layer/arm/dequantize_arm.h
+++ b/src/layer/arm/dequantize_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/eltwise_arm.cpp b/src/layer/arm/eltwise_arm.cpp
index 7a03b7f06..dda0a7755 100644
--- a/src/layer/arm/eltwise_arm.cpp
+++ b/src/layer/arm/eltwise_arm.cpp
@@ -29,7 +29,9 @@ Eltwise_arm::Eltwise_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Eltwise_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
@@ -46,8 +48,10 @@ int Eltwise_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>&
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blobs, top_blobs, opt);
+#endif
 
     const Mat& bottom_blob = bottom_blobs[0];
     int w = bottom_blob.w;
@@ -2213,6 +2217,7 @@ int Eltwise_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vecto
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Eltwise_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
 {
     const Mat& bottom_blob = bottom_blobs[0];
@@ -2980,5 +2985,6 @@ int Eltwise_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/eltwise_arm.h b/src/layer/arm/eltwise_arm.h
index c41197bfb..977458821 100644
--- a/src/layer/arm/eltwise_arm.h
+++ b/src/layer/arm/eltwise_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
     int forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/flatten_arm.cpp b/src/layer/arm/flatten_arm.cpp
index a5664402a..015c5849a 100644
--- a/src/layer/arm/flatten_arm.cpp
+++ b/src/layer/arm/flatten_arm.cpp
@@ -29,7 +29,9 @@ Flatten_arm::Flatten_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif // NCNN_BF16
 }
 
 int Flatten_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
@@ -44,8 +46,10 @@ int Flatten_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
+#endif
 
     int dims = bottom_blob.dims;
 
diff --git a/src/layer/arm/gru_arm.cpp b/src/layer/arm/gru_arm.cpp
index e5f3e6a59..97a2bb61e 100644
--- a/src/layer/arm/gru_arm.cpp
+++ b/src/layer/arm/gru_arm.cpp
@@ -32,7 +32,9 @@ GRU_arm::GRU_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int GRU_arm::create_pipeline(const Option& opt)
@@ -44,10 +46,12 @@ int GRU_arm::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         return create_pipeline_bf16s(opt);
     }
+#endif
 
     // pack RUN
     int num_directions = direction == 2 ? 2 : 1;
@@ -627,8 +631,10 @@ int GRU_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) c
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     int T = bottom_blob.h;
 
@@ -708,8 +714,10 @@ int GRU_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blobs, top_blobs, opt);
+#endif
 
     int T = bottom_blob.h;
     Mat& top_blob = top_blobs[0];
@@ -1727,6 +1735,7 @@ int GRU_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Ma
 }
 #endif
 
+#if NCNN_BF16
 static int gru_bf16s(const Mat& bottom_blob, Mat& top_blob, int reverse, const Mat& weight_xc, const Mat& bias_c, const Mat& weight_hc, Mat& hidden_state, const Option& opt)
 {
     int size = bottom_blob.w;
@@ -2378,5 +2387,6 @@ int GRU_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/gru_arm.h b/src/layer/arm/gru_arm.h
index ee67e96ee..0e6e9eef4 100644
--- a/src/layer/arm/gru_arm.h
+++ b/src/layer/arm/gru_arm.h
@@ -36,9 +36,11 @@ protected:
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+#endif
 
 public:
     Mat weight_xc_data_packed;
diff --git a/src/layer/arm/hardsigmoid_arm.cpp b/src/layer/arm/hardsigmoid_arm.cpp
index 3a94d7aaf..f7105b193 100644
--- a/src/layer/arm/hardsigmoid_arm.cpp
+++ b/src/layer/arm/hardsigmoid_arm.cpp
@@ -29,7 +29,9 @@ HardSigmoid_arm::HardSigmoid_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int HardSigmoid_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -46,8 +48,10 @@ int HardSigmoid_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) co
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -326,6 +330,7 @@ int HardSigmoid_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option&
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int HardSigmoid_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -404,5 +409,6 @@ int HardSigmoid_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& o
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/hardsigmoid_arm.h b/src/layer/arm/hardsigmoid_arm.h
index 93e61e17a..63fdb07d3 100644
--- a/src/layer/arm/hardsigmoid_arm.h
+++ b/src/layer/arm/hardsigmoid_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/hardswish_arm.cpp b/src/layer/arm/hardswish_arm.cpp
index d5e8c0c1c..a3fd6bd6e 100644
--- a/src/layer/arm/hardswish_arm.cpp
+++ b/src/layer/arm/hardswish_arm.cpp
@@ -29,7 +29,9 @@ HardSwish_arm::HardSwish_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int HardSwish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -46,8 +48,10 @@ int HardSwish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) cons
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -333,6 +337,7 @@ int HardSwish_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& op
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int HardSwish_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -413,5 +418,6 @@ int HardSwish_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/hardswish_arm.h b/src/layer/arm/hardswish_arm.h
index b47b124b7..9a1d7a560 100644
--- a/src/layer/arm/hardswish_arm.h
+++ b/src/layer/arm/hardswish_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/innerproduct_arm.cpp b/src/layer/arm/innerproduct_arm.cpp
index 34edf9273..b0c7a58e0 100644
--- a/src/layer/arm/innerproduct_arm.cpp
+++ b/src/layer/arm/innerproduct_arm.cpp
@@ -34,7 +34,9 @@ InnerProduct_arm::InnerProduct_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 
     flatten = 0;
     activation = 0;
@@ -69,10 +71,12 @@ int InnerProduct_arm::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         return create_pipeline_bf16s(opt);
     }
+#endif
 
     return 0;
 }
@@ -117,8 +121,10 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     const int num_input = weight_data_size / num_output;
 
@@ -1535,6 +1541,7 @@ int InnerProduct_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, cons
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int InnerProduct_arm::create_pipeline_bf16s(const Option& opt)
 {
     const int num_input = weight_data_size / num_output;
@@ -1895,6 +1902,7 @@ int InnerProduct_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const
 
     return 0;
 }
+#endif // NCNN_BF16
 
 #if NCNN_INT8
 int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt)
diff --git a/src/layer/arm/innerproduct_arm.h b/src/layer/arm/innerproduct_arm.h
index 4cdc600bb..ba772486b 100644
--- a/src/layer/arm/innerproduct_arm.h
+++ b/src/layer/arm/innerproduct_arm.h
@@ -37,8 +37,10 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 #if NCNN_INT8
     int create_pipeline_int8_arm(const Option& opt);
     int forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
@@ -52,8 +54,10 @@ public:
     Mat weight_data_fp16;
     Mat bias_data_fp16;
 
+#if NCNN_BF16
     // bf16
     Mat weight_data_bf16;
+#endif
 
 #if NCNN_INT8
     // int8
diff --git a/src/layer/arm/instancenorm_arm.cpp b/src/layer/arm/instancenorm_arm.cpp
index 2f80a8b38..4562c6fc3 100644
--- a/src/layer/arm/instancenorm_arm.cpp
+++ b/src/layer/arm/instancenorm_arm.cpp
@@ -29,7 +29,9 @@ InstanceNorm_arm::InstanceNorm_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int InstanceNorm_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -41,8 +43,10 @@ int InstanceNorm_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) c
         return forward_inplace_fp16s(bottom_top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -473,6 +477,7 @@ int InstanceNorm_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option&
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int InstanceNorm_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -646,5 +651,6 @@ int InstanceNorm_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option&
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/instancenorm_arm.h b/src/layer/arm/instancenorm_arm.h
index 0ea031d02..1e483b6df 100644
--- a/src/layer/arm/instancenorm_arm.h
+++ b/src/layer/arm/instancenorm_arm.h
@@ -30,7 +30,9 @@ protected:
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/interp_arm.cpp b/src/layer/arm/interp_arm.cpp
index 858507b18..86c9a3d31 100644
--- a/src/layer/arm/interp_arm.cpp
+++ b/src/layer/arm/interp_arm.cpp
@@ -23,15 +23,20 @@
 namespace ncnn {
 
 #include "interp_bicubic.h"
-#include "interp_bicubic_bf16s.h"
 #include "interp_bilinear.h"
+
+#if NCNN_BF16
+#include "interp_bicubic_bf16s.h"
 #include "interp_bilinear_bf16s.h"
+#endif
 
 #if __ARM_NEON
 #include "interp_bicubic_pack4.h"
-#include "interp_bicubic_pack4_bf16s.h"
 #include "interp_bilinear_pack4.h"
+#if NCNN_BF16
+#include "interp_bicubic_pack4_bf16s.h"
 #include "interp_bilinear_pack4_bf16s.h"
+#endif
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #include "interp_bicubic_fp16s.h"
 #include "interp_bicubic_pack4_fp16s.h"
@@ -51,7 +56,9 @@ Interp_arm::Interp_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Interp_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
@@ -72,8 +79,10 @@ int Interp_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>&
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blobs, top_blobs, opt);
+#endif
 
     int h = bottom_blob.h;
     int w = bottom_blob.w;
@@ -830,6 +839,7 @@ int Interp_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Interp_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
 {
     const Mat& bottom_blob = bottom_blobs[0];
@@ -1052,5 +1062,6 @@ int Interp_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/interp_arm.h b/src/layer/arm/interp_arm.h
index d70bef1dc..7ee5022c9 100644
--- a/src/layer/arm/interp_arm.h
+++ b/src/layer/arm/interp_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
     int forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/lstm_arm.cpp b/src/layer/arm/lstm_arm.cpp
index dd34ef46c..7b8511b8f 100644
--- a/src/layer/arm/lstm_arm.cpp
+++ b/src/layer/arm/lstm_arm.cpp
@@ -32,7 +32,9 @@ LSTM_arm::LSTM_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int LSTM_arm::create_pipeline(const Option& opt)
@@ -44,10 +46,12 @@ int LSTM_arm::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         return create_pipeline_bf16s(opt);
     }
+#endif
 
     // pack IFOG
     int num_directions = direction == 2 ? 2 : 1;
@@ -349,8 +353,10 @@ int LSTM_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     int T = bottom_blob.h;
 
@@ -436,8 +442,10 @@ int LSTM_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blobs, top_blobs, opt);
+#endif
 
     int T = bottom_blob.h;
     Mat& top_blob = top_blobs[0];
@@ -1296,6 +1304,7 @@ int LSTM_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<M
 }
 #endif
 
+#if NCNN_BF16
 static int lstm_bf16s(const Mat& bottom_blob, Mat& top_blob, int reverse, const Mat& weight_xc, const Mat& bias_c, const Mat& weight_hc, Mat& hidden_state, Mat& cell_state, const Option& opt)
 {
     int size = bottom_blob.w;
@@ -1680,5 +1689,6 @@ int LSTM_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Ma
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/lstm_arm.h b/src/layer/arm/lstm_arm.h
index 73035b8dd..5155caf11 100644
--- a/src/layer/arm/lstm_arm.h
+++ b/src/layer/arm/lstm_arm.h
@@ -36,9 +36,11 @@ protected:
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+#endif
 
 public:
     Mat weight_xc_data_packed;
diff --git a/src/layer/arm/mish_arm.cpp b/src/layer/arm/mish_arm.cpp
index 0b3352428..a51a2ee21 100644
--- a/src/layer/arm/mish_arm.cpp
+++ b/src/layer/arm/mish_arm.cpp
@@ -35,7 +35,9 @@ Mish_arm::Mish_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Mish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -52,8 +54,10 @@ int Mish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -243,6 +247,7 @@ int Mish_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) co
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Mish_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -304,5 +309,6 @@ int Mish_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) con
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/mish_arm.h b/src/layer/arm/mish_arm.h
index 3c968d9dc..81b566758 100644
--- a/src/layer/arm/mish_arm.h
+++ b/src/layer/arm/mish_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/padding_arm.cpp b/src/layer/arm/padding_arm.cpp
index 6567fe061..8f61d07a9 100644
--- a/src/layer/arm/padding_arm.cpp
+++ b/src/layer/arm/padding_arm.cpp
@@ -38,7 +38,9 @@ Padding_arm::Padding_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Padding_arm::create_pipeline(const Option& opt)
@@ -50,12 +52,14 @@ int Padding_arm::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         value_bf16 = float32_to_bfloat16(value);
 
         ncnn::cast_float32_to_bfloat16(per_channel_pad_data, per_channel_pad_data_bf16, opt);
     }
+#endif
 
     return 0;
 }
@@ -83,8 +87,10 @@ int Padding_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
+#endif
 
     int w = bottom_blob.w;
     int h = bottom_blob.h;
@@ -352,19 +358,28 @@ int Padding_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blob, cons
                 {
                     Mat borderm = top_blob.channel(q);
 
-#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                    // clang-format off
+                    // *INDENT-OFF*
                     uint16x4_t pad_value;
+#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                     if (opt.use_fp16_storage)
                     {
                         pad_value = per_channel_pad_data_size ? vreinterpret_u16_f16(vld1_f16((const __fp16*)per_channel_pad_data_fp16 + q * 4)) : vreinterpret_u16_f16(vdup_n_f16((__fp16)value));
                     }
                     else
+#endif
+#if NCNN_BF16
+                    if (opt.use_bf16_storage)
                     {
                         pad_value = per_channel_pad_data_size ? vld1_u16((const unsigned short*)per_channel_pad_data_bf16 + q * 4) : vdup_n_u16(value_bf16);
                     }
-#else
-                    uint16x4_t pad_value = per_channel_pad_data_size ? vld1_u16((const unsigned short*)per_channel_pad_data_bf16 + q * 4) : vdup_n_u16(value_bf16);
+                    else
 #endif
+                    {
+                    }
+                    // *INDENT-ON*
+                    // clang-format on
+
                     //Channel padding
                     if ((q - front_) < 0 || (q - front_) >= channels)
                     {
diff --git a/src/layer/arm/padding_arm.h b/src/layer/arm/padding_arm.h
index 25c50e786..dfc088de0 100644
--- a/src/layer/arm/padding_arm.h
+++ b/src/layer/arm/padding_arm.h
@@ -34,9 +34,11 @@ protected:
     int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
 public:
+#if NCNN_BF16
     // bf16
     unsigned short value_bf16;
     Mat per_channel_pad_data_bf16;
+#endif
 
     // fp16
     Mat per_channel_pad_data_fp16;
diff --git a/src/layer/arm/pixelshuffle_arm.cpp b/src/layer/arm/pixelshuffle_arm.cpp
index 1b2548ce5..60581e921 100644
--- a/src/layer/arm/pixelshuffle_arm.cpp
+++ b/src/layer/arm/pixelshuffle_arm.cpp
@@ -31,7 +31,9 @@ PixelShuffle_arm::PixelShuffle_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int PixelShuffle_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
@@ -43,8 +45,10 @@ int PixelShuffle_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
+#endif
 
     int w = bottom_blob.w;
     int h = bottom_blob.h;
diff --git a/src/layer/arm/pooling_arm.cpp b/src/layer/arm/pooling_arm.cpp
index d010b63f2..ad0a36332 100644
--- a/src/layer/arm/pooling_arm.cpp
+++ b/src/layer/arm/pooling_arm.cpp
@@ -39,7 +39,9 @@ Pooling_arm::Pooling_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Pooling_arm::create_pipeline(const Option& /*opt*/)
@@ -78,8 +80,10 @@ int Pooling_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     // max value in NxN window
     // avg value in NxN window
@@ -1235,6 +1239,7 @@ int Pooling_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Opt
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Pooling_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     // max value in NxN window
@@ -1644,5 +1649,6 @@ int Pooling_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Opti
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/pooling_arm.h b/src/layer/arm/pooling_arm.h
index 5dab6f5d3..de457bf57 100644
--- a/src/layer/arm/pooling_arm.h
+++ b/src/layer/arm/pooling_arm.h
@@ -32,7 +32,9 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/prelu_arm.cpp b/src/layer/arm/prelu_arm.cpp
index eb847e203..362272553 100644
--- a/src/layer/arm/prelu_arm.cpp
+++ b/src/layer/arm/prelu_arm.cpp
@@ -29,7 +29,9 @@ PReLU_arm::PReLU_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int PReLU_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -46,8 +48,10 @@ int PReLU_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int dims = bottom_top_blob.dims;
     int elempack = bottom_top_blob.elempack;
@@ -816,6 +820,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int PReLU_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int dims = bottom_top_blob.dims;
@@ -1033,5 +1038,6 @@ int PReLU_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) co
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/prelu_arm.h b/src/layer/arm/prelu_arm.h
index be610bce1..90befce2b 100644
--- a/src/layer/arm/prelu_arm.h
+++ b/src/layer/arm/prelu_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/quantize_arm.cpp b/src/layer/arm/quantize_arm.cpp
index a486cd9bb..a9b05872e 100644
--- a/src/layer/arm/quantize_arm.cpp
+++ b/src/layer/arm/quantize_arm.cpp
@@ -34,7 +34,9 @@ Quantize_arm::Quantize_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Quantize_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
@@ -51,8 +53,10 @@ int Quantize_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& o
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     int dims = bottom_blob.dims;
     int elempack = bottom_blob.elempack;
@@ -1552,6 +1556,7 @@ int Quantize_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Op
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Quantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     int dims = bottom_blob.dims;
@@ -1953,5 +1958,6 @@ int Quantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Opt
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/quantize_arm.h b/src/layer/arm/quantize_arm.h
index f4edf9d6c..a5b9e5d02 100644
--- a/src/layer/arm/quantize_arm.h
+++ b/src/layer/arm/quantize_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/relu_arm.cpp b/src/layer/arm/relu_arm.cpp
index e319e68e3..f8d2b9925 100644
--- a/src/layer/arm/relu_arm.cpp
+++ b/src/layer/arm/relu_arm.cpp
@@ -29,7 +29,9 @@ ReLU_arm::ReLU_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int ReLU_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -44,8 +46,10 @@ int ReLU_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
         return forward_inplace_fp16s(bottom_top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -578,6 +582,7 @@ int ReLU_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) con
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int ReLU_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -881,6 +886,7 @@ int ReLU_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) con
 
     return 0;
 }
+#endif // NCNN_BF16
 
 int ReLU_arm::forward_inplace_int8(Mat& bottom_top_blob, const Option& opt) const
 {
diff --git a/src/layer/arm/relu_arm.h b/src/layer/arm/relu_arm.h
index 4a7bcbaba..cb1be789b 100644
--- a/src/layer/arm/relu_arm.h
+++ b/src/layer/arm/relu_arm.h
@@ -30,7 +30,9 @@ protected:
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
     int forward_inplace_int8(Mat& bottom_top_blob, const Option& opt) const;
 };
 
diff --git a/src/layer/arm/reshape_arm.cpp b/src/layer/arm/reshape_arm.cpp
index 81493d2d2..3b9c5cfec 100644
--- a/src/layer/arm/reshape_arm.cpp
+++ b/src/layer/arm/reshape_arm.cpp
@@ -29,7 +29,9 @@ Reshape_arm::Reshape_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Reshape_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
@@ -41,8 +43,10 @@ int Reshape_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
+#endif
 
     int elempack = bottom_blob.elempack;
 
diff --git a/src/layer/arm/rnn_arm.cpp b/src/layer/arm/rnn_arm.cpp
index 0de8b994b..9bd5bcc29 100644
--- a/src/layer/arm/rnn_arm.cpp
+++ b/src/layer/arm/rnn_arm.cpp
@@ -32,7 +32,9 @@ RNN_arm::RNN_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int RNN_arm::create_pipeline(const Option& opt)
@@ -44,10 +46,12 @@ int RNN_arm::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         return create_pipeline_bf16s(opt);
     }
+#endif
 
     int num_directions = direction == 2 ? 2 : 1;
     int size = weight_data_size / num_directions / num_output;
@@ -309,8 +313,10 @@ int RNN_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) c
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blob, top_blob, opt);
+#endif
 
     int T = bottom_blob.h;
 
@@ -390,8 +396,10 @@ int RNN_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s(bottom_blobs, top_blobs, opt);
+#endif
 
     int T = bottom_blob.h;
     Mat& top_blob = top_blobs[0];
@@ -1067,6 +1075,7 @@ int RNN_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Ma
 }
 #endif
 
+#if NCNN_BF16
 static int rnn_bf16s(const Mat& bottom_blob, Mat& top_blob, int reverse, const Mat& weight_xc, const Mat& bias_c, const Mat& weight_hc, Mat& hidden_state, const Option& opt)
 {
     int size = bottom_blob.w;
@@ -1400,5 +1409,6 @@ int RNN_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/rnn_arm.h b/src/layer/arm/rnn_arm.h
index 22d2bf0db..6d7567b4d 100644
--- a/src/layer/arm/rnn_arm.h
+++ b/src/layer/arm/rnn_arm.h
@@ -36,9 +36,11 @@ protected:
     int forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int create_pipeline_bf16s(const Option& opt);
     int forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
     int forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+#endif
 
 public:
     Mat weight_xc_data_packed;
diff --git a/src/layer/arm/shufflechannel_arm.cpp b/src/layer/arm/shufflechannel_arm.cpp
index e657a5a99..54abfddeb 100644
--- a/src/layer/arm/shufflechannel_arm.cpp
+++ b/src/layer/arm/shufflechannel_arm.cpp
@@ -31,7 +31,9 @@ ShuffleChannel_arm::ShuffleChannel_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
@@ -43,8 +45,10 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
+#endif
 
     int channels = bottom_blob.c;
     int elempack = bottom_blob.elempack;
diff --git a/src/layer/arm/sigmoid_arm.cpp b/src/layer/arm/sigmoid_arm.cpp
index c1d6b0497..f46057594 100644
--- a/src/layer/arm/sigmoid_arm.cpp
+++ b/src/layer/arm/sigmoid_arm.cpp
@@ -36,7 +36,9 @@ Sigmoid_arm::Sigmoid_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Sigmoid_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -53,8 +55,10 @@ int Sigmoid_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -247,6 +251,7 @@ int Sigmoid_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt)
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Sigmoid_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -311,5 +316,6 @@ int Sigmoid_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt)
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/sigmoid_arm.h b/src/layer/arm/sigmoid_arm.h
index a338c4919..5b52d5885 100644
--- a/src/layer/arm/sigmoid_arm.h
+++ b/src/layer/arm/sigmoid_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/slice_arm.cpp b/src/layer/arm/slice_arm.cpp
index be3db7fe2..c9c19be85 100644
--- a/src/layer/arm/slice_arm.cpp
+++ b/src/layer/arm/slice_arm.cpp
@@ -25,7 +25,9 @@ Slice_arm::Slice_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Slice_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
@@ -37,8 +39,10 @@ int Slice_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& t
         return forward_bf16s_fp16s(bottom_blobs, top_blobs, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blobs, top_blobs, opt);
+#endif
 
     const Mat& bottom_blob = bottom_blobs[0];
     int dims = bottom_blob.dims;
diff --git a/src/layer/arm/swish_arm.cpp b/src/layer/arm/swish_arm.cpp
index 80c003f8c..79f9c2f52 100644
--- a/src/layer/arm/swish_arm.cpp
+++ b/src/layer/arm/swish_arm.cpp
@@ -35,7 +35,9 @@ Swish_arm::Swish_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Swish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -52,8 +54,10 @@ int Swish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -250,6 +254,7 @@ int Swish_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int Swish_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -313,5 +318,6 @@ int Swish_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) co
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/swish_arm.h b/src/layer/arm/swish_arm.h
index e990983ac..7e38ac922 100644
--- a/src/layer/arm/swish_arm.h
+++ b/src/layer/arm/swish_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/tanh_arm.cpp b/src/layer/arm/tanh_arm.cpp
index f4837e5c4..f71f62204 100644
--- a/src/layer/arm/tanh_arm.cpp
+++ b/src/layer/arm/tanh_arm.cpp
@@ -35,7 +35,9 @@ TanH_arm::TanH_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int TanH_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
@@ -52,8 +54,10 @@ int TanH_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
@@ -243,6 +247,7 @@ int TanH_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) co
 }
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
+#if NCNN_BF16
 int TanH_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const
 {
     int w = bottom_top_blob.w;
@@ -304,5 +309,6 @@ int TanH_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) con
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/tanh_arm.h b/src/layer/arm/tanh_arm.h
index 254a4427c..58697e7f7 100644
--- a/src/layer/arm/tanh_arm.h
+++ b/src/layer/arm/tanh_arm.h
@@ -31,7 +31,9 @@ protected:
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
     int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/arm/unaryop_arm.cpp b/src/layer/arm/unaryop_arm.cpp
index 7ac1755c5..d809bb80a 100644
--- a/src/layer/arm/unaryop_arm.cpp
+++ b/src/layer/arm/unaryop_arm.cpp
@@ -35,7 +35,9 @@ UnaryOp_arm::UnaryOp_arm()
 #endif
 #endif // __ARM_NEON
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 #if __ARM_NEON
@@ -265,8 +267,10 @@ int UnaryOp_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
         return forward_inplace_fp16s(bottom_top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_inplace_bf16s(bottom_top_blob, opt);
+#endif
 
     int elempack = bottom_top_blob.elempack;
 
@@ -1099,6 +1103,7 @@ static int unary_op_inplace_pack4_bf16s(Mat& a, const Option& opt)
 }
 #endif // __ARM_NEON
 
+#if NCNN_BF16
 template<typename Op>
 static int unary_op_inplace_bf16s(Mat& a, const Option& opt)
 {
@@ -1375,5 +1380,6 @@ int UnaryOp_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt)
 
     return 0;
 }
+#endif // NCNN_BF16
 
 } // namespace ncnn
diff --git a/src/layer/arm/unaryop_arm.h b/src/layer/arm/unaryop_arm.h
index 4a760e4cb..58e39e85a 100644
--- a/src/layer/arm/unaryop_arm.h
+++ b/src/layer/arm/unaryop_arm.h
@@ -30,7 +30,9 @@ protected:
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
 #endif
+#if NCNN_BF16
     int forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt) const;
+#endif
 };
 
 } // namespace ncnn
diff --git a/src/layer/riscv/concat_riscv.cpp b/src/layer/riscv/concat_riscv.cpp
index d55e5a45f..55bee3a2e 100644
--- a/src/layer/riscv/concat_riscv.cpp
+++ b/src/layer/riscv/concat_riscv.cpp
@@ -35,7 +35,9 @@ Concat_riscv::Concat_riscv()
 #endif
 #endif // __riscv_vector
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Concat_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
@@ -47,8 +49,10 @@ int Concat_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>
         return forward_bf16s_fp16s(bottom_blobs, top_blobs, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blobs, top_blobs, opt);
+#endif
 
 #if __riscv_vector
     const int packn = csrr_vlenb() / 4;
diff --git a/src/layer/riscv/crop_riscv.cpp b/src/layer/riscv/crop_riscv.cpp
index 68d3d000d..2800d0108 100644
--- a/src/layer/riscv/crop_riscv.cpp
+++ b/src/layer/riscv/crop_riscv.cpp
@@ -35,7 +35,9 @@ Crop_riscv::Crop_riscv()
 #endif
 #endif // __riscv_vector
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 #if __riscv_vector
diff --git a/src/layer/riscv/flatten_riscv.cpp b/src/layer/riscv/flatten_riscv.cpp
index a9f6f7e8e..c8763d2b7 100644
--- a/src/layer/riscv/flatten_riscv.cpp
+++ b/src/layer/riscv/flatten_riscv.cpp
@@ -35,7 +35,9 @@ Flatten_riscv::Flatten_riscv()
 #endif
 #endif // __riscv_vector
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Flatten_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
@@ -50,8 +52,10 @@ int Flatten_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage && elembits == 16)
         return forward_bf16s_fp16s(bottom_blob, top_blob, opt);
+#endif
 
     int dims = bottom_blob.dims;
 
diff --git a/src/layer/riscv/padding_riscv.cpp b/src/layer/riscv/padding_riscv.cpp
index c90212c1e..6d567cd77 100644
--- a/src/layer/riscv/padding_riscv.cpp
+++ b/src/layer/riscv/padding_riscv.cpp
@@ -39,7 +39,9 @@ Padding_riscv::Padding_riscv()
 #endif
 #endif // __riscv_vector
 
+#if NCNN_BF16
     support_bf16_storage = true;
+#endif
 }
 
 int Padding_riscv::create_pipeline(const Option& opt)
@@ -51,12 +53,14 @@ int Padding_riscv::create_pipeline(const Option& opt)
     }
 #endif
 
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         value_bf16 = float32_to_bfloat16(value);
 
         ncnn::cast_float32_to_bfloat16(per_channel_pad_data, per_channel_pad_data_bf16, opt);
     }
+#endif
 
     return 0;
 }
@@ -282,19 +286,28 @@ int Padding_riscv::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blob, co
                 {
                     Mat borderm = top_blob.channel(q);
 
-#if __riscv_zfh
+                    // clang-format off
+                    // *INDENT-OFF*
                     vuint16m1_t pad_value;
+#if __riscv_zfh
                     if (opt.use_fp16_storage)
                     {
                         pad_value = per_channel_pad_data_size ? vreinterpret_v_f16m1_u16m1(vle16_v_f16m1((const __fp16*)per_channel_pad_data_fp16 + q * packn, vl)) : vreinterpret_v_f16m1_u16m1(vfmv_v_f_f16m1((__fp16)value, vl));
                     }
                     else
+#endif
+#if NCNN_BF16
+                    if (opt.use_bf16_storage)
                     {
                         pad_value = per_channel_pad_data_size ? vle16_v_u16m1((const unsigned short*)per_channel_pad_data_bf16 + q * packn, vl) : vmv_v_x_u16m1(value_bf16, vl);
                     }
-#else
-                    vuint16m1_t pad_value = per_channel_pad_data_size ? vle16_v_u16m1((const unsigned short*)per_channel_pad_data_bf16 + q * packn, vl) : vmv_v_x_u16m1(value_bf16, vl);
+                    else
 #endif
+                    {
+                    }
+                    // *INDENT-ON*
+                    // clang-format on
+
                     //Channel padding
                     if ((q - front_) < 0 || (q - front_) >= channels)
                     {
diff --git a/src/layer/riscv/padding_riscv.h b/src/layer/riscv/padding_riscv.h
index d4ffcc327..c591806fa 100644
--- a/src/layer/riscv/padding_riscv.h
+++ b/src/layer/riscv/padding_riscv.h
@@ -34,9 +34,11 @@ protected:
     int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 
 public:
+#if NCNN_BF16
     // bf16
     unsigned short value_bf16;
     Mat per_channel_pad_data_bf16;
+#endif
 
     // fp16
     Mat per_channel_pad_data_fp16;
diff --git a/src/net.cpp b/src/net.cpp
index 927411f0b..6c3ea57e5 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -766,6 +766,7 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     }
     else
 #endif // NCNN_RVV
+#if NCNN_BF16
     if (opt.use_bf16_storage)
     {
         if (bottom_blob.elembits() == 32 && layer->support_bf16_storage)
@@ -781,6 +782,11 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
             bottom_blob = bottom_blob_fp32;
         }
     }
+    else
+#endif // NCNN_BF16
+    {
+        // no type conversion
+    }
     // *INDENT-ON*
     // clang-format on
 
@@ -2582,6 +2588,7 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
     }
     else
 #endif // NCNN_ARM82
+#if NCNN_BF16
     if (d->opt.use_bf16_storage && (type == 0))
     {
         if (feat.elembits() == 16)
@@ -2591,7 +2598,9 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
             feat = feat_fp32;
         }
     }
-    else if (feat.elembits() == 8 && (type == 0))
+    else
+#endif // NCNN_BF16
+    if (feat.elembits() == 8 && (type == 0))
     {
         Mat feat_fp32;
         cast_int8_to_float32(feat, feat_fp32, d->opt);
diff --git a/src/platform.h.in b/src/platform.h.in
index 6f8723344..c686db45b 100644
--- a/src/platform.h.in
+++ b/src/platform.h.in
@@ -37,6 +37,7 @@
 #cmakedefine01 NCNN_MMI
 #cmakedefine01 NCNN_RVV
 #cmakedefine01 NCNN_INT8
+#cmakedefine01 NCNN_BF16
 
 #cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@"