diff --git a/src/layer/arm/convolution_7x7.h b/src/layer/arm/convolution_7x7.h index 6a89bbdea..d5448d927 100644 --- a/src/layer/arm/convolution_7x7.h +++ b/src/layer/arm/convolution_7x7.h @@ -255,8 +255,11 @@ static void conv7x7s1_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ke "w"(_k46474849) // %31 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v9"); } -#else // __ARM_NEON && __aarch64__ defined, but __clang__ not defined \ -// When compiled with gcc, gcc does not accept over 30 operands +#else + /** + * __ARM_NEON && __aarch64__ defined, but __clang__ not defined + * When compiled with gcc, gcc does not accept over 30 operands + */ for (; nn > 0; nn--) { float32x4_t _sum = vld1q_f32(outptr); @@ -948,8 +951,11 @@ static void conv7x7s2_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ke "w"(_k46474849) // %31 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v9"); } -#else // __ARM_NEON && __aarch64__ defined, but __clang__ not defined \ -// When compiled with gcc, gcc does not accept over 30 operands +#else + /** + * __ARM_NEON && __aarch64__ defined, but __clang__ not defined + * When compiled with gcc, gcc does not accept over 30 operands + */ for (; nn > 0; nn--) { float32x4_t _sum = vld1q_f32(outptr); diff --git a/src/layer/arm/crop_arm.cpp b/src/layer/arm/crop_arm.cpp index 27719059b..b8c6552dc 100644 --- a/src/layer/arm/crop_arm.cpp +++ b/src/layer/arm/crop_arm.cpp @@ -140,8 +140,6 @@ int Crop_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) size_t elemsize = bottom_blob.elemsize; int elempack = bottom_blob.elempack; - int elembits = bottom_blob.elembits(); - #if __ARM_NEON if (elempack == 8) { @@ -461,8 +459,6 @@ int Crop_arm::forward(const std::vector& bottom_blobs, std::vector& to size_t elemsize = bottom_blob.elemsize; int elempack = bottom_blob.elempack; - int elembits = bottom_blob.elembits(); - int ref_elempack = reference_blob.elempack; Mat& top_blob = top_blobs[0]; diff --git a/src/layer/arm/deconvolution_3x3.h b/src/layer/arm/deconvolution_3x3.h index 8cb80f1df..30df240f2 100644 --- a/src/layer/arm/deconvolution_3x3.h +++ b/src/layer/arm/deconvolution_3x3.h @@ -66,100 +66,6 @@ static void deconv3x3s1_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ { float32x4_t _v = vld1q_f32(r0); -#if 0 // bad compiler generate slow instructions :( \ -// 0 - float32x4_t _out00 = vld1q_f32(outptr0 + 0); - _out00 = vmlaq_lane_f32(_out00, _v, vget_low_f32(_k0), 0); - - float32x4_t _out01 = vmulq_lane_f32(_v, vget_low_f32(_k0), 1); - - // ext - float32x4_t _zero_out01 = vdupq_n_f32(0.f); - _zero_out01 = vextq_f32(_zero_out01, _out01, 3); - _out00 = vaddq_f32(_out00, _zero_out01); - - // - float32x2_t _out00low = vget_low_f32(_out00); - float32x2_t _out00high = vget_high_f32(_out00); - - _out00high = vmla_lane_f32(_out00high, vget_low_f32(_v), vget_high_f32(_k0), 0); - - _out00 = vcombine_f32(_out00low, _out00high); - - vst1q_f32(outptr0 + 0, _out00); - - // - float32x2_t _out02high = vld1_f32(outptr0 + 4); - - float32x2_t _out01_zero = vext_f32(vget_high_f32(_out01), vget_low_f32(_zero_out01), 1); - _out02high = vadd_f32(_out02high, _out01_zero); - - _out02high = vmla_lane_f32(_out02high, vget_high_f32(_v), vget_high_f32(_k0), 0); - - vst1_f32(outptr0 + 4, _out02high); - - // 1 - float32x4_t _out10 = vld1q_f32(outptr1 + 0); - _out10 = vmlaq_lane_f32(_out10, _v, vget_low_f32(_k1), 0); - - float32x4_t _out11 = vmulq_lane_f32(_v, vget_low_f32(_k1), 1); - - // ext - float32x4_t _zero_out11 = vdupq_n_f32(0.f); - _zero_out11 = vextq_f32(_zero_out11, _out11, 3); - _out10 = vaddq_f32(_out10, _zero_out11); - - // - float32x2_t _out10low = vget_low_f32(_out10); - float32x2_t _out10high = vget_high_f32(_out10); - - _out10high = vmla_lane_f32(_out10high, vget_low_f32(_v), vget_high_f32(_k1), 0); - - _out10 = vcombine_f32(_out10low, _out10high); - - vst1q_f32(outptr1 + 0, _out10); - - // - float32x2_t _out12high = vld1_f32(outptr1 + 4); - - float32x2_t _out11_zero = vext_f32(vget_high_f32(_out11), vget_low_f32(_zero_out11), 1); - _out12high = vadd_f32(_out12high, _out11_zero); - - _out12high = vmla_lane_f32(_out12high, vget_high_f32(_v), vget_high_f32(_k1), 0); - - vst1_f32(outptr1 + 4, _out12high); - - // 2 - float32x4_t _out20 = vld1q_f32(outptr2 + 0); - _out20 = vmlaq_lane_f32(_out20, _v, vget_low_f32(_k2), 0); - - float32x4_t _out21 = vmulq_lane_f32(_v, vget_low_f32(_k2), 1); - - // ext - float32x4_t _zero_out21 = vdupq_n_f32(0.f); - _zero_out21 = vextq_f32(_zero_out21, _out21, 3); - _out20 = vaddq_f32(_out20, _zero_out21); - - // - float32x2_t _out20low = vget_low_f32(_out20); - float32x2_t _out20high = vget_high_f32(_out20); - - _out20high = vmla_lane_f32(_out20high, vget_low_f32(_v), vget_high_f32(_k2), 0); - - _out20 = vcombine_f32(_out20low, _out20high); - - vst1q_f32(outptr2 + 0, _out20); - - // - float32x2_t _out22high = vld1_f32(outptr2 + 4); - - float32x2_t _out21_zero = vext_f32(vget_high_f32(_out21), vget_low_f32(_zero_out21), 1); - _out22high = vadd_f32(_out22high, _out21_zero); - - _out22high = vmla_lane_f32(_out22high, vget_high_f32(_v), vget_high_f32(_k2), 0); - - vst1_f32(outptr2 + 4, _out22high); -#else // float32x4_t _out00 = vld1q_f32(outptr0 + 0); _out00 = vmlaq_lane_f32(_out00, _v, vget_low_f32(_k0), 0); @@ -198,7 +104,6 @@ static void deconv3x3s1_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ float32x4_t _out22 = vld1q_f32(outptr2 + 2); _out22 = vmlaq_lane_f32(_out22, _v, vget_high_f32(_k2), 0); vst1q_f32(outptr2 + 2, _out22); -#endif r0 += 4; outptr0 += 4; diff --git a/src/layer/convolution1d.cpp b/src/layer/convolution1d.cpp index f6fdc92b3..184b2bdb6 100644 --- a/src/layer/convolution1d.cpp +++ b/src/layer/convolution1d.cpp @@ -67,7 +67,7 @@ int Convolution1D::load_model(const ModelBin& mb) return 0; } -int Convolution1D::create_pipeline(const Option& opt) +int Convolution1D::create_pipeline(const Option&) { if (dynamic_weight) return 0; diff --git a/src/layer/convolutiondepthwise1d.cpp b/src/layer/convolutiondepthwise1d.cpp index 65db75d82..79c831680 100644 --- a/src/layer/convolutiondepthwise1d.cpp +++ b/src/layer/convolutiondepthwise1d.cpp @@ -73,7 +73,7 @@ int ConvolutionDepthWise1D::load_model(const ModelBin& mb) return 0; } -int ConvolutionDepthWise1D::create_pipeline(const Option& opt) +int ConvolutionDepthWise1D::create_pipeline(const Option&) { return 0; } diff --git a/src/layer/deconvolution.cpp b/src/layer/deconvolution.cpp index 7baa95fa2..029e3f69d 100644 --- a/src/layer/deconvolution.cpp +++ b/src/layer/deconvolution.cpp @@ -67,12 +67,9 @@ int Deconvolution::load_model(const ModelBin& mb) static int deconvolution(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int stride_w, int stride_h, int dilation_w, int dilation_h, int activation_type, const Mat& activation_params, const Option& opt) { - const int w = bottom_blob.w; - const int h = bottom_blob.h; const int inch = bottom_blob.c; const int outw = top_blob.w; - const int outh = top_blob.h; const int outch = top_blob.c; const int bias_term = bias_data.empty() ? 0 : 1; diff --git a/src/layer/deconvolution3d.cpp b/src/layer/deconvolution3d.cpp index bf6fff608..95b85a3cf 100644 --- a/src/layer/deconvolution3d.cpp +++ b/src/layer/deconvolution3d.cpp @@ -74,14 +74,10 @@ int Deconvolution3D::load_model(const ModelBin& mb) static int deconvolution3d(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int kernel_d, int stride_w, int stride_h, int stride_d, int dilation_w, int dilation_h, int dilation_d, int activation_type, const Mat& activation_params, const Option& opt) { - const int w = bottom_blob.w; - const int h = bottom_blob.h; - const int d = bottom_blob.d; const int inch = bottom_blob.c; const int outw = top_blob.w; const int outh = top_blob.h; - const int outd = top_blob.d; const int outch = top_blob.c; const int bias_term = bias_data.empty() ? 0 : 1; diff --git a/src/layer/deconvolutiondepthwise.cpp b/src/layer/deconvolutiondepthwise.cpp index 0ff0325f0..77a392c16 100644 --- a/src/layer/deconvolutiondepthwise.cpp +++ b/src/layer/deconvolutiondepthwise.cpp @@ -68,12 +68,9 @@ int DeconvolutionDepthWise::load_model(const ModelBin& mb) static int deconvolutiondepthwise(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int stride_w, int stride_h, int dilation_w, int dilation_h, int group, int activation_type, const Mat& activation_params, const Option& opt) { - const int w = bottom_blob.w; - const int h = bottom_blob.h; const int inch = bottom_blob.c; const int outw = top_blob.w; - const int outh = top_blob.h; const int outch = top_blob.c; const int bias_term = bias_data.empty() ? 0 : 1; diff --git a/src/layer/deconvolutiondepthwise3d.cpp b/src/layer/deconvolutiondepthwise3d.cpp index e8030c979..dd30188a6 100644 --- a/src/layer/deconvolutiondepthwise3d.cpp +++ b/src/layer/deconvolutiondepthwise3d.cpp @@ -75,14 +75,10 @@ int DeconvolutionDepthWise3D::load_model(const ModelBin& mb) static int deconvolutiondepthwise3d(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int kernel_d, int stride_w, int stride_h, int stride_d, int dilation_w, int dilation_h, int dilation_d, int group, int activation_type, const Mat& activation_params, const Option& opt) { - const int w = bottom_blob.w; - const int h = bottom_blob.h; - const int d = bottom_blob.d; const int inch = bottom_blob.c; const int outw = top_blob.w; const int outh = top_blob.h; - const int outd = top_blob.d; const int outch = top_blob.c; const int bias_term = bias_data.empty() ? 0 : 1;