Browse Source

fix build on armv7 again ...

tags/20190908
nihui 6 years ago
parent
commit
7655b9e4e9
2 changed files with 14 additions and 0 deletions:
  1. src/layer/arm/convolution_arm.cpp (+7, -0)
  2. src/layer/arm/convolutiondepthwise_arm.cpp (+7, -0)

+ 7
- 0
src/layer/arm/convolution_arm.cpp View File

@@ -570,10 +570,17 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
 float32x4_t _w2 = vld1q_f32( kptr + 8 );
 float32x4_t _w3 = vld1q_f32( kptr + 12 );
 
+#if __aarch64__
 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0);
 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1);
 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2);
 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3);
+#else
+_sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0);
+_sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1);
+_sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0);
+_sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1);
+#endif
 
 kptr += 16;
 }


+ 7
- 0
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -621,10 +621,17 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
 float32x4_t _w2 = vld1q_f32( kptr + 8 );
 float32x4_t _w3 = vld1q_f32( kptr + 12 );
 
+#if __aarch64__
 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0);
 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1);
 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2);
 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3);
+#else
+_sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0);
+_sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1);
+_sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0);
+_sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1);
+#endif
 
 kptr += 16;
 }


Loading…
Cancel
Save