|
|
|
@@ -33,7 +33,7 @@ int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter |
|
|
|
int out_tile_count = UP_DIV(out_plane, TILE_NUM); |
|
|
|
int window = win_w * win_h; |
|
|
|
|
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
MS_FLOAT32X4 min_value = MS_MOVQ_F32(minf); |
|
|
|
MS_FLOAT32X4 max_value = MS_MOVQ_F32(maxf); |
|
|
|
#endif |
|
|
|
@@ -62,7 +62,7 @@ int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter |
|
|
|
for (int ci = 0; ci < c4; ci++) { |
|
|
|
const float *src_c_ptr = src_plane_ptr + ci * C4NUM; |
|
|
|
float *dst_c_ptr = dst_plane_ptr + ci * C4NUM; |
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
MS_FLOAT32X4 tmp_avg = MS_MOVQ_F32(0); |
|
|
|
#else |
|
|
|
float tmp_avg1 = 0; |
|
|
|
@@ -74,7 +74,7 @@ int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter |
|
|
|
for (int h = real_win_h_start; h < real_win_h_end; h++) { |
|
|
|
for (int w = real_win_w_start; w < real_win_w_end; w++) { |
|
|
|
const float *src_win_ptr = src_c_ptr + ((in_h_index + h) * in_w + in_w_index + w) * channel; |
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
tmp_avg = MS_ADDQ_F32(tmp_avg, MS_LDQ_F32(src_win_ptr)); |
|
|
|
#else |
|
|
|
tmp_avg1 += src_win_ptr[0]; |
|
|
|
@@ -91,7 +91,7 @@ int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter |
|
|
|
if (real_count == 0) { |
|
|
|
return NNACL_ERR; |
|
|
|
} |
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
tmp_avg = tmp_avg / MS_MOVQ_F32(real_count); |
|
|
|
tmp_avg = MS_MAXQ_F32(tmp_avg, min_value); |
|
|
|
tmp_avg = MS_MINQ_F32(tmp_avg, max_value); |
|
|
|
@@ -159,7 +159,7 @@ void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParamete |
|
|
|
int out_tile_count = UP_DIV(out_plane, TILE_NUM); |
|
|
|
int c4 = channel / C4NUM; /* oc && ic */ |
|
|
|
|
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
MS_FLOAT32X4 min_value = MS_MOVQ_F32(minf); |
|
|
|
MS_FLOAT32X4 max_value = MS_MOVQ_F32(maxf); |
|
|
|
#endif |
|
|
|
@@ -188,7 +188,7 @@ void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParamete |
|
|
|
for (int ci = 0; ci < c4; ci++) { |
|
|
|
const float *src_c_ptr = src_plane_ptr + ci * C4NUM; |
|
|
|
float *dst_c_ptr = dst_plane_ptr + ci * C4NUM; |
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
MS_FLOAT32X4 tmp_max = MS_MOVQ_F32(-FLT_MAX); |
|
|
|
#else |
|
|
|
float tmp_max1 = -FLT_MAX; |
|
|
|
@@ -200,7 +200,7 @@ void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParamete |
|
|
|
for (int kh = real_win_h_start; kh < real_win_h_end; kh++) { |
|
|
|
for (int kw = real_win_w_start; kw < real_win_w_end; kw++) { |
|
|
|
const float *src_win_ptr = src_c_ptr + ((in_h_index + kh) * in_w + in_w_index + kw) * channel; |
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
tmp_max = MS_MAXQ_F32(tmp_max, MS_LDQ_F32(src_win_ptr)); |
|
|
|
#else |
|
|
|
tmp_max1 = fmax(tmp_max1, src_win_ptr[0]); |
|
|
|
@@ -210,7 +210,7 @@ void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParamete |
|
|
|
#endif |
|
|
|
} // win_w loop |
|
|
|
} // win_h loop |
|
|
|
#if defined(ENABLE_NEON) || defined(ENALBE_SSE) |
|
|
|
#if defined(ENABLE_NEON) || defined(ENABLE_SSE) |
|
|
|
tmp_max = MS_MAXQ_F32(tmp_max, min_value); |
|
|
|
tmp_max = MS_MINQ_F32(tmp_max, max_value); |
|
|
|
MS_STQ_F32(dst_c_ptr, tmp_max); |
|
|
|
|