Browse Source

fix requantize x86 pack4to1

tags/20210720
nihuini 5 years ago
parent
commit
7a3a5be4e6
1 changed file with 64 additions and 64 deletions
  1. +64
    -64
      src/layer/x86/requantize_x86.cpp

+ 64
- 64
src/layer/x86/requantize_x86.cpp View File

@@ -714,10 +714,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -735,10 +735,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -755,10 +755,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -780,10 +780,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -802,10 +802,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -823,10 +823,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -848,10 +848,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -870,10 +870,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -891,10 +891,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -915,10 +915,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -938,10 +938,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -960,10 +960,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr[0] = (v >> 56) & 0xff;
- ptr[1] = (v >> 48) & 0xff;
- ptr[2] = (v >> 40) & 0xff;
- ptr[3] = (v >> 32) & 0xff;
+ ptr[0] = (v >> 32) & 0xff;
+ ptr[1] = (v >> 40) & 0xff;
+ ptr[2] = (v >> 48) & 0xff;
+ ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -1072,10 +1072,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr0[0] = (v >> 56) & 0xff;
- ptr1[0] = (v >> 48) & 0xff;
- ptr2[0] = (v >> 40) & 0xff;
- ptr3[0] = (v >> 32) & 0xff;
+ ptr0[0] = (v >> 32) & 0xff;
+ ptr1[0] = (v >> 40) & 0xff;
+ ptr2[0] = (v >> 48) & 0xff;
+ ptr3[0] = (v >> 56) & 0xff;

intptr += 4;
ptr0 += 1;
@@ -1107,10 +1107,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr0[0] = (v >> 56) & 0xff;
- ptr1[0] = (v >> 48) & 0xff;
- ptr2[0] = (v >> 40) & 0xff;
- ptr3[0] = (v >> 32) & 0xff;
+ ptr0[0] = (v >> 32) & 0xff;
+ ptr1[0] = (v >> 40) & 0xff;
+ ptr2[0] = (v >> 48) & 0xff;
+ ptr3[0] = (v >> 56) & 0xff;

intptr += 4;
ptr0 += 1;
@@ -1228,10 +1228,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr0[0] = (v >> 56) & 0xff;
- ptr1[0] = (v >> 48) & 0xff;
- ptr2[0] = (v >> 40) & 0xff;
- ptr3[0] = (v >> 32) & 0xff;
+ ptr0[0] = (v >> 32) & 0xff;
+ ptr1[0] = (v >> 40) & 0xff;
+ ptr2[0] = (v >> 48) & 0xff;
+ ptr3[0] = (v >> 56) & 0xff;

intptr += 4;
ptr0 += 1;
@@ -1263,10 +1263,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
- ptr0[0] = (v >> 56) & 0xff;
- ptr1[0] = (v >> 48) & 0xff;
- ptr2[0] = (v >> 40) & 0xff;
- ptr3[0] = (v >> 32) & 0xff;
+ ptr0[0] = (v >> 32) & 0xff;
+ ptr1[0] = (v >> 40) & 0xff;
+ ptr2[0] = (v >> 48) & 0xff;
+ ptr3[0] = (v >> 56) & 0xff;

intptr += 4;
ptr0 += 1;


Loading…
Cancel
Save