| @@ -714,10 +714,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else if (bias_data_size == 1) | |||
| @@ -735,10 +735,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else | |||
| @@ -755,10 +755,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| } | |||
| @@ -780,10 +780,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else if (bias_data_size == 1) | |||
| @@ -802,10 +802,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else | |||
| @@ -823,10 +823,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| } | |||
| @@ -848,10 +848,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else if (bias_data_size == 1) | |||
| @@ -870,10 +870,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else | |||
| @@ -891,10 +891,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| } | |||
| @@ -915,10 +915,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else if (bias_data_size == 1) | |||
| @@ -938,10 +938,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| else | |||
| @@ -960,10 +960,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr[0] = (v >> 56) & 0xff; | |||
| ptr[1] = (v >> 48) & 0xff; | |||
| ptr[2] = (v >> 40) & 0xff; | |||
| ptr[3] = (v >> 32) & 0xff; | |||
| ptr[0] = (v >> 32) & 0xff; | |||
| ptr[1] = (v >> 40) & 0xff; | |||
| ptr[2] = (v >> 48) & 0xff; | |||
| ptr[3] = (v >> 56) & 0xff; | |||
| } | |||
| } | |||
| } | |||
| @@ -1072,10 +1072,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr0[0] = (v >> 56) & 0xff; | |||
| ptr1[0] = (v >> 48) & 0xff; | |||
| ptr2[0] = (v >> 40) & 0xff; | |||
| ptr3[0] = (v >> 32) & 0xff; | |||
| ptr0[0] = (v >> 32) & 0xff; | |||
| ptr1[0] = (v >> 40) & 0xff; | |||
| ptr2[0] = (v >> 48) & 0xff; | |||
| ptr3[0] = (v >> 56) & 0xff; | |||
| intptr += 4; | |||
| ptr0 += 1; | |||
| @@ -1107,10 +1107,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr0[0] = (v >> 56) & 0xff; | |||
| ptr1[0] = (v >> 48) & 0xff; | |||
| ptr2[0] = (v >> 40) & 0xff; | |||
| ptr3[0] = (v >> 32) & 0xff; | |||
| ptr0[0] = (v >> 32) & 0xff; | |||
| ptr1[0] = (v >> 40) & 0xff; | |||
| ptr2[0] = (v >> 48) & 0xff; | |||
| ptr3[0] = (v >> 56) & 0xff; | |||
| intptr += 4; | |||
| ptr0 += 1; | |||
| @@ -1228,10 +1228,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr0[0] = (v >> 56) & 0xff; | |||
| ptr1[0] = (v >> 48) & 0xff; | |||
| ptr2[0] = (v >> 40) & 0xff; | |||
| ptr3[0] = (v >> 32) & 0xff; | |||
| ptr0[0] = (v >> 32) & 0xff; | |||
| ptr1[0] = (v >> 40) & 0xff; | |||
| ptr2[0] = (v >> 48) & 0xff; | |||
| ptr3[0] = (v >> 56) & 0xff; | |||
| intptr += 4; | |||
| ptr0 += 1; | |||
| @@ -1263,10 +1263,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| _v = activation_sse(_v, activation_type, activation_params); | |||
| _v = _mm_mul_ps(_v, _scale_out); | |||
| int64_t v = float2int8_sse(_v, _v); | |||
| ptr0[0] = (v >> 56) & 0xff; | |||
| ptr1[0] = (v >> 48) & 0xff; | |||
| ptr2[0] = (v >> 40) & 0xff; | |||
| ptr3[0] = (v >> 32) & 0xff; | |||
| ptr0[0] = (v >> 32) & 0xff; | |||
| ptr1[0] = (v >> 40) & 0xff; | |||
| ptr2[0] = (v >> 48) & 0xff; | |||
| ptr3[0] = (v >> 56) & 0xff; | |||
| intptr += 4; | |||
| ptr0 += 1; | |||