| @@ -74,8 +74,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| { | |||
| ptr[i] /= sum; | |||
| } | |||
| return 0; | |||
| } | |||
| if (dims == 2 && positive_axis == 0) | |||
| @@ -122,8 +120,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| ptr[j] /= sum[j]; | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| if (dims == 2 && positive_axis == 1) | |||
| @@ -152,8 +148,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| ptr[j] /= s; | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| if (dims == 3 && positive_axis == 0) | |||
| @@ -204,8 +198,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| ptr[i] /= sum[i]; | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| if (dims == 3 && positive_axis == 1) | |||
| @@ -276,8 +268,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| ptr += w; | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| if (dims == 3 && positive_axis == 2) | |||
| @@ -314,8 +304,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| ptr += w; | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| return 0; | |||
| @@ -0,0 +1,32 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #ifndef LAYER_SOFTMAX_X86_H | |||
| #define LAYER_SOFTMAX_X86_H | |||
| #include "softmax.h" | |||
| namespace ncnn { | |||
| // Softmax layer specialization declared in the x86 layer header. | |||
| // Derives virtually from the generic Softmax layer and redeclares | |||
| // forward_inplace with the same signature as Softmax::forward_inplace. | |||
| class Softmax_x86 : public virtual Softmax | |||
| { | |||
| public: | |||
| // Default constructor; defined out of line. | |||
| Softmax_x86(); | |||
| // In-place forward pass: bottom_top_blob is both the input and the output. | |||
| // Returns an int status code (the generic implementation returns 0 on its normal paths). | |||
| virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | |||
| }; | |||
| } // namespace ncnn | |||
| #endif // LAYER_SOFTMAX_X86_H | |||
| @@ -46,6 +46,13 @@ static NCNN_FORCEINLINE float _mm_reduce_add_ps(__m128 x128) | |||
| return _mm_cvtss_f32(x32); | |||
| } | |||
| // Horizontal maximum: reduces the four float lanes of x128 to one scalar. | |||
| static NCNN_FORCEINLINE float _mm_reduce_max_ps(__m128 x128) | |||
| { | |||
| // Move lanes 2,3 down to positions 0,1 and take the pairwise max with lanes 0,1. | |||
| const __m128 upper_pair = _mm_movehl_ps(x128, x128); | |||
| const __m128 pair_max = _mm_max_ps(x128, upper_pair); | |||
| // 0x55 replicates lane 1 into every lane, so the scalar max below combines lane 0 with lane 1. | |||
| const __m128 lane1 = _mm_shuffle_ps(pair_max, pair_max, 0x55); | |||
| const __m128 total_max = _mm_max_ss(pair_max, lane1); | |||
| return _mm_cvtss_f32(total_max); | |||
| } | |||
| static NCNN_FORCEINLINE int _mm_reduce_add_epi32(__m128i x) | |||
| { | |||
| __m128i hi64 = _mm_unpackhi_epi64(x, x); | |||
| @@ -287,6 +294,14 @@ static NCNN_FORCEINLINE float _mm256_reduce_add_ps(__m256 x) | |||
| return _mm_cvtss_f32(x32); | |||
| } | |||
| // Horizontal maximum: reduces the eight float lanes of x to one scalar. | |||
| static NCNN_FORCEINLINE float _mm256_reduce_max_ps(__m256 x) | |||
| { | |||
| // Fold the upper 128-bit half onto the lower half (upper half stays the first operand). | |||
| const __m128 hi_half = _mm256_extractf128_ps(x, 1); | |||
| const __m128 lo_half = _mm256_castps256_ps128(x); | |||
| const __m128 quad_max = _mm_max_ps(hi_half, lo_half); | |||
| // Fold lanes 2,3 onto lanes 0,1. | |||
| const __m128 pair_max = _mm_max_ps(quad_max, _mm_movehl_ps(quad_max, quad_max)); | |||
| // 0x55 replicates lane 1 into every lane; the scalar max then combines lane 0 with lane 1. | |||
| const __m128 lane1 = _mm_shuffle_ps(pair_max, pair_max, 0x55); | |||
| return _mm_cvtss_f32(_mm_max_ss(pair_max, lane1)); | |||
| } | |||
| static NCNN_FORCEINLINE int64_t float2int8_avx(const __m256& _v0) | |||
| { | |||
| // _MM_FROUND_TO_NEAREST_INT round to even | |||
| @@ -473,6 +488,15 @@ static NCNN_FORCEINLINE float _mm512_comp_reduce_add_ps(__m512 x) | |||
| const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); | |||
| return _mm_cvtss_f32(x32); | |||
| } | |||
| // Horizontal maximum: reduces the sixteen float lanes of x to one scalar. | |||
| // Uses only AVX512F (plus AVX/SSE) intrinsics so it compiles wherever the | |||
| // enclosing __AVX512F__ guard is satisfied. | |||
| static NCNN_FORCEINLINE float _mm512_comp_reduce_max_ps(__m512 x) | |||
| { | |||
| // Upper 256 bits of x. _mm512_extractf32x8_ps would need AVX512DQ, so the same bits | |||
| // are extracted through the AVX512F double-precision variant and cast back to float. | |||
| const __m256 hi256 = _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(x), 1)); | |||
| // Fold 16 -> 8 lanes (lower half stays the first operand, as before). | |||
| const __m256 x256 = _mm256_max_ps(_mm512_castps512_ps256(x), hi256); | |||
| // Fold 8 -> 4 lanes. | |||
| const __m128 x128 = _mm_max_ps(_mm256_castps256_ps128(x256), _mm256_extractf128_ps(x256, 1)); | |||
| // Fold 4 -> 2 lanes: lanes 2,3 onto lanes 0,1. | |||
| const __m128 x64 = _mm_max_ps(x128, _mm_movehl_ps(x128, x128)); | |||
| // Fold 2 -> 1 lane: 0x55 replicates lane 1, scalar max combines it with lane 0. | |||
| const __m128 x32 = _mm_max_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); | |||
| return _mm_cvtss_f32(x32); | |||
| } | |||
| #endif // __AVX512F__ | |||
| #endif // __AVX__ | |||
| #endif // __SSE2__ | |||
| @@ -34,9 +34,9 @@ static int test_softmax(const ncnn::Mat& a, int axis) | |||
| static int test_softmax_0() | |||
| { | |||
| ncnn::Mat a = RandomMat(5, 7, 24); | |||
| ncnn::Mat b = RandomMat(7, 9, 12); | |||
| ncnn::Mat c = RandomMat(3, 5, 13); | |||
| ncnn::Mat a = RandomMat(25, 27, 32); | |||
| ncnn::Mat b = RandomMat(27, 29, 28); | |||
| ncnn::Mat c = RandomMat(23, 25, 27); | |||
| return 0 | |||
| || test_softmax(a, 0) | |||
| @@ -63,9 +63,9 @@ static int test_softmax_0() | |||
| static int test_softmax_1() | |||
| { | |||
| ncnn::Mat a = RandomMat(15, 24); | |||
| ncnn::Mat b = RandomMat(17, 12); | |||
| ncnn::Mat c = RandomMat(19, 15); | |||
| ncnn::Mat a = RandomMat(25, 32); | |||
| ncnn::Mat b = RandomMat(27, 28); | |||
| ncnn::Mat c = RandomMat(29, 27); | |||
| return 0 | |||
| || test_softmax(a, 0) | |||