Browse Source

x86 sse/avx/avx512 optimization for softmax (#3712)

tags/20220420
nihui GitHub 4 years ago
parent
commit
0ea327b557
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 2559 additions and 18 deletions
  1. +0
    -12
      src/layer/softmax.cpp
  2. +2497
    -0
      src/layer/x86/softmax_x86.cpp
  3. +32
    -0
      src/layer/x86/softmax_x86.h
  4. +24
    -0
      src/layer/x86/x86_usability.h
  5. +6
    -6
      tests/test_softmax.cpp

+ 0
- 12
src/layer/softmax.cpp View File

@@ -74,8 +74,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
ptr[i] /= sum;
}

return 0;
}

if (dims == 2 && positive_axis == 0)
@@ -122,8 +120,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
ptr[j] /= sum[j];
}
}

return 0;
}

if (dims == 2 && positive_axis == 1)
@@ -152,8 +148,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
ptr[j] /= s;
}
}

return 0;
}

if (dims == 3 && positive_axis == 0)
@@ -204,8 +198,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
ptr[i] /= sum[i];
}
}

return 0;
}

if (dims == 3 && positive_axis == 1)
@@ -276,8 +268,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
ptr += w;
}
}

return 0;
}

if (dims == 3 && positive_axis == 2)
@@ -314,8 +304,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
ptr += w;
}
}

return 0;
}

return 0;


+ 2497
- 0
src/layer/x86/softmax_x86.cpp
File diff suppressed because it is too large
View File


+ 32
- 0
src/layer/x86/softmax_x86.h View File

@@ -0,0 +1,32 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef LAYER_SOFTMAX_X86_H
#define LAYER_SOFTMAX_X86_H

#include "softmax.h"

namespace ncnn {

// x86 variant of the Softmax layer. It derives from the generic Softmax
// through virtual public inheritance and redeclares forward_inplace.
class Softmax_x86 : virtual public Softmax
{
public:
// Constructor; its definition is not part of this header.
Softmax_x86();

// In-place forward pass: reads and writes bottom_top_blob, taking run-time
// options from opt. Same signature as Softmax::forward_inplace.
// NOTE(review): presumably returns 0 on success like the generic
// implementation -- confirm against softmax_x86.cpp.
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
};

} // namespace ncnn

#endif // LAYER_SOFTMAX_X86_H

+ 24
- 0
src/layer/x86/x86_usability.h View File

@@ -46,6 +46,13 @@ static NCNN_FORCEINLINE float _mm_reduce_add_ps(__m128 x128)
return _mm_cvtss_f32(x32);
}

// Horizontal maximum: reduces the four packed floats of x128 to one scalar.
static NCNN_FORCEINLINE float _mm_reduce_max_ps(__m128 x128)
{
    // Bring lanes 2..3 down next to lanes 0..1 and take the pairwise max.
    const __m128 upper_pair = _mm_movehl_ps(x128, x128);
    const __m128 pair_max = _mm_max_ps(x128, upper_pair);

    // 0x55 replicates lane 1 into every lane; fold it onto lane 0.
    const __m128 lane1 = _mm_shuffle_ps(pair_max, pair_max, 0x55);
    const __m128 lane_max = _mm_max_ss(pair_max, lane1);

    return _mm_cvtss_f32(lane_max);
}

static NCNN_FORCEINLINE int _mm_reduce_add_epi32(__m128i x)
{
__m128i hi64 = _mm_unpackhi_epi64(x, x);
@@ -287,6 +294,14 @@ static NCNN_FORCEINLINE float _mm256_reduce_add_ps(__m256 x)
return _mm_cvtss_f32(x32);
}

// Horizontal maximum: reduces the eight packed floats of x to one scalar.
static NCNN_FORCEINLINE float _mm256_reduce_max_ps(__m256 x)
{
    // 256 -> 128: max of the upper half against the lower half.
    const __m128 hi_half = _mm256_extractf128_ps(x, 1);
    const __m128 lo_half = _mm256_castps256_ps128(x);
    const __m128 quad_max = _mm_max_ps(hi_half, lo_half);

    // 128 -> 64: fold lanes 2..3 onto lanes 0..1.
    const __m128 upper_pair = _mm_movehl_ps(quad_max, quad_max);
    const __m128 pair_max = _mm_max_ps(quad_max, upper_pair);

    // 64 -> 32: 0x55 replicates lane 1; fold it onto lane 0.
    const __m128 lane1 = _mm_shuffle_ps(pair_max, pair_max, 0x55);
    const __m128 lane_max = _mm_max_ss(pair_max, lane1);

    return _mm_cvtss_f32(lane_max);
}

static NCNN_FORCEINLINE int64_t float2int8_avx(const __m256& _v0)
{
// _MM_FROUND_TO_NEAREST_INT round to even
@@ -473,6 +488,15 @@ static NCNN_FORCEINLINE float _mm512_comp_reduce_add_ps(__m512 x)
const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55));
return _mm_cvtss_f32(x32);
}

// Horizontal maximum: reduces the sixteen packed floats of x to one scalar.
//
// The upper 256 bits are extracted through the pd domain with
// _mm512_extractf64x4_pd, which only requires AVX512F (the feature the
// enclosing preprocessor guard tests). _mm512_extractf32x8_ps would require
// AVX512DQ. The ps<->pd casts are pure bit reinterpretations, so the lanes
// that reach _mm256_max_ps are unchanged.
static NCNN_FORCEINLINE float _mm512_comp_reduce_max_ps(__m512 x)
{
    // 512 -> 256: max of the lower half against the upper half.
    const __m256 lo256 = _mm512_castps512_ps256(x);
    const __m256 hi256 = _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(x), 1));
    const __m256 x256 = _mm256_max_ps(lo256, hi256);

    // 256 -> 128
    const __m128 x128 = _mm_max_ps(_mm256_castps256_ps128(x256), _mm256_extractf128_ps(x256, 1));

    // 128 -> 64: fold lanes 2..3 onto lanes 0..1.
    const __m128 x64 = _mm_max_ps(x128, _mm_movehl_ps(x128, x128));

    // 64 -> 32: 0x55 replicates lane 1; fold it onto lane 0.
    const __m128 x32 = _mm_max_ss(x64, _mm_shuffle_ps(x64, x64, 0x55));

    return _mm_cvtss_f32(x32);
}
#endif // __AVX512F__
#endif // __AVX__
#endif // __SSE2__


+ 6
- 6
tests/test_softmax.cpp View File

@@ -34,9 +34,9 @@ static int test_softmax(const ncnn::Mat& a, int axis)

static int test_softmax_0()
{
ncnn::Mat a = RandomMat(5, 7, 24);
ncnn::Mat b = RandomMat(7, 9, 12);
ncnn::Mat c = RandomMat(3, 5, 13);
ncnn::Mat a = RandomMat(25, 27, 32);
ncnn::Mat b = RandomMat(27, 29, 28);
ncnn::Mat c = RandomMat(23, 25, 27);

return 0
|| test_softmax(a, 0)
@@ -63,9 +63,9 @@ static int test_softmax_0()

static int test_softmax_1()
{
ncnn::Mat a = RandomMat(15, 24);
ncnn::Mat b = RandomMat(17, 12);
ncnn::Mat c = RandomMat(19, 15);
ncnn::Mat a = RandomMat(25, 32);
ncnn::Mat b = RandomMat(27, 28);
ncnn::Mat c = RandomMat(29, 27);

return 0
|| test_softmax(a, 0)


Loading…
Cancel
Save