diff --git a/.ci/test-coverage.yml b/.ci/test-coverage.yml index 7ccc10656..e0800bb6b 100644 --- a/.ci/test-coverage.yml +++ b/.ci/test-coverage.yml @@ -52,14 +52,14 @@ jobs: uses: cache@1.* with: cachePaths: swiftshader-install - cacheKey: swiftshader-linux-install-20230420-1 + cacheKey: swiftshader-linux-install-20240622 - name: checkout-swiftshader if: steps.cache-swiftshader.outputs.cacheHit != 'true' checkout: https://github.com/google/swiftshader.git with: pullType: COMMIT_ID - refName: dd55e592406dc0bae219df11adec6363840aff4a + refName: de870ac7518fe2b6bb651ecc22fc36647cf7b986 localPath: swiftshader enableSubmodule: false enableGitLfs: false diff --git a/.github/workflows/linux-x64-gpu-clang.yml b/.github/workflows/linux-x64-gpu-clang.yml index 8ab7e6ae9..0d0ba34f2 100644 --- a/.github/workflows/linux-x64-gpu-clang.yml +++ b/.github/workflows/linux-x64-gpu-clang.yml @@ -46,14 +46,14 @@ jobs: uses: actions/cache@v4 with: path: swiftshader-install - key: swiftshader-linux-install-20230420 + key: swiftshader-linux-install-20240622 - name: checkout-swiftshader if: steps.cache-swiftshader.outputs.cache-hit != 'true' uses: actions/checkout@v4 with: repository: google/swiftshader path: swiftshader - ref: dd55e592406dc0bae219df11adec6363840aff4a + ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986 - name: checkout-swiftshader-submodules if: steps.cache-swiftshader.outputs.cache-hit != 'true' run: | diff --git a/.github/workflows/linux-x64-gpu-gcc.yml b/.github/workflows/linux-x64-gpu-gcc.yml index 55eb9ff87..27052a9e4 100644 --- a/.github/workflows/linux-x64-gpu-gcc.yml +++ b/.github/workflows/linux-x64-gpu-gcc.yml @@ -46,14 +46,14 @@ jobs: uses: actions/cache@v4 with: path: swiftshader-install - key: swiftshader-linux-install-20230420 + key: swiftshader-linux-install-20240622 - name: checkout-swiftshader if: steps.cache-swiftshader.outputs.cache-hit != 'true' uses: actions/checkout@v4 with: repository: google/swiftshader path: swiftshader - ref: dd55e592406dc0bae219df11adec6363840aff4a + ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986 - name: checkout-swiftshader-submodules if: steps.cache-swiftshader.outputs.cache-hit != 'true' run: | diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e4df7880a..160fe0faf 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -137,14 +137,14 @@ jobs: uses: actions/cache@v4 with: path: swiftshader-install - key: swiftshader-macos-install-20230420 + key: swiftshader-macos-install-20240622 - name: checkout-swiftshader if: steps.cache-swiftshader.outputs.cache-hit != 'true' uses: actions/checkout@v4 with: repository: google/swiftshader path: swiftshader - ref: dd55e592406dc0bae219df11adec6363840aff4a + ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986 - name: checkout-swiftshader-submodules if: steps.cache-swiftshader.outputs.cache-hit != 'true' run: | diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e8c5ee384..ff73510d2 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -56,14 +56,14 @@ jobs: uses: actions/cache@v4 with: path: swiftshader-install - key: swiftshader-linux-install-20230420 + key: swiftshader-linux-install-20240622 - name: checkout-swiftshader if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true' uses: actions/checkout@v4 with: repository: google/swiftshader path: swiftshader - ref: dd55e592406dc0bae219df11adec6363840aff4a + ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986 - name: checkout-swiftshader-submodules if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true' run: | diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index 3695bbed2..1384cc43d 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -52,11 +52,62 @@ jobs: lcov -r lcov.info '*/install/*' -o lcov.info lcov -r lcov.info '*/build/*' -o lcov.info lcov --list lcov.info + - name: codecov + id: codecov + continue-on-error: true + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: build/lcov.info + - name: codecov-vlen256-retry-1 + continue-on-error: true + id: codecov-vlen256-retry-1 + if: steps.codecov.outcome=='failure' + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: build/lcov.info + - name: codecov-vlen256-retry-2 + continue-on-error: true + id: codecov-vlen256-retry-2 + if: steps.codecov-vlen256-retry-1.outcome=='failure' + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: build/lcov.info + - name: codecov-vlen256-retry-3 + continue-on-error: true + id: codecov-vlen256-retry-3 + if: steps.codecov-vlen256-retry-2.outcome=='failure' + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: build/lcov.info + - name: codecov-vlen256-retry-4 + continue-on-error: true + id: codecov-vlen256-retry-4 + if: steps.codecov-vlen256-retry-3.outcome=='failure' uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} file: build/lcov.info + - name: codecov-vlen256-retry-5 + continue-on-error: true + id: codecov-vlen256-retry-5 + if: steps.codecov-vlen256-retry-4.outcome=='failure' + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: build/lcov.info + - name: set the status + if: always() + run: | + if ${{ steps.codecov.outcome=='success' || steps.codecov-vlen256-retry-1.outcome=='success' || steps.codecov-vlen256-retry-2.outcome=='success' || steps.codecov-vlen256-retry-3.outcome=='success' || steps.codecov-vlen256-retry-4.outcome=='success' || steps.codecov-vlen256-retry-5.outcome=='success' }}; then + echo fine + else + exit 1 + fi linux-gcc-x64-avx512-spr: runs-on: ubuntu-22.04 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index b171e1c8c..d099290a3 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -79,26 +79,26 @@ jobs: cmake --build . --config Release -j 4 cmake --build . --config Release --target install - name: cache-swiftshader - if: matrix.vs-version != 'vs2015' + if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' id: cache-swiftshader uses: actions/cache@v4 with: path: swiftshader-install - key: swiftshader-${{ matrix.vs-version }}-x64-install-20230420 + key: swiftshader-${{ matrix.vs-version }}-x64-install-20240622 - name: checkout-swiftshader - if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true' + if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true' uses: actions/checkout@v4 with: repository: google/swiftshader path: swiftshader - ref: dd55e592406dc0bae219df11adec6363840aff4a + ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986 - name: checkout-swiftshader-submodules - if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true' + if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true' run: | cd swiftshader git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive - name: swiftshader - if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true' + if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true' run: | cd swiftshader mkdir build-${{ matrix.vs-version }}; cd build-${{ matrix.vs-version }} @@ -113,7 +113,7 @@ jobs: cmake -T ${{ matrix.toolset-version }},host=x64 -A x64 -Dprotobuf_DIR="$env:GITHUB_WORKSPACE\protobuf-install\cmake" -DNCNN_VULKAN=ON -DNCNN_BUILD_TESTS=ON .. cmake --build . --config Release -j 4 - name: x64-test - if: matrix.vs-version != 'vs2015' + if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' run: | echo "[Processor]`nThreadCount=1`n" > build-x64/tests/Release/SwiftShader.ini Copy-Item -Path "$env:GITHUB_WORKSPACE\swiftshader-install\vulkan-1.dll" -Destination 'build-x64\tests' diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md index 6056c277b..05996f8d7 100644 --- a/docs/developer-guide/operators.md +++ b/docs/developer-guide/operators.md @@ -1266,21 +1266,22 @@ y = affine(out) | --------- | ------------- | ----- | --------- | ----------------- | | 0 | embed_dim | int | 0 | | | 1 | num_heads | int | 1 | | -| 2 | weight_data_size| int | 0 | | +| 2 | weight_data_size| int | 0 | qdim = weight_data_size / embed_dim | | 3 | kdim | int | embed_dim | | | 4 | vdim | int | embed_dim | | | 5 | attn_mask | int | 0 | | +| 6 | scale | float | 1.f / sqrt(embed_dim / num_heads) | | | weight | type | shape | | ------------- | ----- | --------------------- | -| q_weight_data | float/fp16/int8 | [weight_data_size] | +| q_weight_data | float/fp16/int8 | [embed_dim * qdim] | | q_bias_data | float | [embed_dim] | | k_weight_data | float/fp16/int8 | [embed_dim * kdim] | | k_bias_data | float | [embed_dim] | | v_weight_data | float/fp16/int8 | [embed_dim * vdim] | | v_bias_data | float | [embed_dim] | -| out_weight_data| float/fp16/int8 | [weight_data_size] | -| out_bias_data | float | [embed_dim] | +| out_weight_data| float/fp16/int8 | [qdim * embed_dim] | +| out_bias_data | float | [qdim] | # MVN ``` diff --git a/src/layer/arm/multiheadattention_arm.cpp b/src/layer/arm/multiheadattention_arm.cpp index f5826ddae..9fedf8b16 100644 --- a/src/layer/arm/multiheadattention_arm.cpp +++ b/src/layer/arm/multiheadattention_arm.cpp @@ -60,12 +60,9 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) const int qdim = weight_data_size / embed_dim; { - const int embed_dim_per_head = embed_dim / num_heads; - const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head); - q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm); ncnn::ParamDict pd; - pd.set(0, inv_sqrt_embed_dim_per_head); + pd.set(0, scale); pd.set(1, 1.f); pd.set(2, 0); // transA pd.set(3, 1); // transB diff --git a/src/layer/multiheadattention.cpp b/src/layer/multiheadattention.cpp index 284801a2c..e25eec88a 100644 --- a/src/layer/multiheadattention.cpp +++ b/src/layer/multiheadattention.cpp @@ -30,6 +30,7 @@ int MultiHeadAttention::load_param(const ParamDict& pd) kdim = pd.get(3, embed_dim); vdim = pd.get(4, embed_dim); attn_mask = pd.get(5, 0); + scale = pd.get(6, 1.f / sqrtf(embed_dim / num_heads)); return 0; } @@ -111,12 +112,10 @@ int MultiHeadAttention::forward(const std::vector& bottom_blobs, std::vecto if (xqkv.empty()) return -100; - const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head); - #pragma omp parallel for num_threads(opt.num_threads) for (int q = 0; q < num_heads; q++) { - // xq = affine(q) * inv_sqrt_embed_dim_per_head + // xq = affine(q) * scale { Mat outm = xq.channel(q); @@ -135,7 +134,7 @@ int MultiHeadAttention::forward(const std::vector& bottom_blobs, std::vecto sum += *ptr++ * *kptr++; } - outptr[j] = sum * inv_sqrt_embed_dim_per_head; + outptr[j] = sum * scale; } } } diff --git a/src/layer/multiheadattention.h b/src/layer/multiheadattention.h index 50c8549ac..55764bd9c 100644 --- a/src/layer/multiheadattention.h +++ b/src/layer/multiheadattention.h @@ -37,6 +37,7 @@ public: int kdim; int vdim; int attn_mask; + float scale; Mat q_weight_data; Mat q_bias_data; diff --git a/src/layer/vulkan/multiheadattention_vulkan.cpp b/src/layer/vulkan/multiheadattention_vulkan.cpp index f1d7ce363..1abc09c30 100644 --- a/src/layer/vulkan/multiheadattention_vulkan.cpp +++ b/src/layer/vulkan/multiheadattention_vulkan.cpp @@ -48,12 +48,10 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) const int embed_dim_per_head = embed_dim / num_heads; const int qdim = weight_data_size / embed_dim; { - const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head); - q_gemm = ncnn::create_layer_vulkan(ncnn::LayerType::Gemm); q_gemm->vkdev = vkdev; ncnn::ParamDict pd; - pd.set(0, inv_sqrt_embed_dim_per_head); + pd.set(0, scale); pd.set(1, 1.f); pd.set(2, 0); // transA pd.set(3, 1); // transB diff --git a/src/layer/x86/multiheadattention_x86.cpp b/src/layer/x86/multiheadattention_x86.cpp index db5f730ae..9bddb3a78 100644 --- a/src/layer/x86/multiheadattention_x86.cpp +++ b/src/layer/x86/multiheadattention_x86.cpp @@ -41,12 +41,9 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) const int qdim = weight_data_size / embed_dim; { - const int embed_dim_per_head = embed_dim / num_heads; - const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head); - q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm); ncnn::ParamDict pd; - pd.set(0, inv_sqrt_embed_dim_per_head); + pd.set(0, scale); pd.set(1, 1.f); pd.set(2, 0); // transA pd.set(3, 1); // transB diff --git a/tests/test_multiheadattention.cpp b/tests/test_multiheadattention.cpp index c509f8156..5f110284c 100644 --- a/tests/test_multiheadattention.cpp +++ b/tests/test_multiheadattention.cpp @@ -106,6 +106,7 @@ static int test_multiheadattention_sameqkv(const ncnn::Mat& a, int embed_dim, in pd.set(2, embed_dim * qdim); pd.set(3, qdim); pd.set(4, qdim); + pd.set(6, 0.7f / sqrtf(embed_dim / num_heads)); std::vector weights(8); weights[0] = RandomMat(embed_dim * qdim);