multiheadattention scale param (#5526)

* update swiftshader
* skip vs2017 swiftshader
1 year ago · 4c3debae2d
--- a/.ci/test-coverage.yml
+++ b/.ci/test-coverage.yml
@@ -52,14 +52,14 @@ jobs:
      uses: cache@1.*
      with:
        cachePaths: swiftshader-install
        cacheKey: swiftshader-linux-install-20230420-1
        cacheKey: swiftshader-linux-install-20240622

    - name: checkout-swiftshader
      if: steps.cache-swiftshader.outputs.cacheHit != 'true'
      checkout: https://github.com/google/swiftshader.git
      with:
        pullType: COMMIT_ID
        refName: dd55e592406dc0bae219df11adec6363840aff4a
        refName: de870ac7518fe2b6bb651ecc22fc36647cf7b986
        localPath: swiftshader
        enableSubmodule: false
        enableGitLfs: false
--- a/.github/workflows/linux-x64-gpu-clang.yml
+++ b/.github/workflows/linux-x64-gpu-clang.yml
@@ -46,14 +46,14 @@ jobs:
      uses: actions/cache@v4
      with:
        path: swiftshader-install
        key: swiftshader-linux-install-20230420
        key: swiftshader-linux-install-20240622
    - name: checkout-swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v4
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: dd55e592406dc0bae219df11adec6363840aff4a
        ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
    - name: checkout-swiftshader-submodules
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
--- a/.github/workflows/linux-x64-gpu-gcc.yml
+++ b/.github/workflows/linux-x64-gpu-gcc.yml
@@ -46,14 +46,14 @@ jobs:
      uses: actions/cache@v4
      with:
        path: swiftshader-install
        key: swiftshader-linux-install-20230420
        key: swiftshader-linux-install-20240622
    - name: checkout-swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v4
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: dd55e592406dc0bae219df11adec6363840aff4a
        ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
    - name: checkout-swiftshader-submodules
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -137,14 +137,14 @@ jobs:
      uses: actions/cache@v4
      with:
        path: swiftshader-install
        key: swiftshader-macos-install-20230420
        key: swiftshader-macos-install-20240622
    - name: checkout-swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v4
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: dd55e592406dc0bae219df11adec6363840aff4a
        ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
    - name: checkout-swiftshader-submodules
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -56,14 +56,14 @@ jobs:
      uses: actions/cache@v4
      with:
        path: swiftshader-install
        key: swiftshader-linux-install-20230420
        key: swiftshader-linux-install-20240622
    - name: checkout-swiftshader
      if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v4
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: dd55e592406dc0bae219df11adec6363840aff4a
        ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
    - name: checkout-swiftshader-submodules
      if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
--- a/.github/workflows/test-coverage.yml
+++ b/.github/workflows/test-coverage.yml
@@ -52,11 +52,62 @@ jobs:
        lcov -r lcov.info '*/install/*' -o lcov.info
        lcov -r lcov.info '*/build/*' -o lcov.info
        lcov --list lcov.info

    - name: codecov
      id: codecov
      continue-on-error: true
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        file: build/lcov.info
    - name: codecov-vlen256-retry-1
      continue-on-error: true
      id: codecov-vlen256-retry-1
      if: steps.codecov.outcome=='failure'
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        file: build/lcov.info
    - name: codecov-vlen256-retry-2
      continue-on-error: true
      id: codecov-vlen256-retry-2
      if: steps.codecov-vlen256-retry-1.outcome=='failure'
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        file: build/lcov.info
    - name: codecov-vlen256-retry-3
      continue-on-error: true
      id: codecov-vlen256-retry-3
      if: steps.codecov-vlen256-retry-2.outcome=='failure'
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        file: build/lcov.info
    - name: codecov-vlen256-retry-4
      continue-on-error: true
      id: codecov-vlen256-retry-4
      if: steps.codecov-vlen256-retry-3.outcome=='failure'
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        file: build/lcov.info
    - name: codecov-vlen256-retry-5
      continue-on-error: true
      id: codecov-vlen256-retry-5
      if: steps.codecov-vlen256-retry-4.outcome=='failure'
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        file: build/lcov.info
    - name: set the status
      if: always()
      run: |
        if ${{ steps.codecov.outcome=='success' || steps.codecov-vlen256-retry-1.outcome=='success' || steps.codecov-vlen256-retry-2.outcome=='success' || steps.codecov-vlen256-retry-3.outcome=='success' || steps.codecov-vlen256-retry-4.outcome=='success' || steps.codecov-vlen256-retry-5.outcome=='success' }}; then
           echo fine
        else
           exit 1
        fi

  linux-gcc-x64-avx512-spr:
    runs-on: ubuntu-22.04
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -79,26 +79,26 @@ jobs:
        cmake --build . --config Release -j 4
        cmake --build . --config Release --target install
    - name: cache-swiftshader
      if: matrix.vs-version != 'vs2015'
      if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017'
      id: cache-swiftshader
      uses: actions/cache@v4
      with:
        path: swiftshader-install
        key: swiftshader-${{ matrix.vs-version }}-x64-install-20230420
        key: swiftshader-${{ matrix.vs-version }}-x64-install-20240622
    - name: checkout-swiftshader
      if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v4
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: dd55e592406dc0bae219df11adec6363840aff4a
        ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
    - name: checkout-swiftshader-submodules
      if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive
    - name: swiftshader
      if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        mkdir build-${{ matrix.vs-version }}; cd build-${{ matrix.vs-version }}
@@ -113,7 +113,7 @@ jobs:
        cmake -T ${{ matrix.toolset-version }},host=x64 -A x64 -Dprotobuf_DIR="$env:GITHUB_WORKSPACE\protobuf-install\cmake" -DNCNN_VULKAN=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . --config Release -j 4
    - name: x64-test
      if: matrix.vs-version != 'vs2015'
      if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017'
      run: |
        echo "[Processor]`nThreadCount=1`n" > build-x64/tests/Release/SwiftShader.ini
        Copy-Item -Path "$env:GITHUB_WORKSPACE\swiftshader-install\vulkan-1.dll" -Destination 'build-x64\tests'
--- a/docs/developer-guide/operators.md
+++ b/docs/developer-guide/operators.md
@@ -1266,21 +1266,22 @@ y = affine(out)
 | --------- | ------------- | ----- | --------- | ----------------- |
 | 0         | embed_dim     | int   | 0         |                   |
 | 1         | num_heads     | int   | 1         |                   |
 | 2         | weight_data_size| int | 0         |                   |
 | 2         | weight_data_size| int | 0         | qdim = weight_data_size / embed_dim |
 | 3         | kdim          | int   | embed_dim |                   |
 | 4         | vdim          | int   | embed_dim |                   |
 | 5         | attn_mask     | int   | 0         |                   |
 | 6         | scale         | float | 1.f / sqrt(embed_dim / num_heads) | |

 | weight        | type  | shape                 |
 | ------------- | ----- | --------------------- |
 | q_weight_data | float/fp16/int8 | [weight_data_size] |
 | q_weight_data | float/fp16/int8 | [embed_dim * qdim] |
 | q_bias_data   | float | [embed_dim]           |
 | k_weight_data | float/fp16/int8 | [embed_dim * kdim] |
 | k_bias_data   | float | [embed_dim]           |
 | v_weight_data | float/fp16/int8 | [embed_dim * vdim] |
 | v_bias_data   | float | [embed_dim]           |
 | out_weight_data| float/fp16/int8 | [weight_data_size] |
 | out_bias_data | float | [embed_dim]           |
 | out_weight_data| float/fp16/int8 | [qdim * embed_dim] |
 | out_bias_data | float | [qdim]                |

 # MVN
 ```
--- a/src/layer/arm/multiheadattention_arm.cpp
+++ b/src/layer/arm/multiheadattention_arm.cpp
@@ -60,12 +60,9 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt)
    const int qdim = weight_data_size / embed_dim;

    {
        const int embed_dim_per_head = embed_dim / num_heads;
        const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

        q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm);
        ncnn::ParamDict pd;
        pd.set(0, inv_sqrt_embed_dim_per_head);
        pd.set(0, scale);
        pd.set(1, 1.f);
        pd.set(2, 0);         // transA
        pd.set(3, 1);         // transB
--- a/src/layer/multiheadattention.cpp
+++ b/src/layer/multiheadattention.cpp
@@ -30,6 +30,7 @@ int MultiHeadAttention::load_param(const ParamDict& pd)
    kdim = pd.get(3, embed_dim);
    vdim = pd.get(4, embed_dim);
    attn_mask = pd.get(5, 0);
    scale = pd.get(6, 1.f / sqrtf(embed_dim / num_heads));

    return 0;
 }
@@ -111,12 +112,10 @@ int MultiHeadAttention::forward(const std::vector<Mat>& bottom_blobs, std::vecto
    if (xqkv.empty())
        return -100;

    const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

    #pragma omp parallel for num_threads(opt.num_threads)
    for (int q = 0; q < num_heads; q++)
    {
        // xq = affine(q) * inv_sqrt_embed_dim_per_head
        // xq = affine(q) * scale
        {
            Mat outm = xq.channel(q);

@@ -135,7 +134,7 @@ int MultiHeadAttention::forward(const std::vector<Mat>& bottom_blobs, std::vecto
                        sum += *ptr++ * *kptr++;
                    }

                    outptr[j] = sum * inv_sqrt_embed_dim_per_head;
                    outptr[j] = sum * scale;
                }
            }
        }
--- a/src/layer/multiheadattention.h
+++ b/src/layer/multiheadattention.h
@@ -37,6 +37,7 @@ public:
    int kdim;
    int vdim;
    int attn_mask;
    float scale;

    Mat q_weight_data;
    Mat q_bias_data;
--- a/src/layer/vulkan/multiheadattention_vulkan.cpp
+++ b/src/layer/vulkan/multiheadattention_vulkan.cpp
@@ -48,12 +48,10 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt)
    const int embed_dim_per_head = embed_dim / num_heads;
    const int qdim = weight_data_size / embed_dim;
    {
        const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

        q_gemm = ncnn::create_layer_vulkan(ncnn::LayerType::Gemm);
        q_gemm->vkdev = vkdev;
        ncnn::ParamDict pd;
        pd.set(0, inv_sqrt_embed_dim_per_head);
        pd.set(0, scale);
        pd.set(1, 1.f);
        pd.set(2, 0);         // transA
        pd.set(3, 1);         // transB
--- a/src/layer/x86/multiheadattention_x86.cpp
+++ b/src/layer/x86/multiheadattention_x86.cpp
@@ -41,12 +41,9 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt)
    const int qdim = weight_data_size / embed_dim;

    {
        const int embed_dim_per_head = embed_dim / num_heads;
        const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

        q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm);
        ncnn::ParamDict pd;
        pd.set(0, inv_sqrt_embed_dim_per_head);
        pd.set(0, scale);
        pd.set(1, 1.f);
        pd.set(2, 0);         // transA
        pd.set(3, 1);         // transB
--- a/tests/test_multiheadattention.cpp
+++ b/tests/test_multiheadattention.cpp
@@ -106,6 +106,7 @@ static int test_multiheadattention_sameqkv(const ncnn::Mat& a, int embed_dim, in
    pd.set(2, embed_dim * qdim);
    pd.set(3, qdim);
    pd.set(4, qdim);
    pd.set(6, 0.7f / sqrtf(embed_dim / num_heads));

    std::vector<ncnn::Mat> weights(8);
    weights[0] = RandomMat(embed_dim * qdim);