Browse Source

multiheadattention scale param (#5526)

* update swiftshader

* skip vs2017 swiftshader
tags/20240820
nihui GitHub 1 year ago
parent
commit
4c3debae2d
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
14 changed files with 81 additions and 36 deletions
  1. +2
    -2
      .ci/test-coverage.yml
  2. +2
    -2
      .github/workflows/linux-x64-gpu-clang.yml
  3. +2
    -2
      .github/workflows/linux-x64-gpu-gcc.yml
  4. +2
    -2
      .github/workflows/macos.yml
  5. +2
    -2
      .github/workflows/python.yml
  6. +51
    -0
      .github/workflows/test-coverage.yml
  7. +7
    -7
      .github/workflows/windows.yml
  8. +5
    -4
      docs/developer-guide/operators.md
  9. +1
    -4
      src/layer/arm/multiheadattention_arm.cpp
  10. +3
    -4
      src/layer/multiheadattention.cpp
  11. +1
    -0
      src/layer/multiheadattention.h
  12. +1
    -3
      src/layer/vulkan/multiheadattention_vulkan.cpp
  13. +1
    -4
      src/layer/x86/multiheadattention_x86.cpp
  14. +1
    -0
      tests/test_multiheadattention.cpp

+ 2
- 2
.ci/test-coverage.yml View File

@@ -52,14 +52,14 @@ jobs:
uses: cache@1.*
with:
cachePaths: swiftshader-install
cacheKey: swiftshader-linux-install-20230420-1
cacheKey: swiftshader-linux-install-20240622

- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cacheHit != 'true'
checkout: https://github.com/google/swiftshader.git
with:
pullType: COMMIT_ID
refName: dd55e592406dc0bae219df11adec6363840aff4a
refName: de870ac7518fe2b6bb651ecc22fc36647cf7b986
localPath: swiftshader
enableSubmodule: false
enableGitLfs: false


+ 2
- 2
.github/workflows/linux-x64-gpu-clang.yml View File

@@ -46,14 +46,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-linux-install-20230420
key: swiftshader-linux-install-20240622
- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |


+ 2
- 2
.github/workflows/linux-x64-gpu-gcc.yml View File

@@ -46,14 +46,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-linux-install-20230420
key: swiftshader-linux-install-20240622
- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |


+ 2
- 2
.github/workflows/macos.yml View File

@@ -137,14 +137,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-macos-install-20230420
key: swiftshader-macos-install-20240622
- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |


+ 2
- 2
.github/workflows/python.yml View File

@@ -56,14 +56,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-linux-install-20230420
key: swiftshader-linux-install-20240622
- name: checkout-swiftshader
if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |


+ 51
- 0
.github/workflows/test-coverage.yml View File

@@ -52,11 +52,62 @@ jobs:
lcov -r lcov.info '*/install/*' -o lcov.info
lcov -r lcov.info '*/build/*' -o lcov.info
lcov --list lcov.info

- name: codecov
id: codecov
continue-on-error: true
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-1
continue-on-error: true
id: codecov-vlen256-retry-1
if: steps.codecov.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-2
continue-on-error: true
id: codecov-vlen256-retry-2
if: steps.codecov-vlen256-retry-1.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-3
continue-on-error: true
id: codecov-vlen256-retry-3
if: steps.codecov-vlen256-retry-2.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-4
continue-on-error: true
id: codecov-vlen256-retry-4
if: steps.codecov-vlen256-retry-3.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-5
continue-on-error: true
id: codecov-vlen256-retry-5
if: steps.codecov-vlen256-retry-4.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: set the status
if: always()
run: |
if ${{ steps.codecov.outcome=='success' || steps.codecov-vlen256-retry-1.outcome=='success' || steps.codecov-vlen256-retry-2.outcome=='success' || steps.codecov-vlen256-retry-3.outcome=='success' || steps.codecov-vlen256-retry-4.outcome=='success' || steps.codecov-vlen256-retry-5.outcome=='success' }}; then
echo fine
else
exit 1
fi

linux-gcc-x64-avx512-spr:
runs-on: ubuntu-22.04


+ 7
- 7
.github/workflows/windows.yml View File

@@ -79,26 +79,26 @@ jobs:
cmake --build . --config Release -j 4
cmake --build . --config Release --target install
- name: cache-swiftshader
if: matrix.vs-version != 'vs2015'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017'
id: cache-swiftshader
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-${{ matrix.vs-version }}-x64-install-20230420
key: swiftshader-${{ matrix.vs-version }}-x64-install-20240622
- name: checkout-swiftshader
if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
cd swiftshader
git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive
- name: swiftshader
if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
cd swiftshader
mkdir build-${{ matrix.vs-version }}; cd build-${{ matrix.vs-version }}
@@ -113,7 +113,7 @@ jobs:
cmake -T ${{ matrix.toolset-version }},host=x64 -A x64 -Dprotobuf_DIR="$env:GITHUB_WORKSPACE\protobuf-install\cmake" -DNCNN_VULKAN=ON -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 4
- name: x64-test
if: matrix.vs-version != 'vs2015'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017'
run: |
echo "[Processor]`nThreadCount=1`n" > build-x64/tests/Release/SwiftShader.ini
Copy-Item -Path "$env:GITHUB_WORKSPACE\swiftshader-install\vulkan-1.dll" -Destination 'build-x64\tests'


+ 5
- 4
docs/developer-guide/operators.md View File

@@ -1266,21 +1266,22 @@ y = affine(out)
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | embed_dim | int | 0 | |
| 1 | num_heads | int | 1 | |
| 2 | weight_data_size| int | 0 | |
| 2 | weight_data_size| int | 0 | qdim = weight_data_size / embed_dim |
| 3 | kdim | int | embed_dim | |
| 4 | vdim | int | embed_dim | |
| 5 | attn_mask | int | 0 | |
| 6 | scale | float | 1.f / sqrt(embed_dim / num_heads) | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
| q_weight_data | float/fp16/int8 | [weight_data_size] |
| q_weight_data | float/fp16/int8 | [embed_dim * qdim] |
| q_bias_data | float | [embed_dim] |
| k_weight_data | float/fp16/int8 | [embed_dim * kdim] |
| k_bias_data | float | [embed_dim] |
| v_weight_data | float/fp16/int8 | [embed_dim * vdim] |
| v_bias_data | float | [embed_dim] |
| out_weight_data| float/fp16/int8 | [weight_data_size] |
| out_bias_data | float | [embed_dim] |
| out_weight_data| float/fp16/int8 | [qdim * embed_dim] |
| out_bias_data | float | [qdim] |

# MVN
```


+ 1
- 4
src/layer/arm/multiheadattention_arm.cpp View File

@@ -60,12 +60,9 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt)
const int qdim = weight_data_size / embed_dim;

{
const int embed_dim_per_head = embed_dim / num_heads;
const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm);
ncnn::ParamDict pd;
pd.set(0, inv_sqrt_embed_dim_per_head);
pd.set(0, scale);
pd.set(1, 1.f);
pd.set(2, 0); // transA
pd.set(3, 1); // transB


+ 3
- 4
src/layer/multiheadattention.cpp View File

@@ -30,6 +30,7 @@ int MultiHeadAttention::load_param(const ParamDict& pd)
kdim = pd.get(3, embed_dim);
vdim = pd.get(4, embed_dim);
attn_mask = pd.get(5, 0);
scale = pd.get(6, 1.f / sqrtf(embed_dim / num_heads));

return 0;
}
@@ -111,12 +112,10 @@ int MultiHeadAttention::forward(const std::vector<Mat>& bottom_blobs, std::vecto
if (xqkv.empty())
return -100;

const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < num_heads; q++)
{
// xq = affine(q) * inv_sqrt_embed_dim_per_head
// xq = affine(q) * scale
{
Mat outm = xq.channel(q);

@@ -135,7 +134,7 @@ int MultiHeadAttention::forward(const std::vector<Mat>& bottom_blobs, std::vecto
sum += *ptr++ * *kptr++;
}

outptr[j] = sum * inv_sqrt_embed_dim_per_head;
outptr[j] = sum * scale;
}
}
}


+ 1
- 0
src/layer/multiheadattention.h View File

@@ -37,6 +37,7 @@ public:
int kdim;
int vdim;
int attn_mask;
float scale;

Mat q_weight_data;
Mat q_bias_data;


+ 1
- 3
src/layer/vulkan/multiheadattention_vulkan.cpp View File

@@ -48,12 +48,10 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt)
const int embed_dim_per_head = embed_dim / num_heads;
const int qdim = weight_data_size / embed_dim;
{
const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

q_gemm = ncnn::create_layer_vulkan(ncnn::LayerType::Gemm);
q_gemm->vkdev = vkdev;
ncnn::ParamDict pd;
pd.set(0, inv_sqrt_embed_dim_per_head);
pd.set(0, scale);
pd.set(1, 1.f);
pd.set(2, 0); // transA
pd.set(3, 1); // transB


+ 1
- 4
src/layer/x86/multiheadattention_x86.cpp View File

@@ -41,12 +41,9 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt)
const int qdim = weight_data_size / embed_dim;

{
const int embed_dim_per_head = embed_dim / num_heads;
const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm);
ncnn::ParamDict pd;
pd.set(0, inv_sqrt_embed_dim_per_head);
pd.set(0, scale);
pd.set(1, 1.f);
pd.set(2, 0); // transA
pd.set(3, 1); // transB


+ 1
- 0
tests/test_multiheadattention.cpp View File

@@ -106,6 +106,7 @@ static int test_multiheadattention_sameqkv(const ncnn::Mat& a, int embed_dim, in
pd.set(2, embed_dim * qdim);
pd.set(3, qdim);
pd.set(4, qdim);
pd.set(6, 0.7f / sqrtf(embed_dim / num_heads));

std::vector<ncnn::Mat> weights(8);
weights[0] = RandomMat(embed_dim * qdim);


Loading…
Cancel
Save