Browse Source

fix avx512 illegal instruction (#3550)

tags/20220216
nihui GitHub 4 years ago
parent
commit
d276ecd401
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 162 additions and 162 deletions
  1. +8
    -8
      .github/workflows/release.yml
  2. +6
    -6
      .github/workflows/test-coverage.yml
  3. +4
    -4
      .github/workflows/windows-x64-cpu-vs2015.yml
  4. +4
    -4
      .github/workflows/windows-x64-cpu-vs2017.yml
  5. +3
    -3
      .github/workflows/windows-x64-cpu-vs2019.yml
  6. +1
    -1
      .github/workflows/windows-x64-gpu-vs2017.yml
  7. +1
    -1
      .github/workflows/windows-x86-cpu-vs2015.yml
  8. +1
    -1
      .github/workflows/windows-x86-cpu-vs2017.yml
  9. +7
    -7
      src/allocator.h
  10. +127
    -127
      src/mat.h

+ 8
- 8
.github/workflows/release.yml View File

@@ -1184,7 +1184,7 @@ jobs:
source emsdk/emsdk_env.sh
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
-DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_SIMPLEOMP=OFF -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=OFF -DNCNN_AVX2=OFF -DNCNN_AV2=OFF \
-DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_SIMPLEOMP=OFF -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
cmake --build . -j 2
cmake --build . --target install
@@ -1193,7 +1193,7 @@ jobs:
source emsdk/emsdk_env.sh
mkdir build-simd && cd build-simd
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
-DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_SIMPLEOMP=OFF -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=ON -DNCNN_AVX2=OFF -DNCNN_AV2=OFF \
-DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_SIMPLEOMP=OFF -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=ON -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
cmake --build . -j 2
cmake --build . --target install
@@ -1202,7 +1202,7 @@ jobs:
source emsdk/emsdk_env.sh
mkdir build-threads && cd build-threads
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
-DNCNN_THREADS=ON -DNCNN_OPENMP=ON -DNCNN_SIMPLEOMP=ON -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=OFF -DNCNN_AVX2=OFF -DNCNN_AV2=OFF \
-DNCNN_THREADS=ON -DNCNN_OPENMP=ON -DNCNN_SIMPLEOMP=ON -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
cmake --build . -j 2
cmake --build . --target install
@@ -1211,7 +1211,7 @@ jobs:
source emsdk/emsdk_env.sh
mkdir build-simd-threads && cd build-simd-threads
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
-DNCNN_THREADS=ON -DNCNN_OPENMP=ON -DNCNN_SIMPLEOMP=ON -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=ON -DNCNN_AVX2=OFF -DNCNN_AV2=OFF \
-DNCNN_THREADS=ON -DNCNN_OPENMP=ON -DNCNN_SIMPLEOMP=ON -DNCNN_SIMPLEOCV=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=ON -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
cmake --build . -j 2
cmake --build . --target install
@@ -1233,7 +1233,7 @@ jobs:

windows-vs2015:
needs: [setup]
runs-on: windows-2016
runs-on: windows-2019
env:
PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-windows-vs2015
steps:
@@ -1316,7 +1316,7 @@ jobs:

windows-vs2015-shared:
needs: [setup]
runs-on: windows-2016
runs-on: windows-2019
env:
PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-windows-vs2015-shared
steps:
@@ -1399,7 +1399,7 @@ jobs:

windows-vs2017:
needs: [setup]
runs-on: windows-2016
runs-on: windows-2019
env:
PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-windows-vs2017
steps:
@@ -1482,7 +1482,7 @@ jobs:

windows-vs2017-shared:
needs: [setup]
runs-on: windows-2016
runs-on: windows-2019
env:
PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-windows-vs2017-shared
steps:


+ 6
- 6
.github/workflows/test-coverage.yml View File

@@ -71,7 +71,7 @@ jobs:
mkdir $GITHUB_WORKSPACE/swiftshader-install
cp Linux/* $GITHUB_WORKSPACE/swiftshader-install
- name: configure
run: export VULKAN_SDK=`pwd`/1.2.189.0/x86_64 && mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_VULKAN=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
run: export VULKAN_SDK=`pwd`/1.2.189.0/x86_64 && mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_VULKAN=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2
- name: test
@@ -135,7 +135,7 @@ jobs:
ninja install

- name: configure
run: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_VULKAN=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
run: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_VULKAN=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2
- name: test
@@ -165,7 +165,7 @@ jobs:
- name: build-sse2
run: |
mkdir build-sse2 && cd build-sse2
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test-sse2
run: cd build-sse2 && ctest --output-on-failure -j 2
@@ -185,7 +185,7 @@ jobs:
- name: build-avx
run: |
mkdir build-avx && cd build-avx
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_XOP=OFF -DNCNN_OPENMP=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test-avx
run: cd build-avx && ctest --output-on-failure -j 2
@@ -205,7 +205,7 @@ jobs:
- name: build-avx2
run: |
mkdir build-avx2 && cd build-avx2
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_XOP=OFF -DNCNN_OPENMP=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_OPENMP=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 2
- name: test-avx2
run: cd build-avx2 && ctest --output-on-failure -j 2
@@ -233,7 +233,7 @@ jobs:
LD_LIBRARY_PATH: /data/action/install/lib64
run: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_XOP=OFF -DNCNN_AVXVNNI=OFF -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_AVXVNNI=OFF -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j 4
- name: test
env:


+ 4
- 4
.github/workflows/windows-x64-cpu-vs2015.yml View File

@@ -31,7 +31,7 @@ concurrency:
cancel-in-progress: true
jobs:
windows-vs2015:
runs-on: windows-2016
runs-on: windows-2019
steps:
- uses: actions/checkout@v2
- name: cache-protobuf
@@ -52,7 +52,7 @@ jobs:
- name: build-sse2
run: |
mkdir build-sse2; cd build-sse2
cmake -T v140,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -T v140,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-sse2
run: cd build-sse2; ctest -C Release --output-on-failure -j 2
@@ -64,14 +64,14 @@ jobs:
- name: build-avx2
run: |
mkdir build-avx2; cd build-avx2
cmake -T v140,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_XOP=OFF -DNCNN_AVXVNNI=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -T v140,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_AVXVNNI=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-avx2
run: cd build-avx2; ctest -C Release --output-on-failure -j 2
- name: build-avx
run: |
mkdir build-avx; cd build-avx
cmake -T v140,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_XOP=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON ..
cmake -T v140,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-avx
run: cd build-avx; ctest -C Release --output-on-failure -j 2

+ 4
- 4
.github/workflows/windows-x64-cpu-vs2017.yml View File

@@ -31,7 +31,7 @@ concurrency:
cancel-in-progress: true
jobs:
windows-vs2017:
runs-on: windows-2016
runs-on: windows-2019
steps:
- uses: actions/checkout@v2
- name: cache-protobuf
@@ -52,7 +52,7 @@ jobs:
- name: build-sse2
run: |
mkdir build-sse2; cd build-sse2
cmake -T v141,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -T v141,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-sse2
run: cd build-sse2; ctest -C Release --output-on-failure -j 2
@@ -64,14 +64,14 @@ jobs:
- name: build-avx2
run: |
mkdir build-avx2; cd build-avx2
cmake -T v141,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVXVNNI=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -T v141,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_AVXVNNI=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-avx2
run: cd build-avx2; ctest -C Release --output-on-failure -j 2
- name: build-avx
run: |
mkdir build-avx; cd build-avx
cmake -T v141,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -T v141,host=x64 -A x64 -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-avx
run: cd build-avx; ctest -C Release --output-on-failure -j 2

+ 3
- 3
.github/workflows/windows-x64-cpu-vs2019.yml View File

@@ -52,7 +52,7 @@ jobs:
- name: build-sse2
run: |
mkdir build-sse2; cd build-sse2
cmake -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-sse2
run: cd build-sse2; ctest -C Release --output-on-failure -j 2
@@ -64,14 +64,14 @@ jobs:
- name: build-avx2
run: |
mkdir build-avx2; cd build-avx2
cmake -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVXVNNI=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=OFF -DNCNN_AVXVNNI=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-avx2
run: cd build-avx2; ctest -C Release --output-on-failure -j 2
- name: build-avx
run: |
mkdir build-avx; cd build-avx
cmake -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX=ON -DNCNN_AVX2=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake -DProtobuf_INCLUDE_DIR="$env:GITHUB_WORKSPACE\protobuf-install\include" -DProtobuf_LIBRARIES="$env:GITHUB_WORKSPACE\protobuf-install\lib\libprotobuf.lib" -DProtobuf_PROTOC_EXECUTABLE="$env:GITHUB_WORKSPACE\protobuf-install\bin\protoc.exe" -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX=ON -DNCNN_AVX2=OFF -DNCNN_AVX512=OFF -DNCNN_XOP=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 2
- name: test-avx
run: cd build-avx; ctest -C Release --output-on-failure -j 2

+ 1
- 1
.github/workflows/windows-x64-gpu-vs2017.yml View File

@@ -33,7 +33,7 @@ concurrency:
cancel-in-progress: true
jobs:
windows-vs2017-gpu:
runs-on: windows-2016
runs-on: windows-2019
steps:
- uses: actions/checkout@v2
with:


+ 1
- 1
.github/workflows/windows-x86-cpu-vs2015.yml View File

@@ -25,7 +25,7 @@ concurrency:
cancel-in-progress: true
jobs:
windows-vs2015:
runs-on: windows-2016
runs-on: windows-2019
steps:
- uses: actions/checkout@v2
- name: build


+ 1
- 1
.github/workflows/windows-x86-cpu-vs2017.yml View File

@@ -25,7 +25,7 @@ concurrency:
cancel-in-progress: true
jobs:
windows-vs2017:
runs-on: windows-2016
runs-on: windows-2019
steps:
- uses: actions/checkout@v2
- name: build


+ 7
- 7
src/allocator.h View File

@@ -52,7 +52,7 @@ namespace ncnn {
// ptr Aligned pointer
// n Alignment size that must be a power of two
template<typename _Tp>
static inline _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
{
return (_Tp*)(((size_t)ptr + n - 1) & -n);
}
@@ -61,12 +61,12 @@ static inline _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
// The function returns the minimum number that is greater or equal to sz and is divisible by n
// sz Buffer size to align
// n Alignment size that must be a power of two
static inline size_t alignSize(size_t sz, int n)
static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n)
{
return (sz + n - 1) & -n;
}

static inline void* fastMalloc(size_t size)
static NCNN_FORCEINLINE void* fastMalloc(size_t size)
{
#if _MSC_VER
return _aligned_malloc(size, NCNN_MALLOC_ALIGN);
@@ -87,7 +87,7 @@ static inline void* fastMalloc(size_t size)
#endif
}

static inline void fastFree(void* ptr)
static NCNN_FORCEINLINE void fastFree(void* ptr)
{
if (ptr)
{
@@ -108,7 +108,7 @@ static inline void fastFree(void* ptr)
// exchange-add operation for atomic operations on reference counters
#if defined __riscv && !defined __riscv_atomic
// riscv target without A extension
static inline int NCNN_XADD(int* addr, int delta)
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
int tmp = *addr;
*addr += delta;
@@ -136,7 +136,7 @@ static inline int NCNN_XADD(int* addr, int delta)
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
// thread-unsafe branch
static inline int NCNN_XADD(int* addr, int delta)
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
int tmp = *addr;
*addr += delta;
@@ -144,7 +144,7 @@ static inline int NCNN_XADD(int* addr, int delta)
}
#endif
#else // NCNN_THREADS
static inline int NCNN_XADD(int* addr, int delta)
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
int tmp = *addr;
*addr += delta;


+ 127
- 127
src/mat.h View File

@@ -727,7 +727,7 @@ NCNN_EXPORT unsigned short float32_to_float16(float value);
// convert half precision floating point to float
NCNN_EXPORT float float16_to_float32(unsigned short value);
// convert float to brain half
NCNN_EXPORT inline unsigned short float32_to_bfloat16(float value)
NCNN_EXPORT NCNN_FORCEINLINE unsigned short float32_to_bfloat16(float value)
{
// 16 : 16
union
@@ -739,7 +739,7 @@ NCNN_EXPORT inline unsigned short float32_to_bfloat16(float value)
return tmp.u >> 16;
}
// convert brain half to float
NCNN_EXPORT inline float bfloat16_to_float32(unsigned short value)
NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value)
{
// 16 : 16
union
@@ -751,11 +751,11 @@ NCNN_EXPORT inline float bfloat16_to_float32(unsigned short value)
return tmp.f;
}
#if __ARM_NEON
NCNN_EXPORT inline uint16x4_t vcvt_bf16_f32(float32x4_t _v)
NCNN_EXPORT NCNN_FORCEINLINE uint16x4_t vcvt_bf16_f32(float32x4_t _v)
{
return vshrn_n_u32(vreinterpretq_u32_f32(_v), 16);
}
NCNN_EXPORT inline float32x4_t vcvt_f32_bf16(uint16x4_t _v)
NCNN_EXPORT NCNN_FORCEINLINE float32x4_t vcvt_f32_bf16(uint16x4_t _v)
{
return vreinterpretq_f32_u32(vshll_n_u16(_v, 16));
}
@@ -785,119 +785,119 @@ NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_dat
NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option());
NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option());

inline Mat::Mat()
NCNN_FORCEINLINE Mat::Mat()
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
}

inline Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _elemsize, _allocator);
}

inline Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _elemsize, _allocator);
}

inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _c, _elemsize, _allocator);
}

inline Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _d, _c, _elemsize, _allocator);
}

inline Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _elemsize, _elempack, _allocator);
}

inline Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _elemsize, _elempack, _allocator);
}

inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

inline Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
}

inline Mat::Mat(const Mat& m)
NCNN_FORCEINLINE Mat::Mat(const Mat& m)
: data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c), cstep(m.cstep)
{
addref();
}

inline Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
cstep = w;
}

inline Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
cstep = (size_t)w * h;
}

inline Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}

inline Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
}

inline Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
cstep = w;
}

inline Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
cstep = (size_t)w * h;
}

inline Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}

inline Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
}

inline Mat::~Mat()
NCNN_FORCEINLINE Mat::~Mat()
{
release();
}

inline void Mat::fill(float _v)
NCNN_FORCEINLINE void Mat::fill(float _v)
{
int size = (int)total();
float* ptr = (float*)data;
@@ -949,7 +949,7 @@ inline void Mat::fill(float _v)
}
}

inline void Mat::fill(int _v)
NCNN_FORCEINLINE void Mat::fill(int _v)
{
int size = (int)total();
int* ptr = (int*)data;
@@ -1002,7 +1002,7 @@ inline void Mat::fill(int _v)
}

#if __ARM_NEON
inline void Mat::fill(float32x4_t _v)
NCNN_FORCEINLINE void Mat::fill(float32x4_t _v)
{
int size = (int)total();
float* ptr = (float*)data;
@@ -1013,7 +1013,7 @@ inline void Mat::fill(float32x4_t _v)
}
}

inline void Mat::fill(uint16x4_t _v)
NCNN_FORCEINLINE void Mat::fill(uint16x4_t _v)
{
int size = (int)total();
unsigned short* ptr = (unsigned short*)data;
@@ -1024,7 +1024,7 @@ inline void Mat::fill(uint16x4_t _v)
}
}

inline void Mat::fill(int32x4_t _v)
NCNN_FORCEINLINE void Mat::fill(int32x4_t _v)
{
int size = (int)total();
int* ptr = (int*)data;
@@ -1035,7 +1035,7 @@ inline void Mat::fill(int32x4_t _v)
}
}

inline void Mat::fill(int32x4_t _v0, int32x4_t _v1)
NCNN_FORCEINLINE void Mat::fill(int32x4_t _v0, int32x4_t _v1)
{
int size = (int)total();
int* ptr = (int*)data;
@@ -1047,7 +1047,7 @@ inline void Mat::fill(int32x4_t _v0, int32x4_t _v1)
}
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline void Mat::fill(float16x4_t _v)
NCNN_FORCEINLINE void Mat::fill(float16x4_t _v)
{
int size = (int)total();
__fp16* ptr = (__fp16*)data;
@@ -1058,7 +1058,7 @@ inline void Mat::fill(float16x4_t _v)
}
}

inline void Mat::fill(float16x8_t _v)
NCNN_FORCEINLINE void Mat::fill(float16x8_t _v)
{
int size = (int)total();
__fp16* ptr = (__fp16*)data;
@@ -1071,7 +1071,7 @@ inline void Mat::fill(float16x8_t _v)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#endif // __ARM_NEON
#if __AVX__
inline void Mat::fill(__m256 _v)
NCNN_FORCEINLINE void Mat::fill(__m256 _v)
{
int size = (int)total();
float* ptr = (float*)data;
@@ -1081,7 +1081,7 @@ inline void Mat::fill(__m256 _v)
ptr += 8;
}
}
inline void Mat::fill(__m128i _v)
NCNN_FORCEINLINE void Mat::fill(__m128i _v)
{
int size = (int)total();
unsigned short* ptr = (unsigned short*)data;
@@ -1094,7 +1094,7 @@ inline void Mat::fill(__m128i _v)
#endif // __AVX__

#if __mips_msa
inline void Mat::fill(v4f32 _v)
NCNN_FORCEINLINE void Mat::fill(v4f32 _v)
{
int size = (int)total();
float* ptr = (float*)data;
@@ -1107,7 +1107,7 @@ inline void Mat::fill(v4f32 _v)
#endif // __mips_msa

#if __riscv_vector
inline void Mat::fill(vfloat32m1_t _v)
NCNN_FORCEINLINE void Mat::fill(vfloat32m1_t _v)
{
const int packn = cpu_riscv_vlenb() / 4;
const word_type vl = vsetvl_e32m1(packn);
@@ -1121,7 +1121,7 @@ inline void Mat::fill(vfloat32m1_t _v)
}
}

inline void Mat::fill(vuint16m1_t _v)
NCNN_FORCEINLINE void Mat::fill(vuint16m1_t _v)
{
const int packn = cpu_riscv_vlenb() / 2;
const word_type vl = vsetvl_e16m1(packn);
@@ -1135,7 +1135,7 @@ inline void Mat::fill(vuint16m1_t _v)
}
}

inline void Mat::fill(vint8m1_t _v)
NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v)
{
const int packn = cpu_riscv_vlenb() / 1;
const word_type vl = vsetvl_e8m1(packn);
@@ -1149,7 +1149,7 @@ inline void Mat::fill(vint8m1_t _v)
}
}
#if __riscv_zfh
inline void Mat::fill(vfloat16m1_t _v)
NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
{
const int packn = cpu_riscv_vlenb() / 2;
const word_type vl = vsetvl_e16m1(packn);
@@ -1166,7 +1166,7 @@ inline void Mat::fill(vfloat16m1_t _v)
#endif // __riscv_vector

template<typename T>
inline void Mat::fill(T _v)
NCNN_FORCEINLINE void Mat::fill(T _v)
{
int size = (int)total();
T* ptr = (T*)data;
@@ -1176,7 +1176,7 @@ inline void Mat::fill(T _v)
}
}

inline Mat& Mat::operator=(const Mat& m)
NCNN_FORCEINLINE Mat& Mat::operator=(const Mat& m)
{
if (this == &m)
return *this;
@@ -1203,13 +1203,13 @@ inline Mat& Mat::operator=(const Mat& m)
return *this;
}

inline void Mat::addref()
NCNN_FORCEINLINE void Mat::addref()
{
if (refcount)
NCNN_XADD(refcount, 1);
}

inline void Mat::release()
NCNN_FORCEINLINE void Mat::release()
{
if (refcount && NCNN_XADD(refcount, -1) == 1)
{
@@ -1235,22 +1235,22 @@ inline void Mat::release()
refcount = 0;
}

inline bool Mat::empty() const
NCNN_FORCEINLINE bool Mat::empty() const
{
return data == 0 || total() == 0;
}

inline size_t Mat::total() const
NCNN_FORCEINLINE size_t Mat::total() const
{
return cstep * c;
}

inline int Mat::elembits() const
NCNN_FORCEINLINE int Mat::elembits() const
{
return elempack ? static_cast<int>(elemsize * 8) / elempack : 0;
}

inline Mat Mat::shape() const
NCNN_FORCEINLINE Mat Mat::shape() const
{
if (dims == 1)
return Mat(w * elempack, (void*)0);
@@ -1264,7 +1264,7 @@ inline Mat Mat::shape() const
return Mat();
}

inline Mat Mat::channel(int _c)
NCNN_FORCEINLINE Mat Mat::channel(int _c)
{
Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
m.dims = dims - 1;
@@ -1273,7 +1273,7 @@ inline Mat Mat::channel(int _c)
return m;
}

inline const Mat Mat::channel(int _c) const
NCNN_FORCEINLINE const Mat Mat::channel(int _c) const
{
Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
m.dims = dims - 1;
@@ -1282,164 +1282,164 @@ inline const Mat Mat::channel(int _c) const
return m;
}

inline Mat Mat::depth(int z)
NCNN_FORCEINLINE Mat Mat::depth(int z)
{
return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::depth(int z) const
NCNN_FORCEINLINE const Mat Mat::depth(int z) const
{
return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
}

inline float* Mat::row(int y)
NCNN_FORCEINLINE float* Mat::row(int y)
{
return (float*)((unsigned char*)data + (size_t)w * y * elemsize);
}

inline const float* Mat::row(int y) const
NCNN_FORCEINLINE const float* Mat::row(int y) const
{
return (const float*)((unsigned char*)data + (size_t)w * y * elemsize);
}

template<typename T>
inline T* Mat::row(int y)
NCNN_FORCEINLINE T* Mat::row(int y)
{
return (T*)((unsigned char*)data + (size_t)w * y * elemsize);
}

template<typename T>
inline const T* Mat::row(int y) const
NCNN_FORCEINLINE const T* Mat::row(int y) const
{
return (const T*)((unsigned char*)data + (size_t)w * y * elemsize);
}

inline Mat Mat::channel_range(int _c, int channels)
NCNN_FORCEINLINE Mat Mat::channel_range(int _c, int channels)
{
Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
m.dims = dims;
return m;
}

inline const Mat Mat::channel_range(int _c, int channels) const
NCNN_FORCEINLINE const Mat Mat::channel_range(int _c, int channels) const
{
Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
m.dims = dims;
return m;
}

inline Mat Mat::depth_range(int z, int depths)
NCNN_FORCEINLINE Mat Mat::depth_range(int z, int depths)
{
Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
m.cstep = (size_t)w * h;
return m;
}

inline const Mat Mat::depth_range(int z, int depths) const
NCNN_FORCEINLINE const Mat Mat::depth_range(int z, int depths) const
{
Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
m.cstep = (size_t)w * h;
return m;
}

inline Mat Mat::row_range(int y, int rows)
NCNN_FORCEINLINE Mat Mat::row_range(int y, int rows)
{
return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::row_range(int y, int rows) const
NCNN_FORCEINLINE const Mat Mat::row_range(int y, int rows) const
{
return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}

inline Mat Mat::range(int x, int n)
NCNN_FORCEINLINE Mat Mat::range(int x, int n)
{
return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::range(int x, int n) const
NCNN_FORCEINLINE const Mat Mat::range(int x, int n) const
{
return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}

template<typename T>
inline Mat::operator T*()
NCNN_FORCEINLINE Mat::operator T*()
{
return (T*)data;
}

template<typename T>
inline Mat::operator const T*() const
NCNN_FORCEINLINE Mat::operator const T*() const
{
return (const T*)data;
}

inline float& Mat::operator[](size_t i)
NCNN_FORCEINLINE float& Mat::operator[](size_t i)
{
return ((float*)data)[i];
}

inline const float& Mat::operator[](size_t i) const
NCNN_FORCEINLINE const float& Mat::operator[](size_t i) const
{
return ((const float*)data)[i];
}

#if NCNN_VULKAN

inline VkMat::VkMat()
NCNN_FORCEINLINE VkMat::VkMat()
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
}

inline VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _elemsize, _allocator);
}

inline VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _elemsize, _allocator);
}

inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _c, _elemsize, _allocator);
}

inline VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _d, _c, _elemsize, _allocator);
}

inline VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _elemsize, _elempack, _allocator);
}

inline VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _elemsize, _elempack, _allocator);
}

inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

inline VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
}

inline VkMat::VkMat(const VkMat& m)
NCNN_FORCEINLINE VkMat::VkMat(const VkMat& m)
: data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c)
{
addref();
@@ -1447,60 +1447,60 @@ inline VkMat::VkMat(const VkMat& m)
cstep = m.cstep;
}

inline VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
cstep = w;
}

inline VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
cstep = w * h;
}

inline VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
cstep = alignSize(w * h * elemsize, 16) / elemsize;
}

inline VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
}

inline VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
cstep = w;
}

inline VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
cstep = w * h;
}

inline VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
cstep = alignSize(w * h * elemsize, 16) / elemsize;
}

inline VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
}

inline VkMat::~VkMat()
NCNN_FORCEINLINE VkMat::~VkMat()
{
release();
}

inline VkMat& VkMat::operator=(const VkMat& m)
NCNN_FORCEINLINE VkMat& VkMat::operator=(const VkMat& m)
{
if (this == &m)
return *this;
@@ -1527,7 +1527,7 @@ inline VkMat& VkMat::operator=(const VkMat& m)
return *this;
}

inline Mat VkMat::mapped() const
NCNN_FORCEINLINE Mat VkMat::mapped() const
{
if (!allocator->mappable)
return Mat();
@@ -1547,7 +1547,7 @@ inline Mat VkMat::mapped() const
return Mat();
}

inline void* VkMat::mapped_ptr() const
NCNN_FORCEINLINE void* VkMat::mapped_ptr() const
{
if (!allocator->mappable)
return 0;
@@ -1555,13 +1555,13 @@ inline void* VkMat::mapped_ptr() const
return (unsigned char*)data->mapped_ptr + data->offset;
}

inline void VkMat::addref()
NCNN_FORCEINLINE void VkMat::addref()
{
if (refcount)
NCNN_XADD(refcount, 1);
}

inline void VkMat::release()
NCNN_FORCEINLINE void VkMat::release()
{
if (refcount && NCNN_XADD(refcount, -1) == 1)
{
@@ -1587,22 +1587,22 @@ inline void VkMat::release()
refcount = 0;
}

inline bool VkMat::empty() const
NCNN_FORCEINLINE bool VkMat::empty() const
{
return data == 0 || total() == 0;
}

inline size_t VkMat::total() const
NCNN_FORCEINLINE size_t VkMat::total() const
{
return cstep * c;
}

inline int VkMat::elembits() const
NCNN_FORCEINLINE int VkMat::elembits() const
{
return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
}

inline Mat VkMat::shape() const
NCNN_FORCEINLINE Mat VkMat::shape() const
{
if (dims == 1)
return Mat(w * elempack, (void*)0);
@@ -1616,126 +1616,126 @@ inline Mat VkMat::shape() const
return Mat();
}

inline VkBuffer VkMat::buffer() const
NCNN_FORCEINLINE VkBuffer VkMat::buffer() const
{
return data->buffer;
}

inline size_t VkMat::buffer_offset() const
NCNN_FORCEINLINE size_t VkMat::buffer_offset() const
{
return data->offset;
}

inline size_t VkMat::buffer_capacity() const
NCNN_FORCEINLINE size_t VkMat::buffer_capacity() const
{
return data->capacity;
}

inline VkImageMat::VkImageMat()
NCNN_FORCEINLINE VkImageMat::VkImageMat()
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
}

inline VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _elemsize, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _elemsize, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _c, _elemsize, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _d, _c, _elemsize, _allocator);
}

inline VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _elemsize, _elempack, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _elemsize, _elempack, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
}

inline VkImageMat::VkImageMat(const VkImageMat& m)
NCNN_FORCEINLINE VkImageMat::VkImageMat(const VkImageMat& m)
: data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c)
{
addref();
}

inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
}

inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
}

inline VkImageMat::~VkImageMat()
NCNN_FORCEINLINE VkImageMat::~VkImageMat()
{
release();
}

inline VkImageMat& VkImageMat::operator=(const VkImageMat& m)
NCNN_FORCEINLINE VkImageMat& VkImageMat::operator=(const VkImageMat& m)
{
if (this == &m)
return *this;
@@ -1760,7 +1760,7 @@ inline VkImageMat& VkImageMat::operator=(const VkImageMat& m)
return *this;
}

inline Mat VkImageMat::mapped() const
NCNN_FORCEINLINE Mat VkImageMat::mapped() const
{
if (!allocator->mappable || !data->mapped_ptr)
return Mat();
@@ -1780,7 +1780,7 @@ inline Mat VkImageMat::mapped() const
return Mat();
}

inline void* VkImageMat::mapped_ptr() const
NCNN_FORCEINLINE void* VkImageMat::mapped_ptr() const
{
if (!allocator->mappable || !data->mapped_ptr)
return 0;
@@ -1788,13 +1788,13 @@ inline void* VkImageMat::mapped_ptr() const
return (unsigned char*)data->mapped_ptr + data->bind_offset;
}

inline void VkImageMat::addref()
NCNN_FORCEINLINE void VkImageMat::addref()
{
if (refcount)
NCNN_XADD(refcount, 1);
}

inline void VkImageMat::release()
NCNN_FORCEINLINE void VkImageMat::release()
{
if (refcount && NCNN_XADD(refcount, -1) == 1)
{
@@ -1818,22 +1818,22 @@ inline void VkImageMat::release()
refcount = 0;
}

inline bool VkImageMat::empty() const
NCNN_FORCEINLINE bool VkImageMat::empty() const
{
return data == 0 || total() == 0;
}

inline size_t VkImageMat::total() const
NCNN_FORCEINLINE size_t VkImageMat::total() const
{
return w * h * d * c;
}

inline int VkImageMat::elembits() const
NCNN_FORCEINLINE int VkImageMat::elembits() const
{
return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
}

inline Mat VkImageMat::shape() const
NCNN_FORCEINLINE Mat VkImageMat::shape() const
{
if (dims == 1)
return Mat(w * elempack, (void*)0);
@@ -1847,12 +1847,12 @@ inline Mat VkImageMat::shape() const
return Mat();
}

inline VkImage VkImageMat::image() const
NCNN_FORCEINLINE VkImage VkImageMat::image() const
{
return data->image;
}

inline VkImageView VkImageMat::imageview() const
NCNN_FORCEINLINE VkImageView VkImageMat::imageview() const
{
return data->imageview;
}


Loading…
Cancel
Save