* Add POWER8 VSX toolchains. POWER8, though slower than POWER9, is still used in the wild; these toolchains should still be much faster on POWER8 than POWER8 without VSX optimizations. * VSX toolchains: set -cpu arg in QEMU CI tests. tags/20230816
| @@ -73,6 +73,52 @@ jobs: | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build | |||
| TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 2 | |||
| # CI job: cross-builds with the POWER8 little-endian GCC VSX toolchain file (tests enabled) and runs the tests through the qemu-ppc64le loader. | |||
| linux-gcc-power8le-vsx: | |||
| runs-on: ubuntu-20.04 | |||
| steps: | |||
| - uses: actions/checkout@v3 | |||
| # Restore a previously built QEMU install tree; the three QEMU build steps below only run when the cache misses. | |||
| - name: cache-qemu | |||
| id: cache-qemu | |||
| uses: actions/cache@v3 | |||
| with: | |||
| path: qemu-install | |||
| key: qemu-ppc64le-install-20220502-2 | |||
| - name: install-qemu-build-deps | |||
| if: steps.cache-qemu.outputs.cache-hit != 'true' | |||
| run: | | |||
| sudo apt-get update | |||
| sudo apt-get install autoconf automake autotools-dev ninja-build | |||
| # QEMU sources are checked out into ./qemu at a pinned commit. | |||
| - name: checkout-qemu | |||
| if: steps.cache-qemu.outputs.cache-hit != 'true' | |||
| uses: actions/checkout@v3 | |||
| with: | |||
| repository: qemu/qemu | |||
| path: qemu | |||
| ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65 | |||
| # Build only the ppc64le user-mode emulator and install it into the cached qemu-install directory. | |||
| - name: qemu | |||
| if: steps.cache-qemu.outputs.cache-hit != 'true' | |||
| run: | | |||
| cd qemu | |||
| ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=ppc64le-linux-user --disable-system | |||
| make -j2 | |||
| make install | |||
| # Install the powerpc64le GCC cross compiler package (runs regardless of the QEMU cache state). | |||
| - name: powerpc64le-gnu-toolchain | |||
| run: | | |||
| sudo apt-get update | |||
| sudo apt-get install g++-powerpc64le-linux-gnu | |||
| # Configure with the POWER8 GCC VSX toolchain file; tools and examples are off, tests are on. | |||
| - name: configure | |||
| run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/power8le-linux-gnu-vsx.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| - name: build | |||
| run: cmake --build build -j 2 | |||
| # Run ctest with qemu-ppc64le as the test executable loader, using the QEMU build from qemu-install/bin. | |||
| # The loader arguments are semicolon-separated: -L points at /usr/powerpc64le-linux-gnu and -cpu selects power8_v2.0. | |||
| # NOTE(review): presumably -cpu makes QEMU emulate a POWER8 instead of its default CPU model - confirm against the QEMU docs. | |||
| - name: test | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build | |||
| TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu;-cpu;power8_v2.0" ctest --output-on-failure -j 2 | |||
| linux-gcc-power9le-vsx: | |||
| runs-on: ubuntu-20.04 | |||
| steps: | |||
| @@ -118,4 +164,4 @@ jobs: | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| cd build | |||
| TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 2 | |||
| TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu;-cpu;power9_v2.0" ctest --output-on-failure -j 2 | |||
| @@ -203,9 +203,9 @@ ncnn 目前已在腾讯多款应用中使用,如:QQ,Qzone,微信,天 | |||
| ## HowTo | |||
| **[how to build ncnn library](https://github.com/Tencent/ncnn/wiki/how-to-build) on Linux / Windows / macOS / Raspberry Pi3, Pi4 / Android / NVIDIA Jetson / iOS / WebAssembly / AllWinner D1 / Loongson 2K1000** | |||
| **[how to build ncnn library](https://github.com/Tencent/ncnn/wiki/how-to-build) on Linux / Windows / macOS / Raspberry Pi3, Pi4 / POWER / Android / NVIDIA Jetson / iOS / WebAssembly / AllWinner D1 / Loongson 2K1000** | |||
| - [Build for Linux / NVIDIA Jetson / Raspberry Pi3, Pi4 / POWER9](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux) | |||
| - [Build for Linux / NVIDIA Jetson / Raspberry Pi3, Pi4 / POWER](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux) | |||
| - [Build for Windows x64 using VS2017](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-windows-x64-using-visual-studio-community-2017) | |||
| - [Build for macOS](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-macos) | |||
| - [Build for ARM Cortex-A family with cross-compiling](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-arm-cortex-a-family-with-cross-compiling) | |||
| @@ -10,7 +10,7 @@ git submodule update --init | |||
| - [Build for Linux](#build-for-linux) | |||
| - [Nvidia Jetson](#nvidia-jetson) | |||
| - [Raspberry Pi](#raspberry-pi) | |||
| - [POWER9](#power9) | |||
| - [POWER](#power) | |||
| - [Intel oneAPI](#intel-oneapi) | |||
| - [Verification](#verification) | |||
| - [Build for Windows x64 using Visual Studio Community 2017](#build-for-windows-x64-using-visual-studio-community-2017) | |||
| @@ -89,9 +89,9 @@ You can add `-GNinja` to `cmake` above to use Ninja build system (invoke build u | |||
| For Raspberry Pi 3 on 32bit OS, add `-DCMAKE_TOOLCHAIN_FILE=../toolchains/pi3.toolchain.cmake` to cmake. You can also consider disabling Vulkan support, as the Vulkan drivers for Raspberry Pi are still not mature; however, it doesn't hurt to build the support in and simply not use it. | |||
| #### POWER9 | |||
| #### POWER | |||
| With Clang 13 or higher: | |||
| For POWER9 with Clang 13 or higher: | |||
| ```shell | |||
| cd ncnn | |||
| @@ -103,7 +103,9 @@ make -j$(nproc) | |||
| Earlier versions of Clang may fail to build ncnn due to [Bug 49864](https://github.com/llvm/llvm-project/issues/49864). To use GCC instead, use the `power9le-linux-gnu-vsx.toolchain.cmake` toolchain file. Note that according to benchmarks, Clang appears to produce noticeably faster CPU inference than GCC for POWER9 targets. | |||
| Note that the POWER9 toolchain files only support little-endian mode. | |||
| To target POWER8 rather than POWER9, use the `power8le-linux-gnu-vsx.clang.toolchain.cmake` (Clang) or `power8le-linux-gnu-vsx.toolchain.cmake` (GCC) toolchain file. POWER8 will be slower than POWER9. | |||
| Note that the POWER toolchain files only support little-endian mode. | |||
| #### Intel oneAPI | |||
| @@ -0,0 +1,19 @@ | |||
| # Toolchain settings for cross-compiling to little-endian 64-bit POWER8 Linux with clang / clang++. | |||
| # Target platform description used by CMake for cross-compilation. | |||
| set(CMAKE_SYSTEM_NAME Linux) | |||
| set(CMAKE_SYSTEM_PROCESSOR powerpc64le) | |||
| # The compilers are the plain clang drivers; the target triple is supplied through -target in the flags below. | |||
| set(CMAKE_C_COMPILER "clang") | |||
| set(CMAKE_CXX_COMPILER "clang++") | |||
| # find_program() never looks in the find-root path, while library and include lookups use only that path. | |||
| # NOTE(review): CMAKE_FIND_ROOT_PATH itself is not set in the lines shown here - confirm where it is expected to come from. | |||
| set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) | |||
| set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) | |||
| set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) | |||
| # Compile for the powerpc64le-linux-gnu triple, add the headers under /usr/powerpc64le-linux-gnu, and target and tune for POWER8. | |||
| # NOTE(review): the x86 feature macros and NO_WARN_X86_INTRINSICS presumably enable the compiler's x86-intrinsics compatibility headers on POWER - confirm. | |||
| # NOTE(review): the C++ include path hard-codes the c++/10 directory; presumably it has to match the installed cross libstdc++ version - verify on other distributions. | |||
| set(CMAKE_C_FLAGS "-target powerpc64le-linux-gnu -I/usr/powerpc64le-linux-gnu/include -mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__") | |||
| set(CMAKE_CXX_FLAGS "-target powerpc64le-linux-gnu -I/usr/powerpc64le-linux-gnu/include -I/usr/powerpc64le-linux-gnu/include/c++/10/powerpc64le-linux-gnu -mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__") | |||
| # cache flags: store the values set above as cache entries (no FORCE, so an already-cached value is left untouched) | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags") | |||
| # Auto-translate SSE to VSX | |||
| set(NCNN_PPC64LE_VSX ON) | |||
| @@ -0,0 +1,19 @@ | |||
| # Toolchain settings for cross-compiling to little-endian 64-bit POWER8 Linux with the powerpc64le-linux-gnu GCC cross compilers. | |||
| # Target platform description used by CMake for cross-compilation. | |||
| set(CMAKE_SYSTEM_NAME Linux) | |||
| set(CMAKE_SYSTEM_PROCESSOR powerpc64le) | |||
| # Triple-prefixed cross compilers, so no explicit target or include flags are passed below. | |||
| set(CMAKE_C_COMPILER "powerpc64le-linux-gnu-gcc") | |||
| set(CMAKE_CXX_COMPILER "powerpc64le-linux-gnu-g++") | |||
| # find_program() never looks in the find-root path, while library and include lookups use only that path. | |||
| # NOTE(review): CMAKE_FIND_ROOT_PATH itself is not set in the lines shown here - confirm where it is expected to come from. | |||
| set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) | |||
| set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) | |||
| set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) | |||
| # Target and tune for POWER8; the C and C++ flag strings are identical. | |||
| # NOTE(review): the x86 feature macros and NO_WARN_X86_INTRINSICS presumably enable the compiler's x86-intrinsics compatibility headers on POWER - confirm. | |||
| set(CMAKE_C_FLAGS "-mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__") | |||
| set(CMAKE_CXX_FLAGS "-mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__") | |||
| # cache flags: store the values set above as cache entries (no FORCE, so an already-cached value is left untouched) | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags") | |||
| # Auto-translate SSE to VSX | |||
| set(NCNN_PPC64LE_VSX ON) | |||