* Support arm-host and x86-cross-arm builds.
* Fix the TensorRT myelin library CMake build issue seen with the 'trt copy env' (for details about the 'copy env', please refer to commit cb92123f).

About the x86-cross-arm CUDA environment:
1: Run ./scripts/cmake-build/create_cuda_build_libs.py to prepare the cuda/cudnn/trt environment (for where to download the deb packages, please refer to create_cuda_build_libs.py).
2: export TRT_ROOT_DIR=xxxxx, where xxxxx is a directory created by step 1.
3: export CUDNN_ROOT_DIR=xxxx, where xxxx is a directory created by step 1.
4: export PATH=xxxx:$PATH, where xxxx must be created by step 1; it is the directory containing the nvcc that has the relative path ../targets/sbsa-linux/.

GitOrigin-RevId: 440c76052aabe5b07a4b64d126e759f919c257a8
tags/v1.3.0
| @@ -257,11 +257,25 @@ if(MGE_WITH_JIT_MLIR AND MGE_WITH_HALIDE) | |||||
| endif() | endif() | ||||
| if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
| # FIXME: check_language(CUDA) failed when sbsa mode! | |||||
| # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676 | |||||
| if(CMAKE_TOOLCHAIN_FILE) | |||||
| set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) | |||||
| message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!") | |||||
| endif() | |||||
| include(CheckLanguage) | include(CheckLanguage) | ||||
| check_language(CUDA) | check_language(CUDA) | ||||
| if(NOT CMAKE_CUDA_COMPILER) | |||||
| if(NOT CMAKE_CUDA_COMPILER AND NOT CMAKE_TOOLCHAIN_FILE) | |||||
| message(FATAL_ERROR "CUDA compiler not found in PATH") | message(FATAL_ERROR "CUDA compiler not found in PATH") | ||||
| endif() | endif() | ||||
| # remove this after CMAKE fix nvcc sbsa | |||||
| if(NOT CMAKE_CUDA_COMPILER AND CMAKE_TOOLCHAIN_FILE) | |||||
| set(CMAKE_CUDA_COMPILER "nvcc") | |||||
| message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!") | |||||
| endif() | |||||
| enable_language(CUDA) | enable_language(CUDA) | ||||
| set(CMAKE_CUDA_STANDARD 14) | set(CMAKE_CUDA_STANDARD 14) | ||||
| set(CMAKE_CUDA_STANDARD_REQUIRED ON) | set(CMAKE_CUDA_STANDARD_REQUIRED ON) | ||||
| @@ -375,7 +389,7 @@ if(MGE_WITH_CUDA) | |||||
| set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions") | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions") | ||||
| endif() | endif() | ||||
| if(NOT MGE_CUDA_GENCODE) | if(NOT MGE_CUDA_GENCODE) | ||||
| if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||||
| if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "aarch64") | |||||
| set(MEGDNN_THREADS_512 0) | set(MEGDNN_THREADS_512 0) | ||||
| if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND ("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED)) | if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND ("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED)) | ||||
| message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON") | message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON") | ||||
| @@ -429,14 +443,14 @@ if(MGE_WITH_CUDA) | |||||
| if(MGE_CUDA_USE_STATIC) | if(MGE_CUDA_USE_STATIC) | ||||
| if(MGE_WITH_TRT) | if(MGE_WITH_TRT) | ||||
| if(MSVC OR WIN32) | if(MSVC OR WIN32) | ||||
| list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY} ${CUDNN_LIBRARY}) | |||||
| message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}") | message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}") | ||||
| list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY}) | |||||
| else() | else() | ||||
| if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | |||||
| list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer myelin_compiler_static myelin_executor_static myelin_pattern_runtime_static myelin_pattern_library_static -Wl,--no-whole-archive) | |||||
| else() | |||||
| list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive) | |||||
| endif() | |||||
| list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive) | |||||
| endif() | |||||
| if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | |||||
| message(STATUS "handle trt myelin lib after trt7") | |||||
| list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library) | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| @@ -497,6 +511,10 @@ if(MGE_WITH_CUDA) | |||||
| else() | else() | ||||
| if(MGE_WITH_TRT) | if(MGE_WITH_TRT) | ||||
| list(APPEND MGE_CUDA_LIBS libnvinfer) | list(APPEND MGE_CUDA_LIBS libnvinfer) | ||||
| if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | |||||
| message(STATUS "handle trt myelin lib after trt7") | |||||
| list(APPEND MGE_CUDA_LIBS libmyelin) | |||||
| endif() | |||||
| endif() | endif() | ||||
| list(APPEND MGE_CUDA_LIBS libcudnn) | list(APPEND MGE_CUDA_LIBS libcudnn) | ||||
| if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0") | if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0") | ||||
| @@ -779,6 +797,13 @@ if(MGE_ARCH STREQUAL "aarch64") | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| if(MGE_WITH_CUDA) | |||||
| message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\ | |||||
| when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\ | |||||
| for save link time(14min->1min), you may open below flags if not deploy on\ | |||||
| arm a53 platform, or just build release type!") | |||||
| #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769") | |||||
| endif() | |||||
| endif() | endif() | ||||
| if(MGE_ARCH STREQUAL "riscv64") | if(MGE_ARCH STREQUAL "riscv64") | ||||
| @@ -948,4 +973,4 @@ if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER | |||||
| message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | ||||
| message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | ||||
| message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") | ||||
| endif() | |||||
| endif() | |||||
| @@ -65,3 +65,77 @@ set_target_properties(libnvinfer PROPERTIES | |||||
| message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | ||||
# mge_import_trt_myelin(<cache_var> <target> <STATIC|SHARED> <display_name> <lib_name>...)
#
# Locate one TensorRT myelin library under "${__found_trt_root}/lib" and expose
# it as an imported target.
#   cache_var    - find_library() result variable (e.g. LIBMYELIN_COMPILER)
#   target       - name of the imported target to create
#   link_type    - STATIC or SHARED, forwarded to add_library()
#   display_name - label printed in the "Found TensorRT ..." status message
#   lib_name...  - candidate file names handed to find_library(NAMES ...)
# Configuration stops with FATAL_ERROR when none of the candidates is found.
function(mge_import_trt_myelin cache_var target link_type display_name)
    find_library(${cache_var}
        NAMES ${ARGN}
        PATHS "${__found_trt_root}/lib"
        )
    # if(NOT <var>) is false for any "...-NOTFOUND" value as well as for an
    # empty one, so the sentinel string does not have to be spelled by hand.
    if(NOT ${cache_var})
        message(FATAL_ERROR "Can not find ${cache_var} Library")
    endif()
    message(STATUS "Found TensorRT ${display_name}: ${${cache_var}}")

    add_library(${target} ${link_type} IMPORTED)
    # Quoted so that a location containing spaces or ';' stays one value.
    set_target_properties(${target} PROPERTIES
        IMPORTED_LOCATION "${${cache_var}}"
        )
endfunction()

# For TensorRT >= 7 the myelin libraries are located as well: four static
# archives when MGE_CUDA_USE_STATIC is set, one shared library otherwise.
if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
    if(MGE_CUDA_USE_STATIC)
        mge_import_trt_myelin(LIBMYELIN_COMPILER libmyelin_compiler STATIC
            "myelin_compiler"
            libmyelin_compiler_static.a myelin_compiler_static.lib)
        mge_import_trt_myelin(LIBMYELIN_EXECUTOR libmyelin_executor STATIC
            "libmyelin_executor"
            libmyelin_executor_static.a myelin_executor_static.lib)
        mge_import_trt_myelin(LIBMYELIN_PATTERN_RUNTIME libmyelin_pattern_runtime STATIC
            "libmyelin_pattern_runtime"
            libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib)
        mge_import_trt_myelin(LIBMYELIN_PATTERN_LIBRARY libmyelin_pattern_library STATIC
            "libmyelin_pattern_library"
            libmyelin_pattern_library_static.a myelin_pattern_library_static.lib)
    else()
        mge_import_trt_myelin(LIBMYELIN_SHARED libmyelin SHARED
            "libmyelin_shared"
            libmyelin.so myelin.dll)
    endif()
endif()
| @@ -66,7 +66,7 @@ Now we support ARM-Linux on Linux and Windows fully, also experimental on MacOS | |||||
| * commands: | * commands: | ||||
| ``` | ``` | ||||
| 1: download toolchains from https://releases.linaro.org/components/toolchain/gcc-linaro/ or https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads if use Windows or Linux | |||||
| 1: download toolchains from http://releases.linaro.org/components/toolchain/binaries/ or https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads if use Windows or Linux | |||||
| 2: download toolchains from https://github.com/thinkski/osx-arm-linux-toolchains if use MacOS | 2: download toolchains from https://github.com/thinkski/osx-arm-linux-toolchains if use MacOS | ||||
| ``` | ``` | ||||
| @@ -0,0 +1,175 @@ | |||||
| #!/usr/bin/env python3 | |||||
| import argparse | |||||
| import os | |||||
| import subprocess | |||||
| import glob | |||||
def handle_cuda_libs(path):
    """Unpack a CUDA repo ``.deb`` and merge its payload into ``output/``.

    The outer package ``path`` is extracted into ``tmp/``. Every ``.deb``
    found inside it is then extracted, one at a time, into ``tmp_sub/`` and
    the contents of its ``usr/share/`` and ``usr/local/`` directories (when
    present) are copied into ``output/``. ``output/`` is not created here and
    is expected to exist already.

    Args:
        path: path of the outer ``.deb`` package.

    Raises:
        RuntimeError: if the outer package contains no nested ``.deb``, or a
            nested package itself contains further ``.deb`` files.
        subprocess.CalledProcessError: if an external command fails.
    """
    subprocess.check_call('rm -rf tmp && rm -rf tmp_sub', shell=True)
    print('\nhandle cuda file from.{}'.format(path))
    # Argument-list form: the package path never goes through a shell, so
    # spaces or shell metacharacters in it cannot break or alter the command.
    subprocess.check_call(['dpkg-deb', '-xv', path, 'tmp'])

    sub_debs = glob.glob('tmp/**/*.deb', recursive=True)
    # Explicit raise instead of assert: asserts vanish under ``python -O``.
    if not sub_debs:
        raise RuntimeError('no nested .deb package found in {}'.format(path))

    for sub_deb in sub_debs:
        subprocess.check_call('rm -rf tmp_sub', shell=True)
        print('handle sub_deb: {}'.format(sub_deb))
        subprocess.check_call(['dpkg-deb', '-xv', sub_deb, 'tmp_sub'])

        sub_sub_debs = glob.glob('tmp_sub/**/*.deb', recursive=True)
        if sub_sub_debs:
            raise RuntimeError(
                'unexpected nested .deb packages in {}: {}'.format(sub_deb, sub_sub_debs))

        for payload_dir in ('tmp_sub/usr/share/', 'tmp_sub/usr/local/'):
            if os.path.isdir(payload_dir):
                # The shell is used here only to expand the "*" glob; the
                # command is built from constants, not from user input.
                subprocess.check_call(
                    'cp -v {}* output/ -rf'.format(payload_dir), shell=True)
def _check_file_arch(path, arch):
    """Check with the ``file`` utility that ``path`` was built for ``arch``.

    The description reported by ``file`` is printed, then searched for the
    substring ``arch`` (for example ``x86-64`` or ``aarch64``).

    Raises:
        RuntimeError: if ``arch`` does not appear in the description.
        subprocess.CalledProcessError: if ``file`` itself fails.
    """
    description = subprocess.check_output(['file', path]).decode(errors='replace').strip()
    print(description)
    if arch not in description:
        raise RuntimeError('{} is not built for {}: {}'.format(path, arch, description))


def main():
    """Assemble a cuda/cudnn/TensorRT tree under ``output/`` from packages.

    Parses the command line, validates that every given package file exists,
    recreates ``output/``, unpacks the CUDA package(s) through
    ``handle_cuda_libs``, and then extracts the cudnn and TensorRT archives
    with ``tar``. After each step the unpacked binaries are checked with
    ``file`` against the requested target architecture. In sbsa mode the
    x86_64 target directories of the CUDA tree are removed and ``include`` /
    ``lib64`` are re-created as links into ``targets/sbsa-linux``.

    Exits with status -1 on invalid arguments or missing input files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--sbsa_mode",
        action="store_true",
        help="create cuda sbsa libs, which means use to x86 cross build for aarch64 cuda libs",
    )
    parser.add_argument(
        "-t",
        "--target_aarch",
        type=str,
        choices=['x86-64', 'aarch64'],
        help="create libs build for, now support x86-64 and aarch64",
        dest="target_aarch",
        required=True,
    )
    parser.add_argument(
        "-d",
        "--cudnn_deb",
        help="cudnn deb package, download from: download from: https://developer.nvidia.com/cudnn-download-survey",
        dest="cudnn_deb",
        type=str,
        required=True,
    )
    parser.add_argument(
        "-r",
        "--trt_deb",
        help="trt deb package, download from: https://developer.nvidia.com/nvidia-tensorrt-download",
        dest="trt_deb",
        type=str,
        required=True,
    )
    parser.add_argument(
        "-c",
        "--cuda_deb",
        help="cuda deb package, download from: https://developer.nvidia.com/cuda-downloads",
        dest="cuda_deb",
        type=str,
        required=True,
    )
    parser.add_argument(
        "-a",
        "--cuda_aarch64_deb",
        help="cuda aarch64 libs package: download from: https://developer.nvidia.com/cuda-downloads",
        type=str,
        dest="cuda_aarch64_deb",
    )
    args = parser.parse_args()

    # ``raise SystemExit`` has the same effect as the site-provided ``exit()``
    # but does not depend on the ``site`` module having been loaded.
    if args.target_aarch == 'x86-64' and args.sbsa_mode:
        print('ERROR: sbsa_mode only support target_aarch = \'aarch64\' now')
        raise SystemExit(-1)
    if args.sbsa_mode and not args.cuda_aarch64_deb:
        print('ERROR: sbsa_mode need -a/--cuda_aarch64_deb to provide cuda aarch64 libs package')
        raise SystemExit(-1)

    # Input packages in the order they are consumed below.
    packages = [args.cuda_deb]
    if args.sbsa_mode:
        packages.append(args.cuda_aarch64_deb)
    packages += [args.cudnn_deb, args.trt_deb]
    for package in packages:
        if not os.path.isfile(package):
            print('ERROR: can not find file:{}'.format(package))
            raise SystemExit(-1)

    print("CONFIG SUMMARY: create cuda cmake build libs for {}, is for sbsa_mode: {}".format(args.target_aarch, args.sbsa_mode))

    subprocess.check_call('rm -rf output && mkdir output', shell=True)

    # handle cuda (plus the aarch64 libs package in sbsa mode)
    handle_cuda_libs(args.cuda_deb)
    if args.sbsa_mode:
        handle_cuda_libs(args.cuda_aarch64_deb)

    # Validate the nvcc search result BEFORE indexing into it, so a missing
    # or ambiguous toolkit yields a clear error rather than an IndexError.
    nvcc = glob.glob('./output/*/bin/nvcc', recursive=True)
    if len(nvcc) != 1:
        raise RuntimeError(
            'expected exactly one nvcc matching ./output/*/bin/nvcc, found: {}'.format(nvcc))
    # Name of the directory holding bin/nvcc, i.e. the "*" part of the glob.
    cuda_version = os.path.basename(os.path.dirname(os.path.dirname(nvcc[0])))
    print('cuda version: {}'.format(cuda_version))

    if args.sbsa_mode:
        # In sbsa mode nvcc itself runs on the x86-64 build host.
        _check_file_arch(nvcc[0], 'x86-64')
        for remove_lib in ['targets/x86_64-linux', 'include', 'lib64']:
            subprocess.check_call(
                ['rm', '-rf', './output/{}/{}'.format(cuda_version, remove_lib)])

        # create link for sbsa
        cwd = os.getcwd()
        os.chdir('output/{}'.format(cuda_version))
        try:
            cmd = 'ln -s targets/sbsa-linux/include/ include && ln -s targets/sbsa-linux/lib/ lib64'
            subprocess.check_call(cmd, shell=True)

            # handle libnvrtc.so: copy the stub under the name in its SONAME
            dynamic_section = subprocess.check_output(
                ['readelf', '-d', 'lib64/stubs/libnvrtc.so']).decode(errors='replace')
            libnvrtc_with_version = None
            for entry in dynamic_section.splitlines():
                if 'SONAME' in entry:
                    start = entry.find('[')
                    end = entry.find(']', start)
                    if start != -1 and end != -1:
                        libnvrtc_with_version = entry[start + 1:end]
                    break
            if not libnvrtc_with_version:
                raise RuntimeError('can not read SONAME of lib64/stubs/libnvrtc.so')
            print('libnvrtc_with_version: {}'.format(libnvrtc_with_version))
            subprocess.check_call(
                ['cp', 'lib64/stubs/libnvrtc.so', 'lib64/{}'.format(libnvrtc_with_version)])
        finally:
            # Always return to the starting directory, even on failure.
            os.chdir(cwd)
    else:
        _check_file_arch(nvcc[0], args.target_aarch)

    # handle cudnn
    subprocess.check_call('rm -rf tmp && rm -rf tmp_sub && mkdir tmp', shell=True)
    print('\nhandle cudnn file from.{}'.format(args.cudnn_deb))
    # FIXME: later release cudnn may dir not with cuda, nvidia may fix later!!
    subprocess.check_call(['tar', '-xvf', args.cudnn_deb, '-C', 'tmp'])
    subprocess.check_call(['mv', 'tmp/cuda', 'output/cudnn'])
    cudnn_real_libs = [
        lib for lib in glob.glob('output/cudnn/lib64/libcudnn.so*')
        if not os.path.islink(lib)
    ]
    if not cudnn_real_libs:
        raise RuntimeError('no libcudnn.so* found under output/cudnn/lib64')
    for lib in cudnn_real_libs:
        _check_file_arch(lib, args.target_aarch)

    # handle trt
    print('\nhandle trt file from.{}'.format(args.trt_deb))
    subprocess.check_call(['tar', '-xvf', args.trt_deb, '-C', 'output'])
    trt_real_libs = [
        lib for lib in glob.glob('output/TensorRT-*/lib/libnvinfer.so.*')
        if not os.path.islink(lib)
    ]
    if not trt_real_libs:
        raise RuntimeError('no libnvinfer.so.* found under output/TensorRT-*/lib')
    for lib in trt_real_libs:
        _check_file_arch(lib, args.target_aarch)


if __name__ == "__main__":
    main()
| @@ -3,17 +3,21 @@ set -e | |||||
| ARCHS=("arm64-v8a" "armeabi-v7a-softfp" "armeabi-v7a-hardfp") | ARCHS=("arm64-v8a" "armeabi-v7a-softfp" "armeabi-v7a-hardfp") | ||||
| BUILD_TYPE=Release | BUILD_TYPE=Release | ||||
| MGE_WITH_CUDA=OFF | |||||
| MGE_ARMV8_2_FEATURE_FP16=OFF | MGE_ARMV8_2_FEATURE_FP16=OFF | ||||
| MGE_ARMV8_2_FEATURE_DOTPROD=OFF | MGE_ARMV8_2_FEATURE_DOTPROD=OFF | ||||
| MGE_DISABLE_FLOAT16=OFF | MGE_DISABLE_FLOAT16=OFF | ||||
| ARCH=arm64-v8a | ARCH=arm64-v8a | ||||
| REMOVE_OLD_BUILD=false | REMOVE_OLD_BUILD=false | ||||
| CMAKE_C_FLAGS="-Wno-psabi" | |||||
| CMAKE_CXX_FLAGS="-Wno-psabi" | |||||
| echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | ||||
| function usage() { | function usage() { | ||||
| echo "$0 args1 args2 .." | echo "$0 args1 args2 .." | ||||
| echo "available args detail:" | echo "available args detail:" | ||||
| echo "-d : Build with Debug mode, default Release mode" | echo "-d : Build with Debug mode, default Release mode" | ||||
| echo "-c : Build with CUDA, default without CUDA(for arm with cuda, example tx1)" | |||||
| echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | ||||
| echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | ||||
| echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | ||||
| @@ -25,13 +29,17 @@ function usage() { | |||||
| exit -1 | exit -1 | ||||
| } | } | ||||
| while getopts "rkhdfpa:" arg | |||||
| while getopts "rkhdcfpa:" arg | |||||
| do | do | ||||
| case $arg in | case $arg in | ||||
| d) | d) | ||||
| echo "Build with Debug mode" | echo "Build with Debug mode" | ||||
| BUILD_TYPE=Debug | BUILD_TYPE=Debug | ||||
| ;; | ;; | ||||
| c) | |||||
| echo "Build with CUDA" | |||||
| MGE_WITH_CUDA=ON | |||||
| ;; | |||||
| f) | f) | ||||
| echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | ||||
| MGE_ARMV8_2_FEATURE_FP16=ON | MGE_ARMV8_2_FEATURE_FP16=ON | ||||
| @@ -77,6 +85,7 @@ done | |||||
| echo "----------------------------------------------------" | echo "----------------------------------------------------" | ||||
| echo "build config summary:" | echo "build config summary:" | ||||
| echo "BUILD_TYPE: $BUILD_TYPE" | echo "BUILD_TYPE: $BUILD_TYPE" | ||||
| echo "MGE_WITH_CUDA: $MGE_WITH_CUDA" | |||||
| echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | ||||
| echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | ||||
| echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | ||||
| @@ -94,17 +103,35 @@ elif [[ $OS =~ "NT" ]]; then | |||||
| MAKEFILE_TYPE="Unix" | MAKEFILE_TYPE="Unix" | ||||
| fi | fi | ||||
| if [ ! $OS = "Linux" ] && [ $MGE_WITH_CUDA = "ON" ];then | |||||
| echo "cross build for arm with cuda only support from Linux" | |||||
| exit -1 | |||||
| fi | |||||
| if [ $MGE_WITH_CUDA = "ON" ] && [ ! $ARCH = "arm64-v8a" ];then | |||||
| echo "arm with cuda only support ARCH: arm64-v8a" | |||||
| exit -1 | |||||
| fi | |||||
| if [ $MGE_WITH_CUDA = "OFF" ];then | |||||
| echo "config -Werror=unused-parameter when cuda off for CI check" | |||||
| CMAKE_C_FLAGS="-Werror=unused-parameter -Wno-psabi" | |||||
| CMAKE_CXX_FLAGS="-Werror=unused-parameter -Wno-psabi" | |||||
| fi | |||||
| SRC_DIR=$($READLINK -f "`dirname $0`/../../") | SRC_DIR=$($READLINK -f "`dirname $0`/../../") | ||||
| source $SRC_DIR/scripts/cmake-build/utils/utils.sh | source $SRC_DIR/scripts/cmake-build/utils/utils.sh | ||||
| function cmake_build() { | function cmake_build() { | ||||
| BUILD_DIR=$SRC_DIR/build_dir/gnu-linux/$1/$BUILD_TYPE/build | |||||
| BUILD_DIR=$SRC_DIR/build_dir/gnu-linux/MGE_WITH_CUDA_$3/$1/$BUILD_TYPE/build | |||||
| INSTALL_DIR=$BUILD_DIR/../install | INSTALL_DIR=$BUILD_DIR/../install | ||||
| TOOLCHAIN=$SRC_DIR/toolchains/$2 | TOOLCHAIN=$SRC_DIR/toolchains/$2 | ||||
| MGE_WITH_CUDA=$3 | |||||
| echo "build dir: $BUILD_DIR" | echo "build dir: $BUILD_DIR" | ||||
| echo "install dir: $INSTALL_DIR" | echo "install dir: $INSTALL_DIR" | ||||
| echo "build type: $BUILD_TYPE" | echo "build type: $BUILD_TYPE" | ||||
| echo "build toolchain: $TOOLCHAIN" | echo "build toolchain: $TOOLCHAIN" | ||||
| echo "MGE_WITH_CUDA: $MGE_WITH_CUDA" | |||||
| echo "BUILD MAKEFILE_TYPE: $MAKEFILE_TYPE" | echo "BUILD MAKEFILE_TYPE: $MAKEFILE_TYPE" | ||||
| try_remove_old_build $REMOVE_OLD_BUILD $BUILD_DIR $INSTALL_DIR | try_remove_old_build $REMOVE_OLD_BUILD $BUILD_DIR $INSTALL_DIR | ||||
| @@ -113,10 +140,12 @@ function cmake_build() { | |||||
| mkdir -p $INSTALL_DIR | mkdir -p $INSTALL_DIR | ||||
| cd $BUILD_DIR | cd $BUILD_DIR | ||||
| cmake -G "$MAKEFILE_TYPE Makefiles" \ | cmake -G "$MAKEFILE_TYPE Makefiles" \ | ||||
| -DCMAKE_C_FLAGS=$CMAKE_C_FLAGS \ | |||||
| -DCMAKE_CXX_FLAGS=$CMAKE_CXX_FLAGS \ | |||||
| -DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN \ | -DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN \ | ||||
| -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | ||||
| -DMGE_INFERENCE_ONLY=ON \ | -DMGE_INFERENCE_ONLY=ON \ | ||||
| -DMGE_WITH_CUDA=OFF \ | |||||
| -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | |||||
| -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | ||||
| -DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | -DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | ||||
| -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | ||||
| @@ -141,4 +170,4 @@ else | |||||
| echo "ERR CONFIG ABORT NOW!!" | echo "ERR CONFIG ABORT NOW!!" | ||||
| exit -1 | exit -1 | ||||
| fi | fi | ||||
| cmake_build $ARCH $toolchain | |||||
| cmake_build $ARCH $toolchain $MGE_WITH_CUDA | |||||
| @@ -1,8 +1,6 @@ | |||||
| set(ARM_CROSS_BUILD_ARCH aarch64) | set(ARM_CROSS_BUILD_ARCH aarch64) | ||||
| set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") | set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") | ||||
| set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") | set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") | ||||
| set(CMAKE_C_FLAGS "-Werror=unused-parameter -Wno-psabi") | |||||
| set(CMAKE_CXX_FLAGS "-Werror=unused-parameter -Wno-psabi") | |||||
| set(CMAKE_STRIP "aarch64-linux-gnu-strip") | set(CMAKE_STRIP "aarch64-linux-gnu-strip") | ||||
| set(CMAKE_SYSTEM_PROCESSOR aarch64) | set(CMAKE_SYSTEM_PROCESSOR aarch64) | ||||
| set(CMAKE_SYSTEM_NAME Linux) | set(CMAKE_SYSTEM_NAME Linux) | ||||