GitOrigin-RevId: 721ca73bae
tags/v1.3.0
@@ -216,7 +216,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) {
     }
 }
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST_F(CUDA, CONV_BIAS_NCHW_QS8) {
     //! not support NonlineMode::SIGMOID and NonlineMode::H_SWISH
     require_compute_capability(6, 1);
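Editorial note on the guard above (not part of the patch): CUDA_VERSION is the CUDA toolkit's version macro from cuda.h, encoded as major * 1000 + minor * 10, the same encoding the wheel scripts below use for REQUIR_CUDA_VERSION (10010 for CUDA 10.1, 11010 for 11.1). Replacing `#if 0` with `#if CUDA_VERSION < 11000` therefore keeps these tests compiled out on the cu111/cu112 CI builds while restoring them for pre-11.0 toolkits. A minimal standalone sketch of how the guard behaves, assuming only a CUDA toolkit header on the include path:

```cpp
// Illustration only: shows what '#if CUDA_VERSION < 11000' selects per toolkit.
#include <cuda.h>   // defines CUDA_VERSION, e.g. 10010 for CUDA 10.1, 11010 for 11.1
#include <cstdio>

int main() {
#if CUDA_VERSION < 11000
    // Pre-11.0 toolkit (e.g. cu101): the previously '#if 0'-ed tests compile again.
    std::printf("CUDA_VERSION=%d -> tests enabled\n", CUDA_VERSION);
#else
    // 11.x toolkit (cu111/cu112 CI): tests stay excluded until the bug is fixed.
    std::printf("CUDA_VERSION=%d -> tests skipped\n", CUDA_VERSION);
#endif
    return 0;
}
```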
@@ -63,6 +63,7 @@ add_custom_command(
     TARGET ${MODULE_NAME} POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_SOURCE_DIR}/ACKNOWLEDGMENTS ${PROJECT_BINARY_DIR}
     COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/$<TARGET_FILE_NAME:${MODULE_NAME}> # clean develop
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/version.py # clean develop
     COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine ${CMAKE_CURRENT_BINARY_DIR}/python/megengine
     COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${CMAKE_CURRENT_BINARY_DIR}/python/test
     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_CURRENT_BINARY_DIR}/python/setup.py
@@ -74,7 +74,7 @@ ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu101
 * If you just want to build with cpu only version, you can set `BUILD_WHL_CPU_ONLY` environment 'ON'. eg:
 ```bash
-BUILD_WHL_CPU_ONLY="ON" ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu101
+ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cpu
 ```

 ## Build for MacOS
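Editorial note (not part of the patch): with the new `cpu` value, a CPU-only wheel is selected through the same -sdk flag as the CUDA configs listed in the script's usage() text. A small usage sketch, assuming the commands are run from the repository root:

```bash
# CPU-only wheels for Python 3.6m, using the -sdk value added by this patch
ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cpu

# CUDA-enabled wheels keep using the existing sdk names (cu101, cu111, cu112)
ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu111
```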
@@ -9,16 +9,16 @@ local_path=$(dirname $(readlink -f $0))
 CUDNN_LIB_DIR="/opt/cudnn/lib64/"
 CUDA_LIB_DIR="/usr/local/cuda/lib64/"

-CUDA_SDK="unknown"
+SDK_NAME="unknown"
 function usage() {
-    echo "use '-sdk cu111' to specify cuda toolkit config, also support cu101, cu112"
+    echo "use '-sdk cu111' to specify cuda toolkit config, also support cu101, cu112, cpu"
 }

 while [ "$1" != "" ]; do
     case $1 in
         -sdk)
             shift
-            CUDA_SDK=$1
+            SDK_NAME=$1
             shift
             ;;
         *)
@@ -27,17 +27,16 @@ while [ "$1" != "" ]; do
     esac
 done

-echo "Build with ${CUDA_SDK}"
+echo "Build with ${SDK_NAME}"

-if [ $CUDA_SDK == "cu101" ];then
+if [ $SDK_NAME == "cu101" ];then
     COPY_LIB_LIST="${CUDA_LIB_DIR}/libnvrtc.so.10.1"
     EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=OFF"
-    OUT_DIR="cu101"
     BUILD_GCC8="ON"
     REQUIR_CUDA_VERSION="10010"
     REQUIR_CUDNN_VERSION="7.6.3"
     REQUIR_TENSORRT_VERSION="6.0.1.5"
-elif [ $CUDA_SDK == "cu111" ];then
+elif [ $SDK_NAME == "cu111" ];then
     COPY_LIB_LIST="\
         ${CUDA_LIB_DIR}/libnvrtc.so.11.1:\
         ${CUDA_LIB_DIR}/libcublasLt.so.11:\
@@ -56,11 +55,10 @@ elif [ $CUDA_SDK == "cu111" ];then
         arch=compute_80,code=sm_80 \
         arch=compute_86,code=sm_86 \
         arch=compute_86,code=compute_86"
-    OUT_DIR="cu111"
     REQUIR_CUDA_VERSION="11010"
-    REQUIR_CUDNN_VERSION="8.0.5"
+    REQUIR_CUDNN_VERSION="8.0.4"
     REQUIR_TENSORRT_VERSION="7.2.2.3"
-elif [ $CUDA_SDK == "cu112" ];then
+elif [ $SDK_NAME == "cu112" ];then
     COPY_LIB_LIST="\
         ${CUDA_LIB_DIR}/libnvrtc.so.11.2:\
         ${CUDA_LIB_DIR}/libcublasLt.so.11:\
@@ -79,16 +77,17 @@ elif [ $CUDA_SDK == "cu112" ];then
         arch=compute_80,code=sm_80 \
         arch=compute_86,code=sm_86 \
         arch=compute_86,code=compute_86"
-    OUT_DIR="cu112"
     REQUIR_CUDA_VERSION="11020"
-    REQUIR_CUDNN_VERSION="8.0.5"
+    REQUIR_CUDNN_VERSION="8.0.4"
     REQUIR_TENSORRT_VERSION="7.2.2.3"
+elif [ $SDK_NAME == "cpu" ];then
+    echo "use $SDK_NAME without cuda support"
+    BUILD_WHL_CPU_ONLY="ON"
 else
-    echo "no support sdk ${CUDA_SDK}, please set by '-sdk cu111'"
+    echo "no support sdk ${SDK_NAME}, please set by '-sdk cu111'"
     exit -1
 fi

-BUILD_WHL_CPU_ONLY=${BUILD_WHL_CPU_ONLY}
 if [[ -z ${BUILD_WHL_CPU_ONLY} ]]
 then
     BUILD_WHL_CPU_ONLY="OFF"
@@ -205,7 +204,7 @@ docker run --rm -it $TMPFS_ARGS \
     -e ALL_PYTHON="${ALL_PYTHON}" \
     -e EXTRA_CMAKE_FLAG="$EXTRA_CMAKE_FLAG" \
     -e COPY_LIB_LIST="$COPY_LIB_LIST" \
-    -e OUT_DIR="$OUT_DIR" \
+    -e SDK_NAME="$SDK_NAME" \
     -v ${CUDA_ROOT_DIR}:/usr/local/cuda \
     -v ${CUDNN_ROOT_DIR}:/opt/cudnn \
     -v ${TENSORRT_ROOT_DIR}:/opt/tensorrt \
@@ -119,13 +119,13 @@ do
         if [ ${USE_AUDITWHEEL} = "ON" ]; then
             LD_LIBRARY_PATH=${BUILD_DIR}/dnn/cuda-stub:$LD_LIBRARY_PATH auditwheel repair -L ${NEW_LIB_PATH} ${BUILD_DIR}/staging/dist/Meg*.whl
         else
-            mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${OUT_DIR}
+            mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}
             cd ${BUILD_DIR}/staging/dist/
             org_whl_name=`ls Meg*${ver}*.whl`
             compat_whl_name=`echo ${org_whl_name} | sed 's/linux/manylinux2014/'`
             echo "org whl name: ${org_whl_name}"
             echo "comapt whl name: ${compat_whl_name}"
-            mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${OUT_DIR}/${compat_whl_name}
+            mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name}
             cd /home/output
         fi
         chown -R ${UID}.${UID} .
@@ -1836,7 +1836,7 @@ TEST(TestEnableTensorCore, SmallInputShape) {
 }

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, Nchw4Nchw) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -1936,7 +1936,7 @@ TEST(TestEnableTensorCore, Nchw4Nchw) {
 #endif

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, ConvBiasWithZ) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2002,7 +2002,7 @@ TEST(TestEnableTensorCore, ConvBiasWithZ) {
 #endif

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, Pooling) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2148,7 +2148,7 @@ TEST(TestGoptInference, EnableTensorCore) {
 }

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(FuseConvBiasZPass, BlockFuse) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2273,7 +2273,7 @@ TEST(FuseConvBiasZPass, BlockFuse) {
 #endif

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, ShuffleMerge) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2450,7 +2450,7 @@ TEST(FuseConvBiasZPass, Basic) {
 #if MGB_CUDA

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2552,7 +2552,7 @@ TEST(TestGoptInference, EnableCHWN4) {
 #endif

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4WarpPespective) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2731,7 +2731,7 @@ TEST(TestGoptInference, EnableCHWN4Pooling) {
 }

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2857,7 +2857,7 @@ TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
 #endif

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, ConvertFormatNCHW4GPU) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -3076,7 +3076,7 @@ TEST(TestGoptInference, ConvertFormatNCHW4) {
 }

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, ConvertFormatNCHW4Ic3) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -3945,7 +3945,7 @@ TEST(TestGoptInference, FoldingConvDimshuffle) {
 }

 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NCHW32) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -1978,7 +1978,7 @@ TEST(TestTensorRTReplace, FuseConvAdd) {
     MGB_ASSERT_TENSOR_NEAR(outputs[1], outputs[3], 1e-3);
 }
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestTensorRTReplace, FuseConvAddNchw2nchw4) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");