GitOrigin-RevId: d412108732
tags/v1.3.1
| @@ -34,7 +34,6 @@ option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF) | |||
| option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF) | |||
| option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF) | |||
| option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF) | |||
| option(MGE_ARMV8_2_FEATURE_DOTPROD "enable armv8.2-a+dotprod support" OFF) | |||
| option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF) | |||
| option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON) | |||
| option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON) | |||
| @@ -773,6 +772,14 @@ if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") | |||
| endif() | |||
| endif() | |||
| # dotprod is not enabled by default on APPLE; cpuinfo has some problems on APPLE | |||
| if(NOT APPLE) | |||
| CHECK_CXX_COMPILER_FLAG("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT) | |||
| if(CXX_COMPILER_SUPPORT_DOT) | |||
| message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT") | |||
| set(MGB_ENABLE_DOT 1) | |||
| endif() | |||
| endif() | |||
| if(MGE_ARCH STREQUAL "armv7") | |||
| # -funsafe-math-optimizations to enable neon auto-vectorization (since neon is not fully IEEE 754 compatible, GCC does not turn on neon auto-vectorization by default. | |||
| @@ -797,15 +804,6 @@ if(MGE_ARCH STREQUAL "aarch64") | |||
| set(MARCH "-march=armv8.2-a+fp16") | |||
| endif() | |||
| if(MGE_ARMV8_2_FEATURE_DOTPROD) | |||
| message(STATUS "Enable dotprod feature support in armv8.2") | |||
| if(MGE_ARMV8_2_FEATURE_FP16) | |||
| set(MARCH "-march=armv8.2-a+fp16+dotprod") | |||
| else() | |||
| set(MARCH "-march=armv8.2-a+dotprod") | |||
| endif() | |||
| endif() | |||
| if(MGE_WITH_CUDA) | |||
| message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\ | |||
| when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\ | |||
| @@ -10,6 +10,10 @@ | |||
| * implied. | |||
| */ | |||
| #pragma once | |||
| #include "megdnn/arch.h" | |||
| #include "src/common/unroll_macro.h" | |||
| #if MGB_ENABLE_DOT | |||
| #if defined(__ARM_FEATURE_DOTPROD) | |||
| #undef __ARM_FEATURE_DOTPROD | |||
| @@ -17,8 +21,6 @@ | |||
| #define __ARM_FEATURE_DOTPROD 1 | |||
| #endif | |||
| #include <arm_neon.h> | |||
| #include "megdnn/arch.h" | |||
| #include "src/common/unroll_macro.h" | |||
| // GCC does not support __nodebug__, it reports: | |||
| // '__nodebug__' attribute directive ignored | |||
| @@ -4,7 +4,6 @@ set -e | |||
| ARCHS=("arm64-v8a" "armeabi-v7a") | |||
| BUILD_TYPE=Release | |||
| MGE_ARMV8_2_FEATURE_FP16=OFF | |||
| MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||
| MGE_DISABLE_FLOAT16=OFF | |||
| ARCH=arm64-v8a | |||
| REMOVE_OLD_BUILD=false | |||
| @@ -15,7 +14,6 @@ function usage() { | |||
| echo "available args detail:" | |||
| echo "-d : Build with Debug mode, default Release mode" | |||
| echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | |||
| echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||
| echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | |||
| echo "-a : config build arch available: ${ARCHS[@]}" | |||
| echo "-r : remove old build dir before make, default off" | |||
| @@ -25,7 +23,7 @@ function usage() { | |||
| exit -1 | |||
| } | |||
| while getopts "rkhdfpa:" arg | |||
| while getopts "rkhdfa:" arg | |||
| do | |||
| case $arg in | |||
| d) | |||
| @@ -36,10 +34,6 @@ do | |||
| echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | |||
| MGE_ARMV8_2_FEATURE_FP16=ON | |||
| ;; | |||
| p) | |||
| echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||
| MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||
| ;; | |||
| k) | |||
| echo "open MGE_DISABLE_FLOAT16 for NEON" | |||
| MGE_DISABLE_FLOAT16=ON | |||
| @@ -78,7 +72,6 @@ echo "----------------------------------------------------" | |||
| echo "build config summary:" | |||
| echo "BUILD_TYPE: $BUILD_TYPE" | |||
| echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | |||
| echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||
| echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | |||
| echo "ARCH: $ARCH" | |||
| echo "----------------------------------------------------" | |||
| @@ -129,7 +122,6 @@ function cmake_build() { | |||
| -DMGE_INFERENCE_ONLY=ON \ | |||
| -DMGE_WITH_CUDA=OFF \ | |||
| -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | |||
| -DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||
| -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | |||
| -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
| ${EXTRA_CMAKE_ARGS} \ | |||
| @@ -4,7 +4,6 @@ set -e | |||
| ARCHS=("arm64" "armv7") | |||
| BUILD_TYPE=Release | |||
| MGE_ARMV8_2_FEATURE_FP16=OFF | |||
| MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||
| MGE_DISABLE_FLOAT16=OFF | |||
| ARCH=arm64 | |||
| REMOVE_OLD_BUILD=false | |||
| @@ -15,7 +14,6 @@ function usage() { | |||
| echo "available args detail:" | |||
| echo "-d : Build with Debug mode, default Release mode" | |||
| echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | |||
| echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||
| echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | |||
| echo "-a : config build arch available: ${ARCHS[@]}" | |||
| echo "-r : remove old build dir before make, default off" | |||
| @@ -25,7 +23,7 @@ function usage() { | |||
| exit -1 | |||
| } | |||
| while getopts "rkhdfpa:" arg | |||
| while getopts "rkhdfa:" arg | |||
| do | |||
| case $arg in | |||
| d) | |||
| @@ -36,10 +34,6 @@ do | |||
| echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | |||
| MGE_ARMV8_2_FEATURE_FP16=ON | |||
| ;; | |||
| p) | |||
| echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||
| MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||
| ;; | |||
| k) | |||
| echo "open MGE_DISABLE_FLOAT16 for NEON" | |||
| MGE_DISABLE_FLOAT16=ON | |||
| @@ -78,7 +72,6 @@ echo "----------------------------------------------------" | |||
| echo "build config summary:" | |||
| echo "BUILD_TYPE: $BUILD_TYPE" | |||
| echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | |||
| echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||
| echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | |||
| echo "ARCH: $ARCH" | |||
| echo "----------------------------------------------------" | |||
| @@ -126,7 +119,6 @@ function cmake_build() { | |||
| -DPYTHON_EXECUTABLE=/usr/local/bin/python3 \ | |||
| -DMGE_WITH_CUDA=OFF \ | |||
| -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | |||
| -DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||
| -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | |||
| -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
| ${EXTRA_CMAKE_ARGS} \ | |||
| @@ -5,7 +5,6 @@ ARCHS=("arm64-v8a" "armeabi-v7a-softfp" "armeabi-v7a-hardfp") | |||
| BUILD_TYPE=Release | |||
| MGE_WITH_CUDA=OFF | |||
| MGE_ARMV8_2_FEATURE_FP16=OFF | |||
| MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||
| MGE_DISABLE_FLOAT16=OFF | |||
| ARCH=arm64-v8a | |||
| REMOVE_OLD_BUILD=false | |||
| @@ -19,7 +18,6 @@ function usage() { | |||
| echo "-d : Build with Debug mode, default Release mode" | |||
| echo "-c : Build with CUDA, default without CUDA(for arm with cuda, example tx1)" | |||
| echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | |||
| echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||
| echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | |||
| echo "-a : config build arch available: ${ARCHS[@]}" | |||
| echo "-r : remove old build dir before make, default off" | |||
| @@ -29,7 +27,7 @@ function usage() { | |||
| exit -1 | |||
| } | |||
| while getopts "rkhdcfpa:" arg | |||
| while getopts "rkhdcfa:" arg | |||
| do | |||
| case $arg in | |||
| d) | |||
| @@ -44,10 +42,6 @@ do | |||
| echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | |||
| MGE_ARMV8_2_FEATURE_FP16=ON | |||
| ;; | |||
| p) | |||
| echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||
| MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||
| ;; | |||
| k) | |||
| echo "open MGE_DISABLE_FLOAT16 for NEON" | |||
| MGE_DISABLE_FLOAT16=ON | |||
| @@ -87,7 +81,6 @@ echo "build config summary:" | |||
| echo "BUILD_TYPE: $BUILD_TYPE" | |||
| echo "MGE_WITH_CUDA: $MGE_WITH_CUDA" | |||
| echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | |||
| echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||
| echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | |||
| echo "ARCH: $ARCH" | |||
| echo "----------------------------------------------------" | |||
| @@ -147,7 +140,6 @@ function cmake_build() { | |||
| -DMGE_INFERENCE_ONLY=ON \ | |||
| -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | |||
| -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | |||
| -DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||
| -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | |||
| -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
| ${EXTRA_CMAKE_ARGS} \ | |||
| @@ -93,11 +93,6 @@ | |||
| #define MGB_ENABLE_CPUINFO 1 | |||
| #endif | |||
| #ifdef IOS | |||
| #undef MGB_ENABLE_CPUINFO | |||
| #define MGB_ENABLE_CPUINFO 0 | |||
| #endif | |||
| //! use a single macro to indicate whether ARM dotprod is enabled | |||
| #if __ARM_FEATURE_DOTPROD | |||
| #ifdef MGB_ENABLE_DOT | |||
| @@ -117,7 +112,12 @@ | |||
| #endif | |||
| #endif | |||
| //! Disable cpuinfo and dotprod on iOS; cpuinfo has some problems on iOS | |||
| #ifdef IOS | |||
| #undef MGB_ENABLE_CPUINFO | |||
| #define MGB_ENABLE_CPUINFO 0 | |||
| #undef MGB_ENABLE_DOT | |||
| #endif | |||
| // whether to include actual class name in mgb::Typeinfo object; if this is | |||
| // disabled, mgb::serialization::OprRegistry::find_opr_by_name would not work. | |||