| @@ -484,7 +484,7 @@ jobs: | |||
| CXX: g++-12 | |||
| run: | | |||
| mkdir build-avx512-spr && cd build-avx512-spr | |||
| cmake cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_AVX512BF16=ON -DNCNN_AVX512FP16=ON -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_AVX2=ON -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_AVX512BF16=ON -DNCNN_AVX512FP16=ON -DNCNN_XOP=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON .. | |||
| cmake --build . -j 2 | |||
| - name: test-avx512-spr | |||
| run: | | |||
| @@ -163,8 +163,32 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm") | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.2-a+fp16") | |||
| check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16x8_t _s, _a, _b; _s = vfmaq_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.2-a+fp16+dotprod") | |||
| check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int32x4_t _s; int8x16_t _a, _b; _s = vdotq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16_DOTPROD) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.2-a+dotprod") | |||
| check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int32x4_t _s; int8x16_t _a, _b; _s = vdotq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_DOTPROD) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.2-a+fp16fml") | |||
| check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _s; float16x8_t _a, _b; _s = vfmlalq_low_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16FML) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.4-a+bf16") | |||
| check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _s; bfloat16x8_t _a, _b; _s = vbfmmlaq_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_BF16) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.4-a+i8mm") | |||
| check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int32x4_t _s; int8x16_t _a, _b; _s = vmmlaq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_I8MM) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve") | |||
| check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat16_t _s, _a, _b; svbool_t bp; _s = svmla_f16_z(bp, _s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve2") | |||
| check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svint16_t _s; svint8_t _a, _b; _s = svmlslb_s16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE2) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve+bf16") | |||
| check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s; svbfloat16_t _a, _b; _s = svbfmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEBF16) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve+i8mm") | |||
| check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svint32_t _s; svint8_t _a, _b; _s = svmmla_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM) | |||
| set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve+f32mm") | |||
| check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s, _a, _b; _s = svmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM) | |||
| unset(CMAKE_REQUIRED_FLAGS) | |||
| endif() | |||
| @@ -176,16 +200,72 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm") | |||
| endif() | |||
| if(NCNN_COMPILER_SUPPORT_ARM82_FP16) | |||
| option(NCNN_ARM82 "optimize aarch64 platform with armv8.2" ON) | |||
| if(NCNN_COMPILER_SUPPORT_ARM82_FP16_DOTPROD) | |||
| option(NCNN_ARM82 "optimize aarch64 platform with armv8.2 fp16" ON) | |||
| if(NCNN_COMPILER_SUPPORT_ARM82_DOTPROD) | |||
| if(NCNN_ARM82) | |||
| option(NCNN_ARM82DOT "optimize aarch64 platform with armv8.2 dotprod" ON) | |||
| if(NCNN_COMPILER_SUPPORT_ARM82_FP16FML) | |||
| if(NCNN_ARM82DOT) | |||
| option(NCNN_ARM82FP16FML "optimize aarch64 platform with armv8.2 fp16fml" ON) | |||
| if(NCNN_COMPILER_SUPPORT_ARM84_BF16) | |||
| if(NCNN_ARM82FP16FML) | |||
| option(NCNN_ARM84BF16 "optimize aarch64 platform with armv8.4 bf16" ON) | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.4 bf16. NCNN_ARM86BF16 will be OFF.") | |||
| endif() | |||
| if(NCNN_COMPILER_SUPPORT_ARM84_I8MM) | |||
| if(NCNN_ARM82FP16FML) | |||
| option(NCNN_ARM86I8MM "optimize aarch64 platform with armv8.4 i8mm" ON) | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.4 i8mm. NCNN_ARM86I8MM will be OFF.") | |||
| endif() | |||
| if(NCNN_COMPILER_SUPPORT_ARM86_SVE) | |||
| if(NCNN_ARM84BF16 AND NCNN_ARM84I8MM) | |||
| option(NCNN_ARM86SVE "optimize aarch64 platform with armv8.6 sve" ON) | |||
| if(NCNN_COMPILER_SUPPORT_ARM86_SVE2) | |||
| if(NCNN_ARM86SVE) | |||
| option(NCNN_ARM86SVE2 "optimize aarch64 platform with armv8.6 sve2" ON) | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.6 sve2. NCNN_ARM86SVE2 will be OFF.") | |||
| endif() | |||
| if(NCNN_COMPILER_SUPPORT_ARM86_SVEBF16) | |||
| if(NCNN_ARM86SVE) | |||
| option(NCNN_ARM86SVEBF16 "optimize aarch64 platform with armv8.6 sve bf16" ON) | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.6 sve bf16. NCNN_ARM86SVEBF16 will be OFF.") | |||
| endif() | |||
| if(NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM) | |||
| if(NCNN_ARM86SVE) | |||
| option(NCNN_ARM86SVEI8MM "optimize aarch64 platform with armv8.6 sve i8mm" ON) | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.6 sve i8mm. NCNN_ARM86SVEI8MM will be OFF.") | |||
| endif() | |||
| if(NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM) | |||
| if(NCNN_ARM86SVE) | |||
| option(NCNN_ARM86SVEF32MM "optimize aarch64 platform with armv8.6 sve f32mm" ON) | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.6 sve f32mm. NCNN_ARM86SVEF32MM will be OFF.") | |||
| endif() | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.6 sve. NCNN_ARM86SVE will be OFF.") | |||
| endif() | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.2 fp16fml. NCNN_ARM82FP16FML will be OFF.") | |||
| endif() | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.2 dotprod. NCNN_ARM82DOT will be OFF.") | |||
| endif() | |||
| else() | |||
| message(WARNING "The compiler does not support armv8.2. NCNN_ARM82 will be OFF.") | |||
| message(WARNING "The compiler does not support armv8.2 fp16. NCNN_ARM82 will be OFF.") | |||
| endif() | |||
| elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(mips)") | |||
| set(NCNN_TARGET_ARCH mips) | |||
| @@ -235,6 +235,30 @@ macro(ncnn_add_layer class) | |||
| if(NCNN_ARM82DOT) | |||
| ncnn_add_arch_opt_source(${class} asimddp "-march=armv8.2-a+fp16+dotprod") | |||
| endif() | |||
| if(NCNN_ARM82FP16FML) | |||
| ncnn_add_arch_opt_source(${class} asimdfhm "-march=armv8.2-a+fp16+dotprod+fp16fml") | |||
| endif() | |||
| if(NCNN_ARM84BF16) | |||
| ncnn_add_arch_opt_source(${class} bf16 "-march=armv8.4-a+bf16") | |||
| endif() | |||
| if(NCNN_ARM84I8MM) | |||
| ncnn_add_arch_opt_source(${class} i8mm "-march=armv8.4-a+i8mm") | |||
| endif() | |||
| if(NCNN_ARM86SVE) | |||
| ncnn_add_arch_opt_source(${class} sve "-march=armv8.6-a+sve") | |||
| endif() | |||
| if(NCNN_ARM86SVE2) | |||
| ncnn_add_arch_opt_source(${class} sve2 "-march=armv8.6-a+sve2") | |||
| endif() | |||
| if(NCNN_ARM86SVEBF16) | |||
| ncnn_add_arch_opt_source(${class} svebf16 "-march=armv8.6-a+sve+bf16") | |||
| endif() | |||
| if(NCNN_ARM86SVEI8MM) | |||
| ncnn_add_arch_opt_source(${class} svei8mm "-march=armv8.6-a+sve+i8mm") | |||
| endif() | |||
| if(NCNN_ARM86SVEF32MM) | |||
| ncnn_add_arch_opt_source(${class} svef32mm "-march=armv8.6-a+sve+f32mm") | |||
| endif() | |||
| endif() | |||
| if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "mips") | |||
| @@ -413,13 +413,38 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm") OR (CMAKE_SYSTEM_PROCESSOR MA | |||
| endif() | |||
| if(((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64|aarch64)"))) | |||
| if(NOT NCNN_RUNTIME_CPU AND NCNN_ARM82) | |||
| if(NOT NCNN_RUNTIME_CPU AND NCNN_ARM86SVE) | |||
| set(ARM_MARCH_FLAG "-march=armv8.6-a+sve") | |||
| if(NCNN_ARM86SVE2) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+sve2") | |||
| endif() | |||
| if(NCNN_ARM86SVEBF16) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+bf16") | |||
| endif() | |||
| if(NCNN_ARM86SVEI8MM) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+i8mm") | |||
| endif() | |||
| if(NCNN_ARM86SVEF32MM) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+f32mm") | |||
| endif() | |||
| elseif(NOT NCNN_RUNTIME_CPU AND (NCNN_ARM84BF16 OR NCNN_ARM84I8MM)) | |||
| set(ARM_MARCH_FLAG "-march=armv8.4-a") | |||
| if(NCNN_ARM84BF16) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+bf16") | |||
| endif() | |||
| if(NCNN_ARM84I8MM) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+i8mm") | |||
| endif() | |||
| elseif(NOT NCNN_RUNTIME_CPU AND NCNN_ARM82) | |||
| set(ARM_MARCH_FLAG "-march=armv8.2-a+fp16") | |||
| if(NCNN_ARM82DOT) | |||
| target_compile_options(ncnn PRIVATE -march=armv8.2-a+fp16+dotprod) | |||
| else() | |||
| target_compile_options(ncnn PRIVATE -march=armv8.2-a+fp16) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+dotprod") | |||
| endif() | |||
| if(NCNN_ARM82FP16FML) | |||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+fp16fml") | |||
| endif() | |||
| endif() | |||
| target_compile_options(ncnn PRIVATE ${ARM_MARCH_FLAG}) | |||
| endif() | |||
| if(NCNN_TARGET_ARCH STREQUAL "mips") | |||
| @@ -114,10 +114,10 @@ namespace ncnn { | |||
| // its implementation does not parse /proc/self/auxv. Instead it depends | |||
| // on values that are passed by the kernel at process-init time to the | |||
| // C runtime initialization layer. | |||
| static unsigned int get_elf_hwcap_from_getauxval() | |||
| static unsigned int get_elf_hwcap_from_getauxval(unsigned int type) | |||
| { | |||
| #if __ANDROID_API__ >= 18 | |||
| unsigned int hwcap = getauxval(AT_HWCAP); | |||
| unsigned int hwcap = getauxval(type); | |||
| if (hwcap) | |||
| return hwcap; | |||
| #endif | |||
| @@ -141,7 +141,7 @@ static unsigned int get_elf_hwcap_from_getauxval() | |||
| else | |||
| { | |||
| // Note: getauxval() returns 0 on failure. Doesn't touch errno. | |||
| result = (unsigned int)(*func)(AT_HWCAP); | |||
| result = (unsigned int)(*func)(type); | |||
| } | |||
| dlclose(libc_handle); | |||
| @@ -150,7 +150,7 @@ static unsigned int get_elf_hwcap_from_getauxval() | |||
| #endif // defined __ANDROID__ | |||
| // extract the ELF HW capabilities bitmap from /proc/self/auxv | |||
| static unsigned int get_elf_hwcap_from_proc_self_auxv() | |||
| static unsigned int get_elf_hwcap_from_proc_self_auxv(unsigned int type) | |||
| { | |||
| FILE* fp = fopen("/proc/self/auxv", "rb"); | |||
| if (!fp) | |||
| @@ -184,7 +184,7 @@ static unsigned int get_elf_hwcap_from_proc_self_auxv() | |||
| if (entry.tag == 0 && entry.value == 0) | |||
| break; | |||
| if (entry.tag == AT_HWCAP) | |||
| if (entry.tag == type) | |||
| { | |||
| result = entry.value; | |||
| break; | |||
| @@ -196,24 +196,33 @@ static unsigned int get_elf_hwcap_from_proc_self_auxv() | |||
| return result; | |||
| } | |||
| static unsigned int get_elf_hwcap() | |||
| static unsigned int get_elf_hwcap(unsigned int type) | |||
| { | |||
| #if defined __ANDROID__ | |||
| unsigned int hwcap = get_elf_hwcap_from_getauxval(); | |||
| unsigned int hwcap = get_elf_hwcap_from_getauxval(type); | |||
| if (hwcap) | |||
| return hwcap; | |||
| #endif | |||
| return get_elf_hwcap_from_proc_self_auxv(); | |||
| return get_elf_hwcap_from_proc_self_auxv(type); | |||
| } | |||
| static unsigned int g_hwcaps = get_elf_hwcap(); | |||
| static unsigned int g_hwcaps = get_elf_hwcap(AT_HWCAP); | |||
| static unsigned int g_hwcaps2 = get_elf_hwcap(AT_HWCAP2); | |||
| #if __aarch64__ | |||
| // from arch/arm64/include/uapi/asm/hwcap.h | |||
| #define HWCAP_ASIMD (1 << 1) | |||
| #define HWCAP_ASIMDHP (1 << 10) | |||
| #define HWCAP_ASIMDDP (1 << 20) | |||
| #define HWCAP_ASIMD (1 << 1) | |||
| #define HWCAP_ASIMDHP (1 << 10) | |||
| #define HWCAP_ASIMDDP (1 << 20) | |||
| #define HWCAP_SVE (1 << 22) | |||
| #define HWCAP_ASIMDFHM (1 << 23) | |||
| #define HWCAP2_SVE2 (1 << 1) | |||
| #define HWCAP2_SVEI8MM (1 << 9) | |||
| #define HWCAP2_SVEF32MM (1 << 10) | |||
| #define HWCAP2_SVEBF16 (1 << 12) | |||
| #define HWCAP2_I8MM (1 << 13) | |||
| #define HWCAP2_BF16 (1 << 14) | |||
| #else | |||
| // from arch/arm/include/uapi/asm/hwcap.h | |||
| #define HWCAP_NEON (1 << 12) | |||
| @@ -443,6 +452,158 @@ int cpu_support_arm_asimddp() | |||
| #endif | |||
| } | |||
| int cpu_support_arm_asimdfhm() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps & HWCAP_ASIMDFHM; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return g_hw_cpufamily == CPUFAMILY_ARM_LIGHTNING_THUNDER || g_hw_cpufamily == CPUFAMILY_ARM_FIRESTORM_ICESTORM || g_hw_cpufamily == CPUFAMILY_ARM_AVALANCHE_BLIZZARD; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| int cpu_support_arm_bf16() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps2 & HWCAP2_BF16; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return 0; // no known apple cpu support armv8.6 bf16 | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| int cpu_support_arm_i8mm() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps2 & HWCAP2_I8MM; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return 0; // no known apple cpu support armv8.6 i8mm | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| int cpu_support_arm_sve() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps & HWCAP_SVE; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return 0; // no known apple cpu support armv8.6 sve | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| int cpu_support_arm_sve2() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps2 & HWCAP2_SVE2; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return 0; // no known apple cpu support armv8.6 sve2 | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| int cpu_support_arm_svebf16() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps2 & HWCAP2_SVEBF16; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return 0; // no known apple cpu support armv8.6 svebf16 | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| int cpu_support_arm_svei8mm() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps2 & HWCAP2_SVEI8MM; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return 0; // no known apple cpu support armv8.6 svei8mm | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| int cpu_support_arm_svef32mm() | |||
| { | |||
| #if defined __ANDROID__ || defined __linux__ | |||
| #if __aarch64__ | |||
| return g_hwcaps2 & HWCAP2_SVEF32MM; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #elif __APPLE__ | |||
| #if __aarch64__ | |||
| return 0; // no known apple cpu support armv8.6 svef32mm | |||
| #else | |||
| return 0; | |||
| #endif | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) | |||
| static inline void x86_cpuid(int level, unsigned int out[4]) | |||
| { | |||
| @@ -53,6 +53,22 @@ NCNN_EXPORT int cpu_support_arm_vfpv4(); | |||
| NCNN_EXPORT int cpu_support_arm_asimdhp(); | |||
| // asimddp = aarch64 asimd dot product | |||
| NCNN_EXPORT int cpu_support_arm_asimddp(); | |||
| // asimdfhm = aarch64 asimd fhm | |||
| NCNN_EXPORT int cpu_support_arm_asimdfhm(); | |||
| // bf16 = aarch64 bf16 | |||
| NCNN_EXPORT int cpu_support_arm_bf16(); | |||
| // i8mm = aarch64 i8mm | |||
| NCNN_EXPORT int cpu_support_arm_i8mm(); | |||
| // sve = aarch64 sve | |||
| NCNN_EXPORT int cpu_support_arm_sve(); | |||
| // sve2 = aarch64 sve2 | |||
| NCNN_EXPORT int cpu_support_arm_sve2(); | |||
| // svebf16 = aarch64 svebf16 | |||
| NCNN_EXPORT int cpu_support_arm_svebf16(); | |||
| // svei8mm = aarch64 svei8mm | |||
| NCNN_EXPORT int cpu_support_arm_svei8mm(); | |||
| // svef32mm = aarch64 svef32mm | |||
| NCNN_EXPORT int cpu_support_arm_svef32mm(); | |||
| // avx = x86 avx | |||
| NCNN_EXPORT int cpu_support_x86_avx(); | |||
| @@ -45,6 +45,14 @@ | |||
| #if __aarch64__ | |||
| #cmakedefine01 NCNN_ARM82 | |||
| #cmakedefine01 NCNN_ARM82DOT | |||
| #cmakedefine01 NCNN_ARM82FP16FML | |||
| #cmakedefine01 NCNN_ARM84BF16 | |||
| #cmakedefine01 NCNN_ARM84I8MM | |||
| #cmakedefine01 NCNN_ARM86SVE | |||
| #cmakedefine01 NCNN_ARM86SVE2 | |||
| #cmakedefine01 NCNN_ARM86SVEBF16 | |||
| #cmakedefine01 NCNN_ARM86SVEI8MM | |||
| #cmakedefine01 NCNN_ARM86SVEF32MM | |||
| #endif // __aarch64__ | |||
| #cmakedefine01 NCNN_MSA | |||
| #cmakedefine01 NCNN_MMI | |||