|
- if(CMAKE_TOOLCHAIN_FILE)
-     # Turn the toolchain file into a full path. get_filename_component(... ABSOLUTE)
-     # only resolves against the source dir, so the bare file name is looked up
-     # with find_file() restricted to CMAKE_SOURCE_DIR instead.
-     get_filename_component(CMAKE_TOOLCHAIN_FILE_NAME ${CMAKE_TOOLCHAIN_FILE} NAME)
-     find_file(CMAKE_TOOLCHAIN_FILE
-         ${CMAKE_TOOLCHAIN_FILE_NAME}
-         PATHS ${CMAKE_SOURCE_DIR}
-         NO_DEFAULT_PATH)
-     message(STATUS "CMAKE_TOOLCHAIN_FILE = ${CMAKE_TOOLCHAIN_FILE}")
-
-     # Cache entry naming the root directory for library output; defaults to the build dir.
-     set(LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_BINARY_DIR} CACHE PATH "root for library output, set this to change where android libs are compiled to")
- endif()
-
- # Default the install location to <build dir>/install when no prefix is defined yet.
- if(DEFINED CMAKE_INSTALL_PREFIX)
-     # a prefix is already defined: keep it untouched
- else()
-     set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE PATH "Installation Directory")
- endif()
- message(STATUS "CMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}")
-
- # The version string is 1.0.<NCNN_VERSION>. When NCNN_VERSION is not defined,
- # the patch component falls back to the current date formatted as YYYYMMDD.
- set(NCNN_VERSION_MAJOR 1)
- set(NCNN_VERSION_MINOR 0)
-
- if(NOT DEFINED NCNN_VERSION)
-     string(TIMESTAMP NCNN_VERSION "%Y%m%d")
- endif()
- set(NCNN_VERSION_PATCH ${NCNN_VERSION})
-
- set(NCNN_VERSION_STRING ${NCNN_VERSION_MAJOR}.${NCNN_VERSION_MINOR}.${NCNN_VERSION_PATCH})
- message(STATUS "NCNN_VERSION_STRING = ${NCNN_VERSION_STRING}")
-
- # Require CMake 2.8.12 or newer; the "...3.10" part is the policy max version.
- # NOTE(review): this runs after the setup commands above but before project() - confirm the ordering is intentional.
- cmake_minimum_required(VERSION 2.8.12...3.10)
-
- # No build type chosen: force the cache entry to "release".
- if(NOT CMAKE_BUILD_TYPE)
-     set(CMAKE_BUILD_TYPE
-         release
-         CACHE STRING "Choose the type of build"
-         FORCE)
- endif()
-
- # Opt in to the NEW behavior of every listed policy that the running CMake knows:
- #   CMP0091 - enables CMAKE_MSVC_RUNTIME_LIBRARY (policy exists from CMake 3.15 on)
- #   CMP0025 - reference from https://cmake.org/cmake/help/latest/policy/CMP0025.html
- #   CMP0057 - reference from https://cmake.org/cmake/help/latest/policy/CMP0057.html
- foreach(ncnn_policy_id CMP0091 CMP0025 CMP0057)
-     if(POLICY ${ncnn_policy_id})
-         cmake_policy(SET ${ncnn_policy_id} NEW)
-     endif()
- endforeach()
- unset(ncnn_policy_id)
-
- # Declare the ncnn project. No LANGUAGES are listed, so CMake enables its default languages.
- project(ncnn)
-
- # Static CRT selection for MSVC. CMAKE_MSVC_RUNTIME_LIBRARY does not work before
- # CMake 3.15, so the option is only offered from that version on.
- if(NOT CMAKE_VERSION VERSION_LESS "3.15" AND MSVC)
-     option(NCNN_BUILD_WITH_STATIC_CRT "Enables use of statically linked CRT for statically linked ncnn" OFF)
-
-     if(NCNN_BUILD_WITH_STATIC_CRT)
-         # the generator expression appends "Debug" for the Debug configuration
-         set(CMAKE_MSVC_RUNTIME_LIBRARY
-             "MultiThreaded$<$<CONFIG:Debug>:Debug>")
-     endif()
- endif()
-
- # project() overwrites the CMAKE_FIND_LIBRARY_SUFFIXES set by a toolchain file;
- # put the toolchain's value back when it provided one.
- if(CMAKE_FIND_LIBRARY_SUFFIXES_INIT)
-     set(CMAKE_FIND_LIBRARY_SUFFIXES
-         ${CMAKE_FIND_LIBRARY_SUFFIXES_INIT})
- endif()
-
- # ---- library form and core runtime features ----
- option(NCNN_SHARED_LIB      "shared library support" OFF)
- option(NCNN_ENABLE_LTO      "enable link-time optimization" OFF)
- option(NCNN_OPENMP          "openmp support" ON)
- option(NCNN_STDIO           "load model from external file" ON)
- option(NCNN_STRING          "plain and verbose string" ON)
- option(NCNN_INSTALL_SDK     "install ncnn library and headers" ON)
-
- # ---- minimal in-house replacements ----
- option(NCNN_SIMPLEOCV       "minimal opencv structure emulation" OFF)
- option(NCNN_SIMPLEOMP       "minimal openmp runtime emulation" OFF)
- option(NCNN_SIMPLESTL       "minimal cpp stl structure emulation" OFF)
- option(NCNN_SIMPLEMATH      "minimal cmath" OFF)
-
- # ---- threading, profiling output and api surface ----
- option(NCNN_THREADS         "build with threads" ON)
- option(NCNN_BENCHMARK       "print benchmark information for every layer" OFF)
- option(NCNN_C_API           "build with C api" ON)
- option(NCNN_PLATFORM_API    "build with platform api candy" ON)
-
- # ---- image pixel helpers ----
- option(NCNN_PIXEL           "convert and resize from/to image pixel" ON)
- option(NCNN_PIXEL_ROTATE    "rotate image pixel orientation" ON)
- option(NCNN_PIXEL_AFFINE    "warp affine image pixel" ON)
- option(NCNN_PIXEL_DRAWING   "draw basic figure and text" ON)
-
- # ---- configure-time verbosity ----
- option(NCNN_CMAKE_VERBOSE   "print verbose cmake messages" OFF)
-
- # ---- vulkan ----
- option(NCNN_VULKAN          "vulkan compute support" OFF)
- option(NCNN_SIMPLEVK        "minimal in-house vulkan loader" ON)
- option(NCNN_SYSTEM_GLSLANG  "use system glslang library" OFF)
-
- # ---- code generation ----
- option(NCNN_RUNTIME_CPU     "runtime dispatch cpu routines" ON)
- option(NCNN_DISABLE_PIC     "disable position-independent code" OFF)
-
- # ---- tests, instrumentation and extra targets ----
- option(NCNN_BUILD_TESTS     "build tests" OFF)
- option(NCNN_COVERAGE        "build for coverage" OFF)
- option(NCNN_ASAN            "build for address sanitizer" OFF)
- option(NCNN_BUILD_BENCHMARK "build benchmark" ON)
- option(NCNN_PYTHON          "build python api" OFF)
-
- # ---- inference precision and inlining ----
- option(NCNN_INT8            "int8 inference" ON)
- option(NCNN_BF16            "bf16 inference" ON)
- option(NCNN_FORCE_INLINE    "force inline some function" ON)
-
- # RTTI and exceptions are disabled by default on Android, iOS and simplestl builds,
- # and left enabled by default everywhere else.
- if(ANDROID OR IOS OR NCNN_SIMPLESTL)
-     set(ncnn_lean_cxx_default ON)
- else()
-     set(ncnn_lean_cxx_default OFF)
- endif()
- option(NCNN_DISABLE_RTTI "disable rtti" ${ncnn_lean_cxx_default})
- option(NCNN_DISABLE_EXCEPTION "disable exception" ${ncnn_lean_cxx_default})
- unset(ncnn_lean_cxx_default)
-
- # Tools and examples are built by default only for a native build that is not
- # Android, iOS or simplestl.
- if(ANDROID OR IOS OR NCNN_SIMPLESTL OR CMAKE_CROSSCOMPILING)
-     set(ncnn_host_extras_default OFF)
- else()
-     set(ncnn_host_extras_default ON)
- endif()
- option(NCNN_BUILD_TOOLS "build tools" ${ncnn_host_extras_default})
- option(NCNN_BUILD_EXAMPLES "build examples" ${ncnn_host_extras_default})
- unset(ncnn_host_extras_default)
-
- # Global link-time optimization: only considered when a shared library is being
- # built and LTO was requested.
- if(NCNN_SHARED_LIB AND NCNN_ENABLE_LTO)
-     cmake_policy(SET CMP0069 NEW)
-     set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
-
-     include(CheckIPOSupported)
-     check_ipo_supported(RESULT ipo_supported OUTPUT ipo_supported_output)
-
-     if(NOT ipo_supported)
-         # report why IPO is unavailable and switch the request off for this scope
-         message(WARNING "IPO is not supported: ${ipo_supported_output}")
-         set(NCNN_ENABLE_LTO OFF)
-     else()
-         set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
-     endif()
- endif()
-
- # When NCNN_STDIO or NCNN_STRING is off, every enabled target group in the list
- # below is warned about and switched off, in the listed order.
- if(NOT NCNN_STDIO OR NOT NCNN_STRING)
-     foreach(ncnn_io_dependent
-             NCNN_BUILD_TOOLS
-             NCNN_BUILD_EXAMPLES
-             NCNN_BUILD_BENCHMARK
-             NCNN_BUILD_TESTS)
-         if(${ncnn_io_dependent})
-             message(WARNING "NCNN_STDIO or NCNN_STRING disabled, ${ncnn_io_dependent} will be turned off.")
-             set(${ncnn_io_dependent} OFF)
-         endif()
-     endforeach()
-     unset(ncnn_io_dependent)
- endif()
-
- ##############################################
-
- # Modules for the compiler capability probes used in this file.
- # check_cxx_source_compiles() is provided by CheckCXXSourceCompiles; include it
- # explicitly rather than relying on CheckCXXCompilerFlag pulling it in.
- include(CheckCXXCompilerFlag)
- include(CheckCXXSourceCompiles)
-
- # Probes are compiled in the release configuration and built as static libraries.
- set(CMAKE_TRY_COMPILE_CONFIGURATION release)
- set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
-
- # gnu inline assembly in clang msvc does not work actually
- # GNU-style inline assembly is probed on every compiler except MSVC and clang
- # running with the MSVC frontend; the option is only offered when the probe compiles.
- if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC"
-         OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"
-             AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC"
-             AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")))
-     check_cxx_source_compiles("int test(int a) { asm volatile(\"\" : \"=r\"(a) : \"0\"(a) : \"memory\"); return a; }" NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
-
-     if(NOT NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
-         message(WARNING "The compiler does not support gnu style inline assembly. NCNN_GNU_INLINE_ASM will be OFF.")
-     else()
-         option(NCNN_GNU_INLINE_ASM "optimize platform with gnu style inline assembly" ON)
-     endif()
- endif()
-
- if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
- OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
- OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
- OR (CMAKE_CXX_COMPILER_ARCHITECTURE_ID MATCHES "(ARMV7|ARM64)")
- OR ((CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")) AND (${CMAKE_GENERATOR_PLATFORM} MATCHES "^(arm|arm64)")))
- set(NCNN_TARGET_ARCH arm)
-
- if(APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64_32")
- set(NCNN_TARGET_ILP32 TRUE)
- endif()
-
- if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND NOT NCNN_TARGET_ILP32)
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat32x4_t test(float32x4_t s, float32x4_t a, float32x4_t b) { return vmlaq_f32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM_NEON)
-
- if(NCNN_COMPILER_SUPPORT_ARM_NEON)
- if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
- set(CMAKE_REQUIRED_FLAGS "/arch:VFPv4")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x4_t test(float32x4_t a) { return vcvt_f16_f32(a); }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
-
- unset(CMAKE_REQUIRED_FLAGS)
- else()
- set(CMAKE_REQUIRED_FLAGS "-mfpu=neon-vfpv4")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x4_t test(float32x4_t a) { return vcvt_f16_f32(a); }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
-
- if(NOT NCNN_COMPILER_SUPPORT_ARM_VFPV4)
- set(CMAKE_REQUIRED_FLAGS "-mfpu=neon-vfpv4 -mfp16-format=ieee")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x4_t test(float32x4_t a) { return vcvt_f16_f32(a); }" NCNN_COMPILER_SUPPORT_ARM_VFPV4_FP16)
- endif()
-
- unset(CMAKE_REQUIRED_FLAGS)
- endif()
- endif()
-
- if(NCNN_COMPILER_SUPPORT_ARM_VFPV4 OR NCNN_COMPILER_SUPPORT_ARM_VFPV4_FP16)
- option(NCNN_VFPV4 "optimize armv7 platform with vfpv4" ON)
- else()
- message(WARNING "The compiler does not support arm vfpv4. NCNN_VFPV4 will be OFF.")
- endif()
- endif()
-
- if(CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32)
- if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.0")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x4_t test(float32x4_t a) { return vcvt_f16_f32(a); }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x8_t test(float16x8_t s, float16x8_t a, float16x8_t b) { return vfmaq_f16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_FP16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2")
- check_cxx_source_compiles("#include <arm_neon.h>\nint32x4_t test(int32x4_t s, int8x16_t a, int8x16_t b) { return vdotq_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_DOTPROD)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat32x4_t test(float32x4_t s, float16x8_t a, float16x8_t b) { return vfmlalq_low_f16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_FP16FML)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat32x4_t test(float32x4_t s, bfloat16x8_t a, bfloat16x8_t b) { return vcvt_f32_bf16(vcvt_bf16_f32(vbfmmlaq_f32(s, a, b))); }" NCNN_COMPILER_SUPPORT_ARM84_BF16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4")
- check_cxx_source_compiles("#include <arm_neon.h>\nint32x4_t test(int32x4_t s, int8x16_t a, int8x16_t b) { return vmmlaq_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM84_I8MM)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat16_t test(svfloat16_t s, svfloat16_t a, svfloat16_t b, svbool_t bp) { return svmla_f16_z(bp, s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVE)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvint16_t test(svint16_t s, svint8_t a, svint8_t b) { return svmlslb_s16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVE2)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat32_t test(svfloat32_t s, svbfloat16_t a, svbfloat16_t b) { return svbfmmla_f32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEBF16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvint32_t test(svint32_t s, svint8_t a, svint8_t b) { return svmmla_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat32_t test(svfloat32_t s, svfloat32_t a, svfloat32_t b) { return svmmla_f32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
-
- unset(CMAKE_REQUIRED_FLAGS)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.0")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x4_t test(float32x4_t a) { return vcvt_f16_f32(a); }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x8_t test(float16x8_t s, float16x8_t a, float16x8_t b) { return vfmaq_f16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_FP16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+dotprod")
- check_cxx_source_compiles("#include <arm_neon.h>\nint32x4_t test(int32x4_t s, int8x16_t a, int8x16_t b) { return vdotq_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_DOTPROD)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16fml")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat32x4_t test(float32x4_t s, float16x8_t a, float16x8_t b) { return vfmlalq_low_f16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_FP16FML)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+bf16")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat32x4_t test(float32x4_t s, bfloat16x8_t a, bfloat16x8_t b) { return vcvt_f32_bf16(vcvt_bf16_f32(vbfmmlaq_f32(s, a, b))); }" NCNN_COMPILER_SUPPORT_ARM84_BF16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+i8mm")
- check_cxx_source_compiles("#include <arm_neon.h>\nint32x4_t test(int32x4_t s, int8x16_t a, int8x16_t b) { return vmmlaq_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM84_I8MM)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat16_t test(svfloat16_t s, svfloat16_t a, svfloat16_t b, svbool_t bp) { return svmla_f16_z(bp, s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVE)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve2")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvint16_t test(svint16_t s, svint8_t a, svint8_t b) { return svmlslb_s16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVE2)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+bf16")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat32_t test(svfloat32_t s, svbfloat16_t a, svbfloat16_t b) { return svbfmmla_f32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEBF16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+i8mm")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvint32_t test(svint32_t s, svint8_t a, svint8_t b) { return svmmla_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+f32mm")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat32_t test(svfloat32_t s, svfloat32_t a, svfloat32_t b) { return svmmla_f32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
-
- unset(CMAKE_REQUIRED_FLAGS)
- else()
- set(CMAKE_REQUIRED_FLAGS "-march=armv8-a")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x4_t test(float32x4_t a) { return vcvt_f16_f32(a); }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.2-a+fp16")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat16x8_t test(float16x8_t s, float16x8_t a, float16x8_t b) { return vfmaq_f16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_FP16)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.2-a+dotprod")
- check_cxx_source_compiles("#include <arm_neon.h>\nint32x4_t test(int32x4_t s, int8x16_t a, int8x16_t b) { return vdotq_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_DOTPROD)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.2-a+fp16fml")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat32x4_t test(float32x4_t s, float16x8_t a, float16x8_t b) { return vfmlalq_low_f16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM82_FP16FML)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.4-a+bf16")
- check_cxx_source_compiles("#include <arm_neon.h>\nfloat32x4_t test(float32x4_t s, bfloat16x8_t a, bfloat16x8_t b) { return vcvt_f32_bf16(vcvt_bf16_f32(vbfmmlaq_f32(s, a, b))); }" NCNN_COMPILER_SUPPORT_ARM84_BF16)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.4-a+i8mm")
- check_cxx_source_compiles("#include <arm_neon.h>\nint32x4_t test(int32x4_t s, int8x16_t a, int8x16_t b) { return vmmlaq_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM84_I8MM)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat16_t test(svfloat16_t s, svfloat16_t a, svfloat16_t b, svbool_t bp) { return svmla_f16_z(bp, s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVE)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve2")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvint16_t test(svint16_t s, svint8_t a, svint8_t b) { return svmlslb_s16(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVE2)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve+bf16")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat32_t test(svfloat32_t s, svbfloat16_t a, svbfloat16_t b) { return svbfmmla_f32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEBF16)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve+i8mm")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvint32_t test(svint32_t s, svint8_t a, svint8_t b) { return svmmla_s32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM)
-
- set(CMAKE_REQUIRED_FLAGS "-march=armv8.6-a+sve+f32mm")
- check_cxx_source_compiles("#include <arm_sve.h>\nsvfloat32_t test(svfloat32_t s, svfloat32_t a, svfloat32_t b) { return svmmla_f32(s, a, b); }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
-
- unset(CMAKE_REQUIRED_FLAGS)
- endif()
-
- if(NCNN_COMPILER_SUPPORT_ARM_VFPV4)
- option(NCNN_VFPV4 "optimize aarch64 platform with vfpv4" ON)
- else()
- message(WARNING "The compiler does not support arm vfpv4. NCNN_VFPV4 will be OFF.")
- endif()
-
- if(NCNN_COMPILER_SUPPORT_ARM82_FP16)
- option(NCNN_ARM82 "optimize aarch64 platform with armv8.2 fp16" ON)
- if(NCNN_COMPILER_SUPPORT_ARM82_DOTPROD)
- if(NCNN_ARM82)
- option(NCNN_ARM82DOT "optimize aarch64 platform with armv8.2 dotprod" ON)
- endif()
- else()
- message(WARNING "The compiler does not support armv8.2 dotprod. NCNN_ARM82DOT will be OFF.")
- endif()
- if(NCNN_COMPILER_SUPPORT_ARM82_FP16FML)
- if(NCNN_ARM82)
- option(NCNN_ARM82FP16FML "optimize aarch64 platform with armv8.2 fp16fml" ON)
- endif()
- else()
- message(WARNING "The compiler does not support armv8.2 fp16fml. NCNN_ARM82FP16FML will be OFF.")
- endif()
- # armv8.4 bf16: the option is offered only when the compiler probe passed and
- # both NCNN_ARM82DOT and NCNN_ARM82FP16FML are enabled.
- if(NCNN_COMPILER_SUPPORT_ARM84_BF16)
-     if(NCNN_ARM82DOT AND NCNN_ARM82FP16FML)
-         option(NCNN_ARM84BF16 "optimize aarch64 platform with armv8.4 bf16" ON)
-     endif()
- else()
-     # the warning names the option this branch governs (NCNN_ARM84BF16)
-     message(WARNING "The compiler does not support armv8.4 bf16. NCNN_ARM84BF16 will be OFF.")
- endif()
- if(NCNN_COMPILER_SUPPORT_ARM84_I8MM)
- if(NCNN_ARM82DOT AND NCNN_ARM82FP16FML)
- option(NCNN_ARM84I8MM "optimize aarch64 platform with armv8.4 i8mm" ON)
- endif()
- else()
- message(WARNING "The compiler does not support armv8.4 i8mm. NCNN_ARM84I8MM will be OFF.")
- endif()
- if(NCNN_COMPILER_SUPPORT_ARM86_SVE)
- if(NCNN_ARM84BF16 AND NCNN_ARM84I8MM)
- option(NCNN_ARM86SVE "optimize aarch64 platform with armv8.6 sve" ON)
- if(NCNN_COMPILER_SUPPORT_ARM86_SVE2)
- if(NCNN_ARM86SVE)
- option(NCNN_ARM86SVE2 "optimize aarch64 platform with armv8.6 sve2" ON)
- endif()
- else()
- message(WARNING "The compiler does not support armv8.6 sve2. NCNN_ARM86SVE2 will be OFF.")
- endif()
- if(NCNN_COMPILER_SUPPORT_ARM86_SVEBF16)
- if(NCNN_ARM86SVE)
- option(NCNN_ARM86SVEBF16 "optimize aarch64 platform with armv8.6 sve bf16" ON)
- endif()
- else()
- message(WARNING "The compiler does not support armv8.6 sve bf16. NCNN_ARM86SVEBF16 will be OFF.")
- endif()
- if(NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM)
- if(NCNN_ARM86SVE)
- option(NCNN_ARM86SVEI8MM "optimize aarch64 platform with armv8.6 sve i8mm" ON)
- endif()
- else()
- message(WARNING "The compiler does not support armv8.6 sve i8mm. NCNN_ARM86SVEI8MM will be OFF.")
- endif()
- if(NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
- if(NCNN_ARM86SVE)
- option(NCNN_ARM86SVEF32MM "optimize aarch64 platform with armv8.6 sve f32mm" ON)
- endif()
- else()
- message(WARNING "The compiler does not support armv8.6 sve f32mm. NCNN_ARM86SVEF32MM will be OFF.")
- endif()
- endif()
- else()
- message(WARNING "The compiler does not support armv8.6 sve. NCNN_ARM86SVE will be OFF.")
- endif()
- else()
- message(WARNING "The compiler does not support armv8.2 fp16. NCNN_ARM82 will be OFF.")
- endif()
- endif()
- elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(mips)")
- set(NCNN_TARGET_ARCH mips)
-
- check_cxx_compiler_flag("-mmsa" NCNN_COMPILER_SUPPORT_MIPS_MSA)
-
- set(CMAKE_REQUIRED_FLAGS "-mloongson-mmi -I${CMAKE_CURRENT_SOURCE_DIR}/src/layer/mips")
- check_cxx_source_compiles("#include \"loongson_mmi.h\"\nint32x2_t test(int16x4_t a, int16x4_t b) { return __mmi_pmaddhw(a, b); }" NCNN_COMPILER_SUPPORT_LOONGSON_MMI)
-
- unset(CMAKE_REQUIRED_FLAGS)
-
- if(NCNN_COMPILER_SUPPORT_MIPS_MSA)
- option(NCNN_MSA "optimize mips platform with msa extension" ON)
- else()
- message(WARNING "The compiler does not support msa extension. NCNN_MSA will be OFF.")
- endif()
- if(NCNN_COMPILER_SUPPORT_LOONGSON_MMI)
- option(NCNN_MMI "optimize mips platform with loongson mmi extension" ON)
- else()
- message(WARNING "The compiler does not support loongson mmi extension. NCNN_MMI will be OFF.")
- endif()
- elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(loongarch64|loongarch32)")
- set(NCNN_TARGET_ARCH loongarch)
-
- set(CMAKE_REQUIRED_FLAGS "-mlsx")
- check_cxx_source_compiles("#include <lsxintrin.h>\n__m128 test(__m128 a, __m128 b, __m128 c) { return __lsx_vfmadd_s(a, b, c); }" NCNN_COMPILER_SUPPORT_LOONGARCH_LSX)
-
- set(CMAKE_REQUIRED_FLAGS "-mlasx")
- check_cxx_source_compiles("#include <lasxintrin.h>\n__m256 test(__m256 a, __m256 b, __m256 c) { return __lasx_xvfmadd_s(a, b, c); }" NCNN_COMPILER_SUPPORT_LOONGARCH_LASX)
-
- unset(CMAKE_REQUIRED_FLAGS)
-
- if(NCNN_COMPILER_SUPPORT_LOONGARCH_LSX)
- option(NCNN_LSX "optimize loongarch platform with lsx extension" ON)
- if(NCNN_COMPILER_SUPPORT_LOONGARCH_LASX)
- option(NCNN_LASX "optimize loongarch platform with lasx extension" ON)
- else()
- message(WARNING "The compiler does not support lasx extension. NCNN_LASX will be OFF.")
- endif()
- else()
- message(WARNING "The compiler does not support lsx extension. NCNN_LSX will be OFF.")
- endif()
-
- elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
- set(NCNN_TARGET_ARCH riscv)
-
- if(CMAKE_SIZEOF_VOID_P EQUAL 8)
- set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
- check_cxx_source_compiles("#include <riscv_vector.h>\nvfloat32m8_t test(vfloat32m8_t s, vfloat32m8_t w, float v, size_t vl) { return __riscv_vfmacc_vf_f32m8(s, v, w, vl); }\nvfloat32m1x2_t test2(vfloat32m1_t x) { return __riscv_vcreate_v_f32m1x2(x, x); }" NCNN_COMPILER_SUPPORT_RISCV_V)
-
- set(CMAKE_REQUIRED_FLAGS "-march=rv64gc_zfh -D__fp16=_Float16")
- check_cxx_source_compiles("__fp16 test(__fp16 a) { return a * a; }" NCNN_COMPILER_SUPPORT_RISCV_ZFH)
-
- set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
- check_cxx_source_compiles("#include <riscv_vector.h>\nvfloat16m8_t test(vfloat16m8_t s, vfloat16m8_t w, __fp16 v, size_t vl) { return __riscv_vfmacc_vf_f16m8(s, v, w, vl); }\nvfloat16m1x2_t test2(vfloat16m1_t x){ return __riscv_vcreate_v_f16m1x2(x, x); }" NCNN_COMPILER_SUPPORT_RISCV_ZVFH)
-
- set(CMAKE_REQUIRED_FLAGS "-march=rv64gc_zfh_xtheadvector -D__fp16=_Float16")
- check_cxx_source_compiles("#include <riscv_vector.h>\nvfloat16m8_t test(vfloat16m8_t s, vfloat16m8_t w, __fp16 v, size_t vl) { return __riscv_vfmacc_vf_f16m8(s, v, w, vl); }\nvfloat16m1x2_t test2(vfloat16m1_t x){ return __riscv_vcreate_v_f16m1x2(x, x); }" NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
-
- unset(CMAKE_REQUIRED_FLAGS)
-
- if(NCNN_COMPILER_SUPPORT_RISCV_V OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
- option(NCNN_RVV "optimize risc-v platform with v extension" ON)
- else()
- message(WARNING "The compiler does not support risc-v v or xtheadvector extension. NCNN_RVV will be OFF.")
- endif()
-
- if(NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
- option(NCNN_XTHEADVECTOR "optimize risc-v platform with xtheadvector extension" ON)
- else()
- message(WARNING "The compiler does not support risc-v xtheadvector extension. NCNN_XTHEADVECTOR will be OFF.")
- endif()
-
- if(NCNN_COMPILER_SUPPORT_RISCV_ZFH)
- option(NCNN_ZFH "optimize risc-v platform with zfh extension" ON)
- if(NCNN_COMPILER_SUPPORT_RISCV_ZVFH OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
- if(NCNN_RVV AND NCNN_ZFH)
- option(NCNN_ZVFH "optimize risc-v platform with zvfh extension" ON)
- endif()
- else()
- message(WARNING "The compiler does not support zvfh extension. NCNN_ZVFH will be OFF.")
- endif()
- else()
- message(WARNING "The compiler does not support risc-v zfh extension. NCNN_ZFH will be OFF.")
- endif()
-
- endif()
- elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
- set(NCNN_TARGET_ARCH powerpc)
-
- if(NCNN_PPC64LE_VSX)
- set(NCNN_TARGET_ARCH x86)
-
- set(CMAKE_REQUIRED_FLAGS "-DNO_WARN_X86_INTRINSICS -D__SSE2__")
- check_cxx_source_compiles("#include <emmintrin.h>\n__m128i test(__m128i a, __m128i b) { return _mm_madd_epi16(a, b); }" NCNN_COMPILER_SUPPORT_PPC64LE_SSE2)
- unset(CMAKE_REQUIRED_FLAGS)
-
- set(CMAKE_REQUIRED_FLAGS "-DNO_WARN_X86_INTRINSICS -D__SSE4_1__")
- check_cxx_source_compiles("#include <smmintrin.h>\n__m128i test(__m128i a, __m128i b) { return _mm_packus_epi32(a, b); }" NCNN_COMPILER_SUPPORT_PPC64LE_SSE41)
- unset(CMAKE_REQUIRED_FLAGS)
-
- if(NCNN_COMPILER_SUPPORT_PPC64LE_SSE2)
- option(NCNN_VSX_SSE2 "optimize ppc64le platform with sse2 extension" ON)
- else()
- message(WARNING "The compiler does not support sse2 extension. NCNN_VSX_SSE2 will be OFF.")
- endif()
-
- if(NCNN_COMPILER_SUPPORT_PPC64LE_SSE41)
- option(NCNN_VSX_SSE41 "optimize ppc64le platform with sse4.1 extension" ON)
- else()
- message(WARNING "The compiler does not support sse4.1 extension. NCNN_VSX_SSE41 will be OFF.")
- endif()
- endif()
- elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(xtensa)")
- set(NCNN_TARGET_ARCH xtensa)
- elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x)")
- set(NCNN_TARGET_ARCH s390x)
- elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(sw_64)")
- set(NCNN_TARGET_ARCH sw_64)
- #sw_64 is alpha-like platform
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mieee")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mieee")
- else()
- set(NCNN_TARGET_ARCH x86)
-
- option(NCNN_SSE2 "optimize x86 platform with sse2 extension" ON)
-
- if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m256 a, __m256 b) { return _mm256_mul_ps(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m256 s, __m256 a, __m256 b) { return _mm256_fmadd_ps(a, b, s); }" NCNN_COMPILER_SUPPORT_X86_FMA)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX")
- check_cxx_source_compiles("#include <immintrin.h>\n#include <ammintrin.h>\n__m128i test(__m128i s, __m128i a, __m128i b) { return _mm_maddd_epi16(a, b, s); }" NCNN_COMPILER_SUPPORT_X86_XOP)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m128i a) { return _mm256_cvtph_ps(a); }" NCNN_COMPILER_SUPPORT_X86_F16C)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i a, __m256i b) { return _mm256_madd_epi16(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX2)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512i test(__m512i a, __m512i b) { return _mm512_madd_epi16(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX512)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpwssd_avx_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpbssd_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT8)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpwsud_avx_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
- check_cxx_source_compiles("#include <immintrin.h>\n__m128bh test(__m256 a) { return _mm256_cvtneps_avx_pbh(a); }" NCNN_COMPILER_SUPPORT_X86_AVX_NE_CONVERT)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512i test(__m512i s, __m512i a, __m512i b) { return _mm512_dpwssd_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256bh test(__m256bh s, __m512bh a, __m512bh b) { return _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(s), a, b)); }\n__m512i test2(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512h test(__m512h s, __m512h a, __m512h b) { return _mm512_fmadd_ph(s, a, b); }\n__m512 test2(__m512 a) { return _mm512_cvtxph_ps(_mm512_cvtxps_ph(a)); }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
-
- unset(CMAKE_REQUIRED_FLAGS)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
- check_cxx_compiler_flag("-mrecip=none" NCNN_COMPILER_SUPPORT_X86_RECIP_NONE)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m256 a, __m256 b) { return _mm256_mul_ps(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX -mfma -mf16c")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m256 s, __m256 a, __m256 b) { return _mm256_fmadd_ps(a, b, s); }" NCNN_COMPILER_SUPPORT_X86_FMA)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX -mxop")
- check_cxx_source_compiles("#include <x86intrin.h>\n__m128i test(__m128i s, __m128i a, __m128i b) { return _mm_maddd_epi16(a, b, s); }" NCNN_COMPILER_SUPPORT_X86_XOP)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX -mf16c")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m128i a) { return _mm256_cvtph_ps(a); }" NCNN_COMPILER_SUPPORT_X86_F16C)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i a, __m256i b) { return _mm256_madd_epi16(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX2)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512i test(__m512i a, __m512i b) { return _mm512_madd_epi16(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX512)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpwssd_avx_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni -mavxvnniint8")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpbssd_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT8)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni -mavxvnniint16")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpwsud_avx_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxneconvert")
- check_cxx_source_compiles("#include <immintrin.h>\n__m128bh test(__m256 a) { return _mm256_cvtneps_avx_pbh(a); }" NCNN_COMPILER_SUPPORT_X86_AVX_NE_CONVERT)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512i test(__m512i s, __m512i a, __m512i b) { return _mm512_dpwssd_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256bh test(__m256bh s, __m512bh a, __m512bh b) { return _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(s), a, b)); }\n__m512i test2(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
-
- set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512h test(__m512h s, __m512h a, __m512h b) { return _mm512_fmadd_ph(s, a, b); }\n__m512 test2(__m512 a) { return _mm512_cvtxph_ps(_mm512_cvtxps_ph(a)); }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
-
- unset(CMAKE_REQUIRED_FLAGS)
- else()
- check_cxx_compiler_flag("-mrecip=none" NCNN_COMPILER_SUPPORT_X86_RECIP_NONE)
-
- set(CMAKE_REQUIRED_FLAGS "-mavx")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m256 a, __m256 b) { return _mm256_mul_ps(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m256 s, __m256 a, __m256 b) { return _mm256_fmadd_ps(a, b, s); }" NCNN_COMPILER_SUPPORT_X86_FMA)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mxop")
- check_cxx_source_compiles("#include <x86intrin.h>\n__m128i test(__m128i s, __m128i a, __m128i b) { return _mm_maddd_epi16(a, b, s); }" NCNN_COMPILER_SUPPORT_X86_XOP)
-
- set(CMAKE_REQUIRED_FLAGS "-mf16c")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256 test(__m128i a) { return _mm256_cvtph_ps(a); }" NCNN_COMPILER_SUPPORT_X86_F16C)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx2")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i a, __m256i b) { return _mm256_madd_epi16(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX2)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512i test(__m512i a, __m512i b) { return _mm512_madd_epi16(a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX512)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx2 -mavxvnni")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpwssd_avx_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx2 -mavxvnni -mavxvnniint8")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpbssd_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT8)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx2 -mavxvnni -mavxvnniint16")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256i test(__m256i s, __m256i a, __m256i b) { return _mm256_dpwsud_avx_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT16)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx2 -mavxneconvert")
- check_cxx_source_compiles("#include <immintrin.h>\n__m128bh test(__m256 a) { return _mm256_cvtneps_avx_pbh(a); }" NCNN_COMPILER_SUPPORT_X86_AVX_NE_CONVERT)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512i test(__m512i s, __m512i a, __m512i b) { return _mm512_dpwssd_epi32(s, a, b); }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
- check_cxx_source_compiles("#include <immintrin.h>\n__m256bh test(__m256bh s, __m512bh a, __m512bh b) { return _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(s), a, b)); }\n__m512i test2(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
-
- set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
- check_cxx_source_compiles("#include <immintrin.h>\n__m512h test(__m512h s, __m512h a, __m512h b) { return _mm512_fmadd_ph(s, a, b); }\n__m512 test2(__m512 a) { return _mm512_cvtxph_ps(_mm512_cvtxps_ph(a)); }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
-
- unset(CMAKE_REQUIRED_FLAGS)
- endif()
-
- # Declare the user-facing x86 SIMD switches.
- # An option() is only declared when the matching NCNN_COMPILER_SUPPORT_X86_*
- # probe succeeded and the option it builds upon is enabled. When a probe
- # failed, a warning is printed instead; the warning branches only print and
- # do not modify any variable.
- if(CMAKE_SYSTEM_NAME MATCHES "Emscripten|WASI" OR NOT NCNN_COMPILER_SUPPORT_X86_AVX)
-     # NOTE(review): this branch is also taken on Emscripten/WASI regardless of
-     # the AVX probe result, so the wording below may not match the actual cause.
-     message(WARNING "The compiler does not support avx extension. NCNN_AVX will be OFF.")
- else()
-     option(NCNN_AVX "optimize x86 platform with avx extension" ON)
-
-     # --- extensions that build directly on NCNN_AVX ---
-     if(NOT NCNN_COMPILER_SUPPORT_X86_FMA)
-         message(WARNING "The compiler does not support fma extension. NCNN_FMA will be OFF.")
-     elseif(NCNN_AVX)
-         option(NCNN_FMA "optimize x86 platform with fma extension" ON)
-     endif()
-
-     if(NOT NCNN_COMPILER_SUPPORT_X86_XOP)
-         message(WARNING "The compiler does not support xop extension. NCNN_XOP will be OFF.")
-     elseif(NCNN_AVX)
-         option(NCNN_XOP "optimize x86 platform with xop extension" ON)
-     endif()
-
-     if(NOT NCNN_COMPILER_SUPPORT_X86_F16C)
-         message(WARNING "The compiler does not support f16c extension. NCNN_F16C will be OFF.")
-     elseif(NCNN_AVX)
-         option(NCNN_F16C "optimize x86 platform with f16c extension" ON)
-     endif()
-
-     # --- AVX2 and everything layered on top of it ---
-     if(NOT NCNN_COMPILER_SUPPORT_X86_AVX2)
-         message(WARNING "The compiler does not support avx2 extension. NCNN_AVX2 will be OFF.")
-     else()
-         if(NCNN_AVX)
-             option(NCNN_AVX2 "optimize x86 platform with avx2 extension" ON)
-         endif()
-
-         # AVX-VNNI family: the int8/int16 variants are only considered when
-         # the base avx vnni probe succeeded.
-         if(NOT NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
-             message(WARNING "The compiler does not support avx vnni extension. NCNN_AVXVNNI will be OFF.")
-         else()
-             if(NCNN_AVX2)
-                 option(NCNN_AVXVNNI "optimize x86 platform with avx vnni extension" ON)
-             endif()
-
-             if(NOT NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT8)
-                 message(WARNING "The compiler does not support avx vnni int8 extension. NCNN_AVXVNNIINT8 will be OFF.")
-             elseif(NCNN_AVXVNNI)
-                 option(NCNN_AVXVNNIINT8 "optimize x86 platform with avx vnni int8 extension" ON)
-             endif()
-
-             if(NOT NCNN_COMPILER_SUPPORT_X86_AVX_VNNI_INT16)
-                 message(WARNING "The compiler does not support avx vnni int16 extension. NCNN_AVXVNNIINT16 will be OFF.")
-             elseif(NCNN_AVXVNNI)
-                 option(NCNN_AVXVNNIINT16 "optimize x86 platform with avx vnni int16 extension" ON)
-             endif()
-         endif()
-
-         if(NOT NCNN_COMPILER_SUPPORT_X86_AVX_NE_CONVERT)
-             message(WARNING "The compiler does not support avx ne convert extension. NCNN_AVXNECONVERT will be OFF.")
-         elseif(NCNN_AVX2)
-             option(NCNN_AVXNECONVERT "optimize x86 platform with avx ne convert extension" ON)
-         endif()
-
-         # AVX-512 family: vnni/bf16/fp16 are only considered when the base
-         # avx512 probe succeeded.
-         if(NOT NCNN_COMPILER_SUPPORT_X86_AVX512)
-             message(WARNING "The compiler does not support avx512 extension. NCNN_AVX512 will be OFF.")
-         else()
-             if(NCNN_AVX2)
-                 option(NCNN_AVX512 "optimize x86 platform with avx512 extension" ON)
-             endif()
-
-             if(NOT NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
-                 message(WARNING "The compiler does not support avx512 vnni extension. NCNN_AVX512VNNI will be OFF.")
-             elseif(NCNN_AVX512)
-                 option(NCNN_AVX512VNNI "optimize x86 platform with avx512 vnni extension" ON)
-             endif()
-
-             if(NOT NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
-                 message(WARNING "The compiler does not support avx512 bf16 extension. NCNN_AVX512BF16 will be OFF.")
-             elseif(NCNN_AVX512)
-                 option(NCNN_AVX512BF16 "optimize x86 platform with avx512 bf16 extension" ON)
-             endif()
-
-             if(NOT NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
-                 message(WARNING "The compiler does not support avx512 fp16 extension. NCNN_AVX512FP16 will be OFF.")
-             elseif(NCNN_AVX512)
-                 option(NCNN_AVX512FP16 "optimize x86 platform with avx512 fp16 extension" ON)
-             endif()
-         endif()
-     endif()
- endif()
- endif()
-
- # Clear the try-compile overrides so they do not affect the rest of the
- # configuration (presumably set earlier for the compiler probes - confirm).
- unset(CMAKE_TRY_COMPILE_TARGET_TYPE)
- unset(CMAKE_TRY_COMPILE_CONFIGURATION)
-
- # Print the selected target architecture together with its pointer width.
- # NCNN_TARGET_ILP32 takes precedence over the CMAKE_SIZEOF_VOID_P check.
- if(NOT NCNN_TARGET_ILP32)
-     if(CMAKE_SIZEOF_VOID_P EQUAL 8)
-         message(STATUS "Target arch: ${NCNN_TARGET_ARCH} 64bit")
-     else()
-         message(STATUS "Target arch: ${NCNN_TARGET_ARCH} 32bit")
-     endif()
- else()
-     message(STATUS "Target arch: ${NCNN_TARGET_ARCH} 64bit ilp32")
- endif()
-
- ##############################################
-
- # Organize targets into folders for IDE generators and place CMake's
- # predefined targets in a folder named "cmake".
- set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "cmake")
- set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-
- # Emscripten builds: append the emcc settings (forced filesystem support,
- # 256MB initial memory, runtime exit handling) to the C/C++ compile flags and
- # to the executable link flags.
- if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
-     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
-     # CMAKE_EXE_LINKER_FLAGS is the variable CMake reads when linking
-     # executables; the previous spelling (CMAKE_EXECUTBLE_LINKER_FLAGS) is not
-     # a CMake variable, so these flags never reached the link flags through it.
-     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
-
-     # Threading flags are only added when both NCNN_OPENMP and NCNN_SIMPLEOMP are on.
-     if(NCNN_OPENMP AND NCNN_SIMPLEOMP)
-         # TODO better flags for emscripten
-         # node --experimental-wasm-threads xxx.js
-         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=15")
-         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=15")
-         set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=15")
-     endif()
- endif()
-
- if(NCNN_VULKAN)
- # Use a glslang installation from the system instead of the bundled copy.
- # Lookup order:
- #   1. find_package(glslang) config package -> alias its imported targets
- #   2. legacy *Targets.cmake files located in GLSLANG_TARGET_DIR
- #      (cache entry, or the GLSLANG_TARGET_DIR environment variable)
- # If neither yields both a glslang and a SPIRV target, NCNN_SYSTEM_GLSLANG is
- # switched off for the remainder of this file.
- if(NCNN_SYSTEM_GLSLANG)
-     find_package(Threads)
-     find_package(SPIRV-Tools QUIET)
-     find_package(SPIRV-Tools-opt QUIET)
-     find_package(glslang QUIET)
-     if(glslang_FOUND)
-         add_library(glslang ALIAS glslang::glslang)
-         add_library(SPIRV ALIAS glslang::SPIRV)
-     else()
-         set(GLSLANG_TARGET_DIR "GLSLANG-NOTFOUND" CACHE PATH "Absolute path to glslangTargets.cmake directory")
-         # The environment variable was previously only tested for existence
-         # and never read, so include() ran on "GLSLANG-NOTFOUND/..." when only
-         # the environment variable was provided. Adopt its value here.
-         if(NOT GLSLANG_TARGET_DIR AND DEFINED ENV{GLSLANG_TARGET_DIR})
-             set(GLSLANG_TARGET_DIR "$ENV{GLSLANG_TARGET_DIR}")
-         endif()
-         if(NOT GLSLANG_TARGET_DIR)
-             message(WARNING "set glslang_DIR to glslang-config.cmake directory for using system glslang.")
-             message(WARNING "GLSLANG_TARGET_DIR must be defined! NCNN_SYSTEM_GLSLANG will be turned off.")
-             set(NCNN_SYSTEM_GLSLANG OFF)
-         else()
-             include("${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake")
-             include("${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake")
-             if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
-                 # hlsl support can be optional
-                 include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
-             endif()
-             include("${GLSLANG_TARGET_DIR}/glslangTargets.cmake")
-             include("${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake")
-         endif()
-     endif()
-
-     # Final verdict: both targets must exist, whichever path provided them.
-     if(TARGET glslang AND TARGET SPIRV)
-         get_property(glslang_location TARGET glslang PROPERTY LOCATION)
-         get_property(SPIRV_location TARGET SPIRV PROPERTY LOCATION)
-         message(STATUS "Found glslang: ${glslang_location} (found version \"${glslang_VERSION}\")")
-         message(STATUS "Found SPIRV: ${SPIRV_location} (found version \"${glslang_VERSION}\")")
-     else()
-         message(WARNING "glslang or SPIRV target not found! NCNN_SYSTEM_GLSLANG will be turned off.")
-         set(NCNN_SYSTEM_GLSLANG OFF)
-     endif()
- endif()
-
- # Build the glslang copy from the glslang/ subdirectory when the system
- # glslang is not used.
- if(NOT NCNN_SYSTEM_GLSLANG)
-     # Guard: FATAL_ERROR stops processing, so nothing below runs without the sources.
-     if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/glslang/CMakeLists.txt")
-         message(FATAL_ERROR "The submodules were not downloaded! Please update submodules with \"git submodule update --init\" and try again.")
-     endif()
-
-     # glslang requires c++11
-     set(CMAKE_CXX_STANDARD 11)
-
-     # Defaults for options declared here ahead of add_subdirectory(glslang).
-     option(BUILD_EXTERNAL "" OFF)
-     option(ENABLE_SPVREMAPPER "" OFF)
-     option(ENABLE_GLSLANG_BINARIES "" OFF)
-     option(ENABLE_HLSL "" OFF)
-     option(ENABLE_RTTI "" OFF)
-     option(ENABLE_EXCEPTIONS "" OFF)
-     option(ENABLE_OPT "" OFF)
-     option(ENABLE_PCH "" OFF)
-     option(GLSLANG_TESTS "" OFF)
-     # glslang install rules default to ON only for a non-shared ncnn build.
-     if(NOT NCNN_SHARED_LIB)
-         option(GLSLANG_ENABLE_INSTALL "" ON)
-     else()
-         option(GLSLANG_ENABLE_INSTALL "" OFF)
-     endif()
-
-     add_subdirectory(glslang)
-
-     # Shared ncnn build: hide glslang symbols and optionally enable LTO on
-     # the two glslang targets.
-     if(NCNN_SHARED_LIB)
-         if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
-             target_compile_options(glslang PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
-             target_compile_options(glslang-default-resource-limits PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
-         endif()
-         if(NCNN_ENABLE_LTO)
-             set_target_properties(glslang glslang-default-resource-limits PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
-         endif()
-     endif()
- endif()
- endif()
-
- add_subdirectory(src) # src is added unconditionally, before every optional directory below
- if(NCNN_BUILD_BENCHMARK) # each remaining directory is added only when its NCNN_* switch is true
- add_subdirectory(benchmark)
- endif()
- if(NCNN_BUILD_EXAMPLES)
- add_subdirectory(examples)
- endif()
- if(NCNN_BUILD_TOOLS)
- add_subdirectory(tools)
- endif()
- if(NCNN_BUILD_TESTS)
- enable_testing() # enables add_test() for this directory and below, so it precedes add_subdirectory(tests)
- add_subdirectory(tests)
- endif()
- if(NCNN_PYTHON)
- add_subdirectory(python)
- endif()
|