Fix handling of cpu capability flags in DYNAMIC_ARCH builds (tags/v0.3.13^2)
| @@ -6,7 +6,7 @@ | |||||
| INCLUDED = 1 | INCLUDED = 1 | ||||
| ifndef TOPDIR | ifndef TOPDIR | ||||
| TOPDIR = . | |||||
| TOPDIR = . | |||||
| endif | endif | ||||
| # If ARCH is not set, we use the host system's architecture for getarch compile options. | # If ARCH is not set, we use the host system's architecture for getarch compile options. | ||||
| @@ -252,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" | |||||
| ifndef TARGET_CORE | ifndef TARGET_CORE | ||||
| include $(TOPDIR)/Makefile.conf | include $(TOPDIR)/Makefile.conf | ||||
| else | else | ||||
| HAVE_NEON= | |||||
| HAVE_VFP= | |||||
| HAVE_VFPV3= | |||||
| HAVE_VFPV4= | |||||
| HAVE_MMX= | |||||
| HAVE_SSE= | |||||
| HAVE_SSE2= | |||||
| HAVE_SSE3= | |||||
| HAVE_SSSE3= | |||||
| HAVE_SSE4_1= | |||||
| HAVE_SSE4_2= | |||||
| HAVE_SSE4A= | |||||
| HAVE_SSE5= | |||||
| HAVE_AVX= | |||||
| HAVE_AVX2= | |||||
| HAVE_FMA3= | |||||
| include $(TOPDIR)/Makefile_kernel.conf | include $(TOPDIR)/Makefile_kernel.conf | ||||
| endif | endif | ||||
| @@ -1522,6 +1538,8 @@ export HAVE_SSE4_2 | |||||
| export HAVE_SSE4A | export HAVE_SSE4A | ||||
| export HAVE_SSE5 | export HAVE_SSE5 | ||||
| export HAVE_AVX | export HAVE_AVX | ||||
| export HAVE_AVX2 | |||||
| export HAVE_FMA3 | |||||
| export HAVE_VFP | export HAVE_VFP | ||||
| export HAVE_VFPV3 | export HAVE_VFPV3 | ||||
| export HAVE_VFPV4 | export HAVE_VFPV4 | ||||
| @@ -9,9 +9,9 @@ endif | |||||
| endif | endif | ||||
| ifdef HAVE_SSE3 | ifdef HAVE_SSE3 | ||||
| ifndef DYNAMIC_ARCH | |||||
| CCOMMON_OPT += -msse3 | CCOMMON_OPT += -msse3 | ||||
| FCOMMON_OPT += -msse3 | FCOMMON_OPT += -msse3 | ||||
| endif | |||||
| ifdef HAVE_SSSE3 | ifdef HAVE_SSSE3 | ||||
| CCOMMON_OPT += -mssse3 | CCOMMON_OPT += -mssse3 | ||||
| FCOMMON_OPT += -mssse3 | FCOMMON_OPT += -mssse3 | ||||
| @@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1 | |||||
| CCOMMON_OPT += -msse4.1 | CCOMMON_OPT += -msse4.1 | ||||
| FCOMMON_OPT += -msse4.1 | FCOMMON_OPT += -msse4.1 | ||||
| endif | endif | ||||
| ifdef HAVE_AVX | |||||
| CCOMMON_OPT += -mavx | |||||
| FCOMMON_OPT += -mavx | |||||
| endif | endif | ||||
| ifdef HAVE_AVX2 | |||||
| CCOMMON_OPT += -mavx2 | |||||
| FCOMMON_OPT += -mavx2 | |||||
| endif | |||||
| ifdef HAVE_FMA3 | |||||
| CCOMMON_OPT += -mfma | |||||
| FCOMMON_OPT += -mfma | |||||
| endif | endif | ||||
| ifeq ($(CORE), SKYLAKEX) | ifeq ($(CORE), SKYLAKEX) | ||||
| @@ -66,8 +76,7 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE)) | |||||
| ifndef DYNAMIC_ARCH | |||||
| ifdef HAVE_AVX2 | |||||
| ifndef NO_AVX2 | ifndef NO_AVX2 | ||||
| ifeq ($(C_COMPILER), GCC) | ifeq ($(C_COMPILER), GCC) | ||||
| # AVX2 support was added in 4.7.0 | # AVX2 support was added in 4.7.0 | ||||
| @@ -96,7 +105,6 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| @@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN") | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (${CORE} STREQUAL "SKYLAKEX") | |||||
| if (${CORE} STREQUAL SKYLAKEX) | |||||
| if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
| if (NOT NO_AVX512) | if (NOT NO_AVX512) | ||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") | set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") | ||||
| @@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX") | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (${CORE} STREQUAL "COOPERLAKE") | |||||
| if (${CORE} STREQUAL COOPERLAKE) | |||||
| if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
| if (NOT NO_AVX512) | if (NOT NO_AVX512) | ||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | ||||
| @@ -139,36 +139,6 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||||
| set(CGEMM3M_UNROLL_N 4) | set(CGEMM3M_UNROLL_N 4) | ||||
| set(ZGEMM3M_UNROLL_M 4) | set(ZGEMM3M_UNROLL_M 4) | ||||
| set(ZGEMM3M_UNROLL_N 4) | set(ZGEMM3M_UNROLL_N 4) | ||||
| elseif ("${TCORE}" STREQUAL "BARCELONA") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_SSE3\n") | |||||
| elseif ("${TCORE}" STREQUAL "STEAMROLLER") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_SSE3\n") | |||||
| elseif ("${TCORE}" STREQUAL "EXCAVATOR") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_SSE3\n") | |||||
| elseif ("${TCORE}" STREQUAL "NEHALEM") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_SSE3\n") | |||||
| elseif ("${TCORE}" STREQUAL "PRESCOTT") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_SSE3\n") | |||||
| elseif ("${TCORE}" STREQUAL "SANDYBRIDGE") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_AVX\n") | |||||
| elseif ("${TCORE}" STREQUAL "HASWELL") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_AVX2\n") | |||||
| elseif ("${TCORE}" STREQUAL "ZEN") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_AVX2\n") | |||||
| elseif ("${TCORE}" STREQUAL "SKYLAKEX") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_AVX512\n") | |||||
| elseif ("${TCORE}" STREQUAL "COOPERLAKE") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define HAVE_AVX512\n") | |||||
| elseif ("${TCORE}" STREQUAL "ARMV7") | elseif ("${TCORE}" STREQUAL "ARMV7") | ||||
| file(APPEND ${TARGET_CONF_TEMP} | file(APPEND ${TARGET_CONF_TEMP} | ||||
| "#define L1_DATA_SIZE\t65536\n" | "#define L1_DATA_SIZE\t65536\n" | ||||
| @@ -586,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING) | |||||
| MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") | MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") | ||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| unset (HAVE_AVX2) | |||||
| unset (HAVE_AVX) | |||||
| unset (HAVE_FMA3) | |||||
| unset (HAVE_MMX) | |||||
| unset (HAVE_SSE) | |||||
| unset (HAVE_SSE2) | |||||
| unset (HAVE_SSE3) | |||||
| unset (HAVE_SSSE3) | |||||
| unset (HAVE_SSE4A) | |||||
| unset (HAVE_SSE4_1) | |||||
| unset (HAVE_SSE4_2) | |||||
| unset (HAVE_NEON) | |||||
| unset (HAVE_VFP) | |||||
| unset (HAVE_VFPV3) | |||||
| unset (HAVE_VFPV4) | |||||
| message(STATUS "Running getarch") | message(STATUS "Running getarch") | ||||
| # use the cmake binary w/ the -E param to run a shell command in a cross-platform way | # use the cmake binary w/ the -E param to run a shell command in a cross-platform way | ||||
| @@ -44,74 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (DEFINED TARGET) | |||||
| if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512) | |||||
| # if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") | |||||
| else() | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| # elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| # set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| # endif() | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2) | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2") | |||||
| endif() | |||||
| elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse -msse3 -mavx2") | |||||
| endif() | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "ZEN" AND NOT NO_AVX2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "SANDYBRIDGE" AND NOT NO_AVX) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "BARCELONA" OR ${TARGET} STREQUAL "STEAMROLLER" OR ${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "EXCAVATOR") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "BOBCAT" OR ${TARGET} STREQUAL "OPTERON_SSE3") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "PRESCOTT" OR ${TARGET} STREQUAL "NANO") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "NEHALEM" OR ${TARGET} STREQUAL "ATOM") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "CORE2" OR ${TARGET} STREQUAL "PENRYN" OR ${TARGET} STREQUAL "DUNNINGTON") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE4_1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED TARGET) | if (DEFINED TARGET) | ||||
| message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --") | |||||
| message(STATUS "Targeting the ${TARGET} architecture.") | message(STATUS "Targeting the ${TARGET} architecture.") | ||||
| set(GETARCH_FLAGS "-DFORCE_${TARGET}") | set(GETARCH_FLAGS "-DFORCE_${TARGET}") | ||||
| endif () | endif () | ||||
| @@ -211,6 +146,63 @@ else() | |||||
| endif () | endif () | ||||
| include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") | include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") | ||||
| if (DEFINED TARGET) | |||||
| if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512) | |||||
| # if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") | |||||
| else() | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| # elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| # set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| # endif() | |||||
| endif() | |||||
| if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2) | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_AVX) | |||||
| if (NOT NO_AVX) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_AVX2) | |||||
| if (NOT NO_AVX2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_FMA3) | |||||
| if (NOT NO_AVX2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE4_1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED BINARY) | if (DEFINED BINARY) | ||||
| message(STATUS "Compiling a ${BINARY}-bit binary.") | message(STATUS "Compiling a ${BINARY}-bit binary.") | ||||
| endif () | endif () | ||||
| @@ -5,13 +5,6 @@ endif | |||||
| TOPDIR = .. | TOPDIR = .. | ||||
| include $(TOPDIR)/Makefile.system | include $(TOPDIR)/Makefile.system | ||||
| ifdef HAVE_SSE3 | |||||
| CFLAGS += -msse3 | |||||
| endif | |||||
| ifdef HAVE_SSSE3 | |||||
| CFLAGS += -mssse3 | |||||
| endif | |||||
| ifeq ($(ARCH), power) | ifeq ($(ARCH), power) | ||||
| ifeq ($(C_COMPILER), CLANG) | ifeq ($(C_COMPILER), CLANG) | ||||
| override CFLAGS += -fno-integrated-as | override CFLAGS += -fno-integrated-as | ||||
| @@ -38,12 +31,6 @@ ifdef NO_AVX2 | |||||
| endif | endif | ||||
| ifdef TARGET_CORE | ifdef TARGET_CORE | ||||
| ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) | |||||
| override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1 | |||||
| endif | |||||
| ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON)) | |||||
| override CFLAGS += -msse -msse2 | |||||
| endif | |||||
| ifeq ($(TARGET_CORE), COOPERLAKE) | ifeq ($(TARGET_CORE), COOPERLAKE) | ||||
| override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) | override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) | ||||
| ifeq ($(GCCVERSIONGTEQ10), 1) | ifeq ($(GCCVERSIONGTEQ10), 1) | ||||