Browse Source

Merge pull request #2978 from martin-frbg/fixdynfeatures

Fix handling of cpu capability flags in DYNAMIC_ARCH builds
tags/v0.3.13^2
Martin Kroeker GitHub 5 years ago
parent
commit
d2faa1be4e
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 106 additions and 116 deletions
  1. +19
    -1
      Makefile.system
  2. +12
    -4
      Makefile.x86_64
  3. +2
    -2
      cmake/cc.cmake
  4. +15
    -30
      cmake/prebuild.cmake
  5. +58
    -66
      cmake/system.cmake
  6. +0
    -13
      kernel/Makefile

+ 19
- 1
Makefile.system View File

@@ -6,7 +6,7 @@
INCLUDED = 1

ifndef TOPDIR
TOPDIR = .
TOPDIR = .
endif

# If ARCH is not set, we use the host system's architecture for getarch compile options.
@@ -252,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
# Pick the configuration fragment to load: Makefile.conf when TARGET_CORE is
# not set, Makefile_kernel.conf otherwise.
ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf
else
# Blank every CPU capability flag before Makefile_kernel.conf is included.
# `ifdef` treats a variable with an empty value as undefined, so after this
# point a HAVE_* test succeeds only if the included file assigns the flag a
# non-empty value.
# NOTE(review): presumably Makefile_kernel.conf carries the flags of the
# kernel's own target core - confirm against whatever generates that file.
HAVE_NEON=
HAVE_VFP=
HAVE_VFPV3=
HAVE_VFPV4=
HAVE_MMX=
HAVE_SSE=
HAVE_SSE2=
HAVE_SSE3=
HAVE_SSSE3=
HAVE_SSE4_1=
HAVE_SSE4_2=
HAVE_SSE4A=
HAVE_SSE5=
HAVE_AVX=
HAVE_AVX2=
HAVE_FMA3=
include $(TOPDIR)/Makefile_kernel.conf
endif

@@ -1522,6 +1538,8 @@ export HAVE_SSE4_2
export HAVE_SSE4A
export HAVE_SSE5
export HAVE_AVX
export HAVE_AVX2
export HAVE_FMA3
export HAVE_VFP
export HAVE_VFPV3
export HAVE_VFPV4


+ 12
- 4
Makefile.x86_64 View File

@@ -9,9 +9,9 @@ endif
endif

ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
endif
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3
@@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1
endif
# For each capability flag that is set to a non-empty value, append the
# matching instruction-set switch to both CCOMMON_OPT and FCOMMON_OPT.
# NOTE(review): presumably these are the common C and Fortran compiler option
# lists - confirm where they are consumed.
ifdef HAVE_AVX
CCOMMON_OPT += -mavx
FCOMMON_OPT += -mavx
endif
ifdef HAVE_AVX2
CCOMMON_OPT += -mavx2
FCOMMON_OPT += -mavx2
endif
# FMA3 is enabled with the -mfma switch.
ifdef HAVE_FMA3
CCOMMON_OPT += -mfma
FCOMMON_OPT += -mfma
endif

ifeq ($(CORE), SKYLAKEX)
@@ -66,8 +76,7 @@ endif
endif
endif

ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
ifndef DYNAMIC_ARCH
ifdef HAVE_AVX2
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0
@@ -96,7 +105,6 @@ endif
endif
endif
endif
endif





+ 2
- 2
cmake/cc.cmake View File

@@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
endif ()
endif ()

if (${CORE} STREQUAL "SKYLAKEX")
if (${CORE} STREQUAL SKYLAKEX)
if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
@@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX")
endif ()
endif ()

if (${CORE} STREQUAL "COOPERLAKE")
if (${CORE} STREQUAL COOPERLAKE)
if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)


+ 15
- 30
cmake/prebuild.cmake View File

@@ -139,36 +139,6 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(CGEMM3M_UNROLL_N 4)
set(ZGEMM3M_UNROLL_M 4)
set(ZGEMM3M_UNROLL_N 4)
elseif ("${TCORE}" STREQUAL "BARCELONA")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "STEAMROLLER")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "EXCAVATOR")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "NEHALEM")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "PRESCOTT")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_SSE3\n")
elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX\n")
elseif ("${TCORE}" STREQUAL "HASWELL")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX2\n")
elseif ("${TCORE}" STREQUAL "ZEN")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX2\n")
elseif ("${TCORE}" STREQUAL "SKYLAKEX")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX512\n")
elseif ("${TCORE}" STREQUAL "COOPERLAKE")
file(APPEND ${TARGET_CONF_TEMP}
"#define HAVE_AVX512\n")
elseif ("${TCORE}" STREQUAL "ARMV7")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE\t65536\n"
@@ -586,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif ()
endif ()
# Clear every CPU capability flag in the current scope before getarch is run.

# x86 SIMD levels
unset(HAVE_MMX)
unset(HAVE_SSE)
unset(HAVE_SSE2)
unset(HAVE_SSE3)
unset(HAVE_SSSE3)
unset(HAVE_SSE4_1)
unset(HAVE_SSE4_2)
unset(HAVE_SSE4A)
unset(HAVE_AVX)
unset(HAVE_AVX2)
unset(HAVE_FMA3)

# ARM floating-point / SIMD units
unset(HAVE_NEON)
unset(HAVE_VFP)
unset(HAVE_VFPV3)
unset(HAVE_VFPV4)
message(STATUS "Running getarch")

# use the cmake binary w/ the -E param to run a shell command in a cross-platform way


+ 58
- 66
cmake/system.cmake View File

@@ -44,74 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
endif ()
endif ()

# Derive extra kernel compile flags (KERNEL_DEFINITIONS) from an explicitly
# requested TARGET and from any HAVE_* capability variables that are already
# defined at this point.
#
# Variable expansions in the conditions are quoted so that an empty value
# (e.g. a failed compiler probe) cannot produce a malformed if() expression.
if (DEFINED TARGET)
  if ("${TARGET}" STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
    # The version probe is not gated on the compiler ID, so every compiler is
    # judged by whatever `-dumpversion` prints. -march=cooperlake is used from
    # version 10.1 on; anything older gets the skylake-avx512 baseline.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    if ("${GCC_VERSION}" VERSION_GREATER 10.1 OR "${GCC_VERSION}" VERSION_EQUAL 10.1)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
    else()
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
    endif()
  endif()

  if ("${TARGET}" STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
  endif()

  if ("${TARGET}" STREQUAL "HASWELL" AND NOT NO_AVX2)
    if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
      # -mavx2 is only passed to GCC 4.7 or newer.
      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                      OUTPUT_VARIABLE GCC_VERSION
                      OUTPUT_STRIP_TRAILING_WHITESPACE)
      if ("${GCC_VERSION}" VERSION_GREATER 4.7 OR "${GCC_VERSION}" VERSION_EQUAL 4.7)
        set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
      endif()
    # CMake reports "Clang" / "AppleClang"; the former comparison against
    # "CLANG" could never be true because STREQUAL is case-sensitive.
    elseif ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse -msse3 -mavx2")
    endif()
  endif()

  # NOTE(review): this second HASWELL branch appends the flags regardless of
  # compiler or version, which makes the gated branch above redundant. It is
  # kept unchanged to preserve the resulting flag set - confirm which of the
  # two is intended.
  if ("${TARGET}" STREQUAL "HASWELL" AND NOT NO_AVX2)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
  endif()

  if ("${TARGET}" STREQUAL "ZEN" AND NOT NO_AVX2)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
  endif()

  if ("${TARGET}" STREQUAL "SANDYBRIDGE" AND NOT NO_AVX)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx")
  endif()

  # Every core in this list receives -msse3 and nothing else. TARGET holds a
  # single name, so one anchored match replaces the separate per-family tests.
  if ("${TARGET}" MATCHES "^(BARCELONA|STEAMROLLER|BULLDOZER|EXCAVATOR|PILEDRIVER|BOBCAT|OPTERON_SSE3|PRESCOTT|NANO|NEHALEM|ATOM|CORE2|PENRYN|DUNNINGTON)$")
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
  endif()

  # Capability-driven switches: one compiler option per defined HAVE_* flag.
  if (DEFINED HAVE_SSE)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
  endif()
  if (DEFINED HAVE_SSE2)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
  endif()
  if (DEFINED HAVE_SSE3)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
  endif()
  if (DEFINED HAVE_SSSE3)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
  endif()
  if (DEFINED HAVE_SSE4_1)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
  endif()
endif()

# When a TARGET has been requested, record the matching -DFORCE_<TARGET>
# define in GETARCH_FLAGS and report the selection in the configure log.
if(DEFINED TARGET)
  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
  message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
  message(STATUS "Targeting the ${TARGET} architecture.")
endif()
@@ -211,6 +146,63 @@ else()
endif ()

include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
# Derive extra kernel compile flags (KERNEL_DEFINITIONS) for an explicitly
# requested TARGET. This block sits directly after the include of
# prebuild.cmake, so any HAVE_* capability variables defined by that step are
# visible to the DEFINED tests below.
#
# Variable expansions in the conditions are quoted so that an empty value
# (e.g. a failed compiler probe) cannot produce a malformed if() expression.
if (DEFINED TARGET)
  if ("${TARGET}" STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
    # The version probe is not gated on the compiler ID, so every compiler is
    # judged by whatever `-dumpversion` prints. -march=cooperlake is used from
    # version 10.1 on; anything older gets the skylake-avx512 baseline.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    if ("${GCC_VERSION}" VERSION_GREATER 10.1 OR "${GCC_VERSION}" VERSION_EQUAL 10.1)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
    else()
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
    endif()
  endif()

  if ("${TARGET}" STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
  endif()

  if ("${TARGET}" STREQUAL "HASWELL" AND NOT NO_AVX2)
    if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
      # -mavx2 is only passed to GCC 4.7 or newer.
      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                      OUTPUT_VARIABLE GCC_VERSION
                      OUTPUT_STRIP_TRAILING_WHITESPACE)
      if ("${GCC_VERSION}" VERSION_GREATER 4.7 OR "${GCC_VERSION}" VERSION_EQUAL 4.7)
        set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
      endif()
    # CMake reports "Clang" / "AppleClang"; the former comparison against
    # "CLANG" could never be true because STREQUAL is case-sensitive.
    elseif ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
    endif()
  endif()

  # Capability-driven switches: one compiler option per defined HAVE_* flag.
  # The AVX-family options can be vetoed by the user through NO_AVX / NO_AVX2.
  if (DEFINED HAVE_AVX)
    if (NOT NO_AVX)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
    endif()
  endif()
  if (DEFINED HAVE_AVX2)
    if (NOT NO_AVX2)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
    endif()
  endif()
  # FMA3 shares the NO_AVX2 opt-out; there is no separate switch for it here.
  if (DEFINED HAVE_FMA3)
    if (NOT NO_AVX2)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
    endif()
  endif()
  if (DEFINED HAVE_SSE)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
  endif()
  if (DEFINED HAVE_SSE2)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
  endif()
  if (DEFINED HAVE_SSE3)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
  endif()
  if (DEFINED HAVE_SSSE3)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
  endif()
  if (DEFINED HAVE_SSE4_1)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
  endif()
endif()
if (DEFINED BINARY)
message(STATUS "Compiling a ${BINARY}-bit binary.")
endif ()


+ 0
- 13
kernel/Makefile View File

@@ -5,13 +5,6 @@ endif
TOPDIR = ..
include $(TOPDIR)/Makefile.system

# Append the SSE3 / SSSE3 instruction-set switches to CFLAGS when the
# corresponding capability flag is set to a non-empty value.
ifdef HAVE_SSE3
CFLAGS += -msse3
endif
ifdef HAVE_SSSE3
CFLAGS += -mssse3
endif

ifeq ($(ARCH), power)
ifeq ($(C_COMPILER), CLANG)
override CFLAGS += -fno-integrated-as
@@ -38,12 +31,6 @@ ifdef NO_AVX2
endif

ifdef TARGET_CORE
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1
endif
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON))
override CFLAGS += -msse -msse2
endif
ifeq ($(TARGET_CORE), COOPERLAKE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
ifeq ($(GCCVERSIONGTEQ10), 1)


Loading…
Cancel
Save