| @@ -29,10 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc | |||
| else() | |||
| set(NO_AFFINITY 1) | |||
| endif() | |||
| option(BUILD_SINGLE "Single precision" OFF) | |||
| option(BUILD_DOUBLE "Double precision" OFF) | |||
| option(BUILD_COMPLEX "Single precision" OFF) | |||
| option(BUILD_COMPLEX16 "Single precision" OFF) | |||
| option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF) | |||
| option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF) | |||
| # Add a prefix or suffix to all exported symbol names in the shared library. | |||
| # Avoids conflicts with other BLAS libraries, especially when using | |||
| @@ -91,13 +89,13 @@ if (NOT NO_LAPACK) | |||
| list(APPEND SUBDIRS lapack) | |||
| endif () | |||
| if (NOT DEFINED BUILD_HALF) | |||
| set (BUILD_HALF false) | |||
| if (NOT DEFINED BUILD_BFLOAT16) | |||
| set (BUILD_BFLOAT16 false) | |||
| endif () | |||
| # set which float types we want to build for | |||
| if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) | |||
| # if none are defined, build for all | |||
| # set(BUILD_HALF true) | |||
| # set(BUILD_BFLOAT16 true) | |||
| set(BUILD_SINGLE true) | |||
| set(BUILD_DOUBLE true) | |||
| set(BUILD_COMPLEX true) | |||
| @@ -110,33 +108,28 @@ endif() | |||
| set(FLOAT_TYPES "") | |||
| if (BUILD_SINGLE) | |||
| message(STATUS "Building Songle Precision") | |||
| list(APPEND FLOAT_TYPES "SINGLE") | |||
| # set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1") | |||
| message(STATUS "Building Single Precision") | |||
| list(APPEND FLOAT_TYPES "SINGLE") # defines nothing | |||
| endif () | |||
| if (BUILD_DOUBLE) | |||
| message(STATUS "Building Double Precision") | |||
| list(APPEND FLOAT_TYPES "DOUBLE") | |||
| #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1") | |||
| list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE | |||
| endif () | |||
| if (BUILD_COMPLEX) | |||
| message(STATUS "Building Complex Precision") | |||
| list(APPEND FLOAT_TYPES "COMPLEX") | |||
| #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1") | |||
| endif () | |||
| list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX | |||
| endif () | |||
| if (BUILD_COMPLEX16) | |||
| message(STATUS "Building Double Complex Precision") | |||
| list(APPEND FLOAT_TYPES "ZCOMPLEX") | |||
| #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1") | |||
| list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE | |||
| endif () | |||
| if (BUILD_HALF) | |||
| if (BUILD_BFLOAT16) | |||
| message(STATUS "Building Half Precision") | |||
| list(APPEND FLOAT_TYPES "HALF") | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF") | |||
| list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing | |||
| endif () | |||
| if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") | |||
| @@ -243,6 +236,9 @@ if (NOT MSVC AND NOT NOFORTRAN) | |||
| add_subdirectory(ctest) | |||
| endif() | |||
| add_subdirectory(lapack-netlib/TESTING) | |||
| if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) | |||
| add_subdirectory(cpp_thread_test) | |||
| endif() | |||
| endif() | |||
| set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES | |||
| @@ -272,17 +272,33 @@ COMMON_PROF = -pg | |||
| # work at all. | |||
| # | |||
| # CPP_THREAD_SAFETY_TEST = 1 | |||
| # | |||
| # use this to run only the less memory-hungry GEMV test | |||
| # CPP_THREAD_SAFETY_GEMV = 1 | |||
| # If you want to enable the experimental BFLOAT16 support | |||
| # BUILD_HALF = 1 | |||
| # | |||
| # Select if you need to build only select types | |||
| # BUILD_SINGLE = 1 | |||
| # BUILD_DOUBLE = 1 | |||
| # BUILD_COMPLEX = 1 | |||
| # BUILD_COMPLEX16 = 1 | |||
| # | |||
| # | |||
| # BUILD_BFLOAT16 = 1 | |||
| # Set the thread number threshold beyond which the job array for the threaded level3 BLAS | |||
| # will be allocated on the heap rather than the stack. (This array alone requires | |||
| # NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu | |||
| # counts, but obviously it is not the only item that ends up on the stack.) | |||
| # The default value of 32 ensures that the overall requirement is compatible | |||
| # with the default 1MB stacksize imposed by having the Java VM loaded without use | |||
| # of its -Xss parameter. | |||
| # The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible | |||
| # with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java | |||
| # VM, e.g. in Octave, or with the Java-based libhdfs in numpy or scipy code. | |||
| # BLAS3_MEM_ALLOC_THRESHOLD = 160 | |||
| # The settings below are not yet configurable here; use cmake if you need to build only select types | |||
| BUILD_SINGLE = 1 | |||
| BUILD_DOUBLE = 1 | |||
| BUILD_COMPLEX = 1 | |||
| BUILD_COMPLEX16 = 1 | |||
| # End of user configuration | |||
| # | |||
| @@ -1232,8 +1232,8 @@ ifeq ($(USE_TLS), 1) | |||
| CCOMMON_OPT += -DUSE_TLS | |||
| endif | |||
| ifeq ($(BUILD_HALF), 1) | |||
| CCOMMON_OPT += -DBUILD_HALF | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| CCOMMON_OPT += -DBUILD_BFLOAT16 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE), 1) | |||
| CCOMMON_OPT += -DBUILD_SINGLE=1 | |||
| @@ -1521,10 +1521,10 @@ export KERNELDIR | |||
| export FUNCTION_PROFILE | |||
| export TARGET_CORE | |||
| export NO_AVX512 | |||
| export BUILD_HALF | |||
| export BUILD_BFLOAT16 | |||
| export SHGEMM_UNROLL_M | |||
| export SHGEMM_UNROLL_N | |||
| export SBGEMM_UNROLL_M | |||
| export SBGEMM_UNROLL_N | |||
| export SGEMM_UNROLL_M | |||
| export SGEMM_UNROLL_N | |||
| export DGEMM_UNROLL_M | |||
| @@ -24,14 +24,14 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
| BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | |||
| endif | |||
| $(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX | |||
| $(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||
| $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | |||
| $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | |||
| $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | |||
| $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | |||
| $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | |||
| $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | |||
| $(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX | |||
| $(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||
| $(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||