| @@ -66,3 +66,5 @@ test/sblat3 | |||||
| test/zblat1 | test/zblat1 | ||||
| test/zblat2 | test/zblat2 | ||||
| test/zblat3 | test/zblat3 | ||||
| build | |||||
| build.* | |||||
| @@ -0,0 +1,190 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## | |||||
| cmake_minimum_required(VERSION 2.8.4) | |||||
| project(OpenBLAS) | |||||
| # Library version. Keep in sync with VERSION in Makefile.rule (0.2.15). | |||||
| set(OpenBLAS_MAJOR_VERSION 0) | |||||
| set(OpenBLAS_MINOR_VERSION 2) | |||||
| set(OpenBLAS_PATCH_VERSION 15) | |||||
| set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | |||||
| # Enable the ASM and C languages for this project. | |||||
| enable_language(ASM) | |||||
| enable_language(C) | |||||
| # Target name of the library: "libopenblas" with MSVC, "openblas" otherwise. | |||||
| set(OpenBLAS_LIBNAME openblas) | |||||
| if(MSVC) | |||||
|   set(OpenBLAS_LIBNAME libopenblas) | |||||
| endif() | |||||
| ####### | |||||
| # User-facing switches. BUILD_WITHOUT_LAPACK is only offered (default ON) with MSVC. | |||||
| if(MSVC) | |||||
|   option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) | |||||
| endif() | |||||
| option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF) | |||||
| option(BUILD_DEBUG "Build Debug Version" OFF) | |||||
| ####### | |||||
| # Translate the options into the NO_* variables used by the rest of the build. | |||||
| if(BUILD_WITHOUT_LAPACK) | |||||
|   set(NO_LAPACK 1) | |||||
|   set(NO_LAPACKE 1) | |||||
| endif() | |||||
| # BUILD_DEBUG selects Debug. Otherwise default to Release only when the user | |||||
| # has not already chosen a build type, so -DCMAKE_BUILD_TYPE=... is respected. | |||||
| if(BUILD_DEBUG) | |||||
|   set(CMAKE_BUILD_TYPE Debug) | |||||
| elseif(NOT CMAKE_BUILD_TYPE) | |||||
|   set(CMAKE_BUILD_TYPE Release) | |||||
| endif() | |||||
| if(BUILD_WITHOUT_CBLAS) | |||||
|   set(NO_CBLAS 1) | |||||
| endif() | |||||
| ####### | |||||
| message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.") | |||||
| # Pull in the helper and system-configuration scripts. | |||||
| include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake") | |||||
| include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") | |||||
| # BLASDIRS: source directories of the BLAS part of the library. | |||||
| set(BLASDIRS interface driver/level2 driver/level3 driver/others) | |||||
| if(NOT DYNAMIC_ARCH) | |||||
|   list(APPEND BLASDIRS kernel) | |||||
| endif() | |||||
| # Defining UTEST_CHECK switches on SANITY_CHECK, which adds the reference directory. | |||||
| if(DEFINED UTEST_CHECK) | |||||
|   set(SANITY_CHECK 1) | |||||
| endif() | |||||
| if(DEFINED SANITY_CHECK) | |||||
|   list(APPEND BLASDIRS reference) | |||||
| endif() | |||||
| # SUBDIRS: BLASDIRS plus lapack unless LAPACK is disabled. | |||||
| set(SUBDIRS ${BLASDIRS}) | |||||
| if(NOT NO_LAPACK) | |||||
|   list(APPEND SUBDIRS lapack) | |||||
| endif() | |||||
| # Select the precisions to build. When none of the BUILD_* variables is | |||||
| # defined, all four precisions are enabled. | |||||
| if(NOT (DEFINED BUILD_SINGLE OR DEFINED BUILD_DOUBLE OR DEFINED BUILD_COMPLEX OR DEFINED BUILD_COMPLEX16)) | |||||
|   set(BUILD_SINGLE true) | |||||
|   set(BUILD_DOUBLE true) | |||||
|   set(BUILD_COMPLEX true) | |||||
|   set(BUILD_COMPLEX16 true) | |||||
| endif() | |||||
| # FLOAT_TYPES collects one entry per enabled precision. | |||||
| set(FLOAT_TYPES "") | |||||
| if(BUILD_SINGLE) | |||||
|   message(STATUS "Building Single Precision") | |||||
|   list(APPEND FLOAT_TYPES "SINGLE") # defines nothing | |||||
| endif() | |||||
| if(BUILD_DOUBLE) | |||||
|   message(STATUS "Building Double Precision") | |||||
|   list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE | |||||
| endif() | |||||
| if(BUILD_COMPLEX) | |||||
|   message(STATUS "Building Complex Precision") | |||||
|   list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX | |||||
| endif() | |||||
| if(BUILD_COMPLEX16) | |||||
|   message(STATUS "Building Double Complex Precision") | |||||
|   list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE | |||||
| endif() | |||||
| set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench) | |||||
| # all :: libs netlib tests shared | |||||
| # libs : | |||||
| # Abort when no CPU core was detected or supplied. | |||||
| if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") | |||||
|   message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.") | |||||
| endif () | |||||
| # Pass the variable names so if() dereferences them itself; an unquoted | |||||
| # ${...} expansion leaves a malformed condition when a variable is unset. | |||||
| if (NO_STATIC AND NO_SHARED) | |||||
|   message(FATAL_ERROR "Neither static nor shared are enabled.") | |||||
| endif () | |||||
| # Build one $<TARGET_OBJECTS:name> entry per subdirectory so the object files | |||||
| # can be handed to add_library (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html). | |||||
| # The name used is the subdirectory path with "/" replaced by "_". | |||||
| set(TARGET_OBJS "") | |||||
| foreach(SUBDIR ${SUBDIRS}) | |||||
|   add_subdirectory(${SUBDIR}) | |||||
|   string(REPLACE "/" "_" subdir_obj ${SUBDIR}) | |||||
|   list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>") | |||||
| endforeach() | |||||
| # netlib: | |||||
| # lapack-netlib's own CMake files search for BLAS and build/install a binary, | |||||
| # which is not wanted here; only a couple of lib files from lapack and lapacke are needed. | |||||
| # lapack-netlib already has its own CMakeLists.txt, so instead of add_subdirectory | |||||
| # a cmake script listing the wanted sources is included. | |||||
| if(NOT NOFORTRAN AND NOT NO_LAPACK) | |||||
|   include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") | |||||
|   if(NOT NO_LAPACKE) | |||||
|     include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") | |||||
|   endif() | |||||
| endif() | |||||
| # The .def file is only used for the DLL built with MSVC. | |||||
| if(MSVC) | |||||
|   set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") | |||||
| endif() | |||||
| # Shared library assembled from the LAPACK/LAPACKE sources and the collected objects. | |||||
| add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) | |||||
| include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") | |||||
| if(NOT MSVC) | |||||
|   # With MSVC only the shared library is built; elsewhere a static archive | |||||
|   # with the same output name is added. | |||||
|   add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) | |||||
|   set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES | |||||
|     OUTPUT_NAME ${OpenBLAS_LIBNAME} | |||||
|     CLEAN_DIRECT_OUTPUT 1) | |||||
|   if(SMP) | |||||
|     target_link_libraries(${OpenBLAS_LIBNAME} pthread) | |||||
|     target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) | |||||
|   endif() | |||||
|   # Build test and ctest. | |||||
|   enable_testing() | |||||
|   add_subdirectory(test) | |||||
|   if(NOT NO_CBLAS) | |||||
|     add_subdirectory(ctest) | |||||
|   endif() | |||||
| endif() | |||||
| # Version metadata of the shared library. | |||||
| set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES | |||||
|   VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION} | |||||
|   SOVERSION ${OpenBLAS_MAJOR_VERSION} | |||||
| ) | |||||
| # TODO: Why is the config saved here? Is this necessary with CMake? | |||||
| #Save the config files for installation | |||||
| # @cp Makefile.conf Makefile.conf_last | |||||
| # @cp config.h config_last.h | |||||
| #ifdef QUAD_PRECISION | |||||
| # @echo "#define QUAD_PRECISION">> config_last.h | |||||
| #endif | |||||
| #ifeq ($(EXPRECISION), 1) | |||||
| # @echo "#define EXPRECISION">> config_last.h | |||||
| #endif | |||||
| ### | |||||
| #ifeq ($(DYNAMIC_ARCH), 1) | |||||
| # @$(MAKE) -C kernel commonlibs || exit 1 | |||||
| # @for d in $(DYNAMIC_CORE) ; \ | |||||
| # do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ | |||||
| # done | |||||
| # @echo DYNAMIC_ARCH=1 >> Makefile.conf_last | |||||
| #endif | |||||
| #ifdef USE_THREAD | |||||
| # @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last | |||||
| #endif | |||||
| # @touch lib.grd | |||||
| @@ -1,4 +1,57 @@ | |||||
| OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
| ==================================================================== | |||||
| Version 0.2.15 | |||||
| 27-Oct-2015 | |||||
| common: | |||||
| * Support cmake on x86/x86-64. Natively compiling on MS Visual Studio. | |||||
| (Experimental. Thanks to Hank Anderson for the initial cmake porting work.) | |||||
| On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels. | |||||
| e.g. cmake . | |||||
| make | |||||
| make test (Optional) | |||||
| On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels. | |||||
| (OpenBLAS uses AT&T style assembly, which is not supported by MSVC.) | |||||
| e.g. cmake -G "Visual Studio 12 Win64" . | |||||
| Open OpenBLAS.sln and build. | |||||
| * Enable MAX_STACK_ALLOC flags by default. | |||||
| Improve ger and gemv for small matrices. | |||||
| * Improve gemv parallel with small m and large n case. | |||||
| * Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler) | |||||
| * Add vecLib benchmarks (#565. Thanks, Andreas Noack.) | |||||
| * Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak) | |||||
| * Fix LAPACKE lansy (#640. Thanks, Dan Kortschak) | |||||
| * Import bug fixes for LAPACKE s/dormlq, c/zunmlq | |||||
| * Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden) | |||||
| * Remove g77 from compiler list. | |||||
| * Enable AppVeyor Windows CI. | |||||
| x86/x86-64: | |||||
| * Support pure C generic kernels for x86/x86-64. | |||||
| * Support Intel Broadwell and Skylake by Haswell kernels. | |||||
| * Support AMD Excavator by Steamroller kernels. | |||||
| * Optimize s/d/c/zdot for Intel SandyBridge and Haswell. | |||||
| * Optimize s/d/c/zdot for AMD Piledriver and Steamroller. | |||||
| * Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell. | |||||
| * Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller. | |||||
| * Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge. | |||||
| * Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller. | |||||
| * Optimize s/dger for Intel SandyBridge. | |||||
| * Optimize s/dsymv for Intel SandyBridge. | |||||
| * Optimize ssymv for Intel Haswell. | |||||
| * Optimize dgemv for Intel Nehalem and Haswell. | |||||
| * Optimize dtrmm for Intel Haswell. | |||||
| ARM: | |||||
| * Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard) | |||||
| e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7 | |||||
| * Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas) | |||||
| POWER: | |||||
| * Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.) | |||||
| * Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.) | |||||
| ==================================================================== | ==================================================================== | ||||
| Version 0.2.14 | Version 0.2.14 | ||||
| 24-Mar-2015 | 24-Mar-2015 | ||||
| @@ -3,7 +3,7 @@ | |||||
| # | # | ||||
| # This library's version | # This library's version | ||||
| VERSION = 0.2.14 | |||||
| VERSION = 0.2.15 | |||||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
| @@ -169,6 +169,9 @@ COMMON_PROF = -pg | |||||
| # 64 bit integer interfaces in OpenBLAS. | # 64 bit integer interfaces in OpenBLAS. | ||||
| # For details, https://github.com/xianyi/OpenBLAS/pull/459 | # For details, https://github.com/xianyi/OpenBLAS/pull/459 | ||||
| # | # | ||||
| # The same prefix and suffix are also added to the library name, | |||||
| # i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas | |||||
| # | |||||
| # SYMBOLPREFIX= | # SYMBOLPREFIX= | ||||
| # SYMBOLSUFFIX= | # SYMBOLSUFFIX= | ||||
| @@ -891,12 +891,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3 | |||||
| CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 | CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 | ||||
| endif | endif | ||||
| ifndef LIBNAMESUFFIX | |||||
| LIBPREFIX = libopenblas | |||||
| else | |||||
| LIBPREFIX = libopenblas_$(LIBNAMESUFFIX) | |||||
| endif | |||||
| ifndef SYMBOLPREFIX | ifndef SYMBOLPREFIX | ||||
| SYMBOLPREFIX = | SYMBOLPREFIX = | ||||
| endif | endif | ||||
| @@ -905,6 +899,12 @@ ifndef SYMBOLSUFFIX | |||||
| SYMBOLSUFFIX = | SYMBOLSUFFIX = | ||||
| endif | endif | ||||
| ifndef LIBNAMESUFFIX | |||||
| LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) | |||||
| else | |||||
| LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) | |||||
| endif | |||||
| KERNELDIR = $(TOPDIR)/kernel/$(ARCH) | KERNELDIR = $(TOPDIR)/kernel/$(ARCH) | ||||
| include $(TOPDIR)/Makefile.$(ARCH) | include $(TOPDIR)/Makefile.$(ARCH) | ||||
| @@ -2,8 +2,9 @@ | |||||
| [](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | ||||
| [](https://travis-ci.org/xianyi/OpenBLAS) | |||||
| Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS) | |||||
| AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop) | |||||
| ## Introduction | ## Introduction | ||||
| OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. | OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. | ||||
| @@ -0,0 +1,42 @@ | |||||
| version: 0.2.15.{build} | |||||
| #environment: | |||||
| platform: | |||||
| - x64 | |||||
| configuration: Release | |||||
| clone_folder: c:\projects\OpenBLAS | |||||
| init: | |||||
| - git config --global core.autocrlf input | |||||
| build: | |||||
| project: OpenBLAS.sln | |||||
| clone_depth: 5 | |||||
| #branches to build | |||||
| branches: | |||||
| only: | |||||
| - master | |||||
| - develop | |||||
| - cmake | |||||
| skip_tags: true | |||||
| matrix: | |||||
| fast_finish: true | |||||
| skip_commits: | |||||
| # Add [av skip] to commit messages | |||||
| message: /\[av skip\]/ | |||||
| before_build: | |||||
| - echo Running cmake... | |||||
| - cd c:\projects\OpenBLAS | |||||
| - cmake -G "Visual Studio 12 Win64" . | |||||
| test_script: | |||||
| - echo Build OK! | |||||
| @@ -30,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { | |||||
| $cross_suffix = $1; | $cross_suffix = $1; | ||||
| } | } | ||||
| } else { | } else { | ||||
| if ($ARGV[0] =~ /(.*-)(.*)/) { | |||||
| if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) { | |||||
| $cross_suffix = $1; | $cross_suffix = $1; | ||||
| } | } | ||||
| } | } | ||||
| @@ -0,0 +1,115 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
| ## Sets various variables based on architecture. | |||||
| # x86 / x86_64: binary mode and extended-precision (EXPRECISION) settings. | |||||
| # Expansions are quoted so an empty variable cannot break the if() syntax. | |||||
| if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64") | |||||
|   if ("${ARCH}" STREQUAL "x86") | |||||
|     if (NOT BINARY) | |||||
|       set(NO_BINARY_MODE 1) | |||||
|     endif () | |||||
|   endif () | |||||
|   if (NOT NO_EXPRECISION) | |||||
|     if ("${F_COMPILER}" MATCHES "GFORTRAN") | |||||
|       # Test the compiler ID: CMAKE_C_COMPILER holds the compiler's path and | |||||
|       # never equals "GNU"/"Clang". | |||||
|       # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa | |||||
|       if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB") | |||||
|         set(EXPRECISION 1) | |||||
|         set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") | |||||
|         set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | |||||
|       endif () | |||||
|       if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") | |||||
|         set(EXPRECISION 1) | |||||
|         set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") | |||||
|         set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | |||||
|       endif () | |||||
|     endif () | |||||
|   endif () | |||||
| endif () | |||||
| # Compiler-specific warning and OpenMP flags appended to CCOMMON_OPT. | |||||
| # The checks use CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the compiler's path | |||||
| # and would never match these names. | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") | |||||
| endif () | |||||
| if (USE_OPENMP) | |||||
|   if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB") | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | |||||
|   endif () | |||||
|   if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") | |||||
|     message(WARNING "Clang doesn't support OpenMP yet.") | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | |||||
|   endif () | |||||
|   if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") | |||||
|   endif () | |||||
|   if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI") | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | |||||
|   endif () | |||||
|   # NOTE(review): "OPEN64" and "PATHSCALE" are kept as written; confirm the | |||||
|   # ID spelling CMake reports for these compilers. | |||||
|   if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64") | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | |||||
|     set(CEXTRALIB "${CEXTRALIB} -lstdc++") | |||||
|   endif () | |||||
|   if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE") | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | |||||
|   endif () | |||||
| endif () | |||||
| # With DYNAMIC_ARCH, DYNAMIC_CORE lists the cores to build for the current | |||||
| # architecture; if no list results, DYNAMIC_ARCH is switched off again. | |||||
| if (DYNAMIC_ARCH) | |||||
|   if (${ARCH} STREQUAL "x86") | |||||
|     set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") | |||||
|   elseif (${ARCH} STREQUAL "x86_64") | |||||
|     set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") | |||||
|     # AVX and AVX2 cores are added unless disabled. | |||||
|     if (NOT NO_AVX) | |||||
|       set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER") | |||||
|     endif () | |||||
|     if (NOT NO_AVX2) | |||||
|       set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL") | |||||
|     endif () | |||||
|   endif () | |||||
|   if (NOT DYNAMIC_CORE) | |||||
|     unset(DYNAMIC_ARCH) | |||||
|   endif () | |||||
| endif () | |||||
| # Non-x86 architectures: set NO_BINARY_MODE and, except for mips64, BINARY_DEFINED. | |||||
| if (${ARCH} STREQUAL "ia64") | |||||
|   set(NO_BINARY_MODE 1) | |||||
|   set(BINARY_DEFINED 1) | |||||
|   if (${F_COMPILER} MATCHES "GFORTRAN") | |||||
|     if (${CMAKE_C_COMPILER} STREQUAL "GNU") | |||||
|       # Intentionally empty; the Makefile equivalent is disabled: | |||||
|       # EXPRECISION = 1 | |||||
|       # CCOMMON_OPT += -DEXPRECISION | |||||
|     endif () | |||||
|   endif () | |||||
| endif () | |||||
| if (${ARCH} STREQUAL "mips64") | |||||
|   set(NO_BINARY_MODE 1) | |||||
| endif () | |||||
| if (${ARCH} STREQUAL "alpha" OR ${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64") | |||||
|   set(NO_BINARY_MODE 1) | |||||
|   set(BINARY_DEFINED 1) | |||||
| endif () | |||||
| @@ -0,0 +1,89 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Description: Ported from the OpenBLAS/c_check perl script. | |||||
| ## This is triggered by prebuild.cmake and runs before any of the code is built. | |||||
| ## Creates config.h and Makefile.conf. | |||||
| # CMake vars set by this file: | |||||
| # OSNAME (use CMAKE_SYSTEM_NAME) | |||||
| # ARCH | |||||
| # C_COMPILER (use CMAKE_C_COMPILER) | |||||
| # BINARY32 | |||||
| # BINARY64 | |||||
| # FU | |||||
| # CROSS_SUFFIX | |||||
| # CROSS | |||||
| # CEXTRALIB | |||||
| # Defines set by this file: | |||||
| # OS_ | |||||
| # ARCH_ | |||||
| # C_ | |||||
| # __32BIT__ | |||||
| # __64BIT__ | |||||
| # FUNDERSCORE | |||||
| # PTHREAD_CREATE_FUNC | |||||
| # N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. | |||||
| # FU is the underscore prefix written to the config as FUNDERSCORE: | |||||
| # "_" on Apple and MSVC, empty everywhere else. | |||||
| set(FU "") | |||||
| if(APPLE OR MSVC) | |||||
|   set(FU "_") | |||||
| endif() | |||||
| # HOST_OS is the upper-cased CMake system name, with WINDOWS renamed to WINNT. | |||||
| string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS) | |||||
| if (${HOST_OS} STREQUAL "WINDOWS") | |||||
|   set(HOST_OS WINNT) | |||||
| endif () | |||||
| # added by hpa - unless BINARY was given, derive it from the size of a void pointer | |||||
| if (NOT DEFINED BINARY) | |||||
|   if (CMAKE_SIZEOF_VOID_P EQUAL 8) | |||||
|     set(BINARY 64) | |||||
|   else () | |||||
|     set(BINARY 32) | |||||
|   endif () | |||||
| endif () | |||||
| # Exactly one of BINARY32 / BINARY64 is set. | |||||
| if (NOT BINARY EQUAL 64) | |||||
|   set(BINARY32 1) | |||||
| else () | |||||
|   set(BINARY64 1) | |||||
| endif () | |||||
| # CMake docs define these: | |||||
| # CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for. | |||||
| # CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on. | |||||
| # | |||||
| # TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check | |||||
| # Expansions below are quoted so an empty value cannot break the if() syntax. | |||||
| set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) | |||||
| if ("${ARCH}" STREQUAL "AMD64") | |||||
|   set(ARCH "x86_64") | |||||
| endif () | |||||
| # If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong | |||||
| if ("${ARCH}" STREQUAL "x86_64" AND BINARY EQUAL 32) | |||||
|   set(ARCH x86) | |||||
| endif () | |||||
| if ("${ARCH}" STREQUAL "X86") | |||||
|   set(ARCH x86) | |||||
| endif () | |||||
| # The C_<compiler> define describes the C compiler, so read the C compiler's | |||||
| # ID rather than the C++ one. "GNU" is written as "GCC". | |||||
| set(COMPILER_ID ${CMAKE_C_COMPILER_ID}) | |||||
| if ("${COMPILER_ID}" STREQUAL "GNU") | |||||
|   set(COMPILER_ID "GCC") | |||||
| endif () | |||||
| string(TOUPPER "${ARCH}" UC_ARCH) | |||||
| # Start the config file (overwriting any previous content) with the OS, | |||||
| # architecture, compiler, bitness and underscore defines. | |||||
| file(WRITE ${TARGET_CONF} | |||||
|   "#define OS_${HOST_OS}\t1\n" | |||||
|   "#define ARCH_${UC_ARCH}\t1\n" | |||||
|   "#define C_${COMPILER_ID}\t1\n" | |||||
|   "#define __${BINARY}BIT__\t1\n" | |||||
|   "#define FUNDERSCORE\t${FU}\n") | |||||
| @@ -0,0 +1,103 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
| ## Sets C related variables. | |||||
| # GNU / LSB / Clang: warning flags and -m32/-m64 (or mips ABI) selection. | |||||
| # The compiler is identified via CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the | |||||
| # compiler's path and never equals these names. | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") | |||||
|   set(COMMON_PROF "${COMMON_PROF} -fno-inline") | |||||
|   set(NO_UNINITIALIZED_WARN "-Wno-uninitialized") | |||||
|   if (QUIET_MAKE) | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused") | |||||
|   endif () | |||||
|   if (NO_BINARY_MODE) | |||||
|     if ("${ARCH}" STREQUAL "mips64") | |||||
|       if (BINARY64) | |||||
|         set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64") | |||||
|       else () | |||||
|         set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32") | |||||
|       endif () | |||||
|       set(BINARY_DEFINED 1) | |||||
|     endif () | |||||
|     if ("${CORE}" STREQUAL "LOONGSON3A") | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") | |||||
|     endif () | |||||
|     if ("${CORE}" STREQUAL "LOONGSON3B") | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") | |||||
|     endif () | |||||
|     # Quoted: OSNAME may be unset, which would otherwise be a syntax error. | |||||
|     if ("${OSNAME}" STREQUAL "AIX") | |||||
|       set(BINARY_DEFINED 1) | |||||
|     endif () | |||||
|   endif () | |||||
|   # Add -m64/-m32 unless an architecture already fixed the binary mode. | |||||
|   if (NOT BINARY_DEFINED) | |||||
|     if (BINARY64) | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -m64") | |||||
|     else () | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -m32") | |||||
|     endif () | |||||
|   endif () | |||||
| endif () | |||||
| # PGI and PathScale: target/bitness flags. The compiler is identified via | |||||
| # CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the compiler's path). | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI") | |||||
|   if (BINARY64) | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64") | |||||
|   else () | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7") | |||||
|   endif () | |||||
| endif () | |||||
| # NOTE(review): confirm the ID spelling CMake reports for PathScale. | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE") | |||||
|   if (BINARY64) | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -m64") | |||||
|   else () | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -m32") | |||||
|   endif () | |||||
| endif () | |||||
| # Open64: ABI/bitness flags. The compiler is identified via | |||||
| # CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the compiler's path). | |||||
| # NOTE(review): confirm the ID spelling CMake reports for Open64. | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64") | |||||
|   if ("${ARCH}" STREQUAL "mips64") | |||||
|     if (NOT BINARY64) | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -n32") | |||||
|     else () | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -n64") | |||||
|     endif () | |||||
|     if ("${CORE}" STREQUAL "LOONGSON3A") | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") | |||||
|     endif () | |||||
|     if ("${CORE}" STREQUAL "LOONGSON3B") | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") | |||||
|     endif () | |||||
|   else () | |||||
|     # 64-bit builds get -m64 and 32-bit builds get -m32 (the two flags were swapped). | |||||
|     if (BINARY64) | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -m64") | |||||
|     else () | |||||
|       set(CCOMMON_OPT "${CCOMMON_OPT} -m32") | |||||
|     endif () | |||||
|   endif () | |||||
| endif () | |||||
| # Sun compiler: silence warnings and pick the bitness flag. The compiler is | |||||
| # identified via CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the compiler's path). | |||||
| # NOTE(review): confirm the ID spelling CMake reports for the Sun compiler. | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SUN") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -w") | |||||
|   if ("${ARCH}" STREQUAL "x86") | |||||
|     set(CCOMMON_OPT "${CCOMMON_OPT} -m32") | |||||
|   else () | |||||
|     # NOTE(review): this appends to FCOMMON_OPT, not CCOMMON_OPT - confirm that is intended. | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||||
|   endif () | |||||
| endif () | |||||
| @@ -0,0 +1,60 @@ | |||||
| # Only generate the .def file for the DLL on MSVC. | |||||
| if(MSVC) | |||||
|   set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1) | |||||
|   # Architecture argument for gensymbol: ARCH if defined, else "x86_64"; | |||||
|   # a "generic" CORE overrides it with "GENERIC". | |||||
|   if(DEFINED ARCH) | |||||
|     set(ARCH_IN ${ARCH}) | |||||
|   else() | |||||
|     set(ARCH_IN "x86_64") | |||||
|   endif() | |||||
|   if(${CORE} STREQUAL "generic") | |||||
|     set(ARCH_IN "GENERIC") | |||||
|   endif() | |||||
|   # For each flag, <flag>_IN carries its value, or 0 when the flag is not defined. | |||||
|   foreach(export_flag EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS) | |||||
|     if(DEFINED ${export_flag}) | |||||
|       set(${export_flag}_IN ${${export_flag}}) | |||||
|     else() | |||||
|       set(${export_flag}_IN 0) | |||||
|     endif() | |||||
|   endforeach() | |||||
|   # Run exports/gensymbol before linking and redirect its output into openblas.def. | |||||
|   add_custom_command( | |||||
|     TARGET ${OpenBLAS_LIBNAME} PRE_LINK | |||||
|     COMMAND perl | |||||
|     ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" | |||||
|     COMMENT "Create openblas.def file" | |||||
|     VERBATIM) | |||||
| endif() | |||||
| @@ -0,0 +1,66 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Copyright: (c) Stat-Ease, Inc. | |||||
| ## Created: 12/29/14 | |||||
| ## Last Modified: 12/29/14 | |||||
| ## Description: Ported from the OpenBLAS/f_check perl script. | |||||
| ## This is triggered by prebuild.cmake and runs before any of the code is built. | |||||
| ## Appends Fortran information to config.h and Makefile.conf. | |||||
| # CMake vars set by this file: | |||||
| # F_COMPILER | |||||
| # FC | |||||
| # BU | |||||
| # NOFORTRAN | |||||
| # NEED2UNDERSCORES | |||||
| # FEXTRALIB | |||||
| # Defines set by this file: | |||||
| # BUNDERSCORE | |||||
| # NEEDBUNDERSCORE | |||||
| # NEED2UNDERSCORES | |||||
| # With MSVC (otherwise CMake automatically assumes ifort is present -hpa) or | |||||
| # when LAPACK is disabled, force gfortran/GNU as the Fortran compiler. | |||||
| if (MSVC OR NO_LAPACK) | |||||
|   include(CMakeForceCompiler) | |||||
|   CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) | |||||
| endif () | |||||
| # Fortran is only enabled as a language when LAPACK is built. | |||||
| if (NOT NO_LAPACK) | |||||
|   enable_language(Fortran) | |||||
| endif () | |||||
| # N.B. f_check is not cross-platform, so CMake variables are used instead of running it. | |||||
| # TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile | |||||
| # TODO: set FEXTRALIB flags a la f_check? | |||||
| # Both configurations use a trailing underscore and append the same two defines. | |||||
| set(BU "_") | |||||
| file(APPEND ${TARGET_CONF} | |||||
|   "#define BUNDERSCORE _\n" | |||||
|   "#define NEEDBUNDERSCORE 1\n") | |||||
| if (ONLY_CBLAS) | |||||
|   # When we only build CBLAS, we set NOFORTRAN=2 | |||||
|   set(NOFORTRAN 2) | |||||
|   set(NO_FBLAS 1) | |||||
| else () | |||||
|   file(APPEND ${TARGET_CONF} | |||||
|     "#define NEED2UNDERSCORES 0\n") | |||||
| endif () | |||||
| # F_COMPILER is the upper-cased base name of the Fortran compiler executable. | |||||
| get_filename_component(F_COMPILER ${CMAKE_Fortran_COMPILER} NAME_WE) | |||||
| string(TOUPPER ${F_COMPILER} F_COMPILER) | |||||
| @@ -0,0 +1,200 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
| ## Sets Fortran related variables. | |||||
| # g77: interface define, warnings, and -m32/-m64 unless binary mode is disabled. | |||||
| if (${F_COMPILER} STREQUAL "G77") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77") | |||||
|   set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") | |||||
|   if (NOT NO_BINARY_MODE) | |||||
|     if (NOT BINARY64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||||
|     else () | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||||
|     endif () | |||||
|   endif () | |||||
| endif () | |||||
| # g95: same handling with its own interface define. | |||||
| if (${F_COMPILER} STREQUAL "G95") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95") | |||||
|   set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") | |||||
|   if (NOT NO_BINARY_MODE) | |||||
|     if (NOT BINARY64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||||
|     else () | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||||
|     endif () | |||||
|   endif () | |||||
| endif () | |||||
| # gfortran: interface define, warnings, runtime library, ABI/bitness and OpenMP flags. | |||||
| if ("${F_COMPILER}" STREQUAL "GFORTRAN") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") | |||||
|   set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") | |||||
|   # Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | |||||
|   if (NOT NO_LAPACK) | |||||
|     # Append to the existing value; the "$" was missing, which stored a | |||||
|     # literal "{EXTRALIB}" in the variable. | |||||
|     set(EXTRALIB "${EXTRALIB} -lgfortran") | |||||
|   endif () | |||||
|   if (NO_BINARY_MODE) | |||||
|     if ("${ARCH}" STREQUAL "mips64") | |||||
|       if (BINARY64) | |||||
|         set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") | |||||
|       else () | |||||
|         set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") | |||||
|       endif () | |||||
|     endif () | |||||
|   else () | |||||
|     if (BINARY64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||||
|       if (INTERFACE64) | |||||
|         set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") | |||||
|       endif () | |||||
|     else () | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||||
|     endif () | |||||
|   endif () | |||||
|   if (USE_OPENMP) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp") | |||||
|   endif () | |||||
| endif () | |||||
| # Intel Fortran: interface define, optional 8-byte integers, optional OpenMP. | |||||
| if (${F_COMPILER} STREQUAL "INTEL") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") | |||||
|   if (INTERFACE64) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -i8") | |||||
|   endif () | |||||
|   if (USE_OPENMP) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") | |||||
|   endif () | |||||
| endif () | |||||
| # Fujitsu Fortran: interface define and optional OpenMP. | |||||
| if (${F_COMPILER} STREQUAL "FUJITSU") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU") | |||||
|   if (USE_OPENMP) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") | |||||
|   endif () | |||||
| endif () | |||||
| # IBM Fortran: interface define, -q32/-q64, optional 8-byte integers, optional OpenMP. | |||||
| if (${F_COMPILER} STREQUAL "IBM") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM") | |||||
|   # FCOMMON_OPT += -qarch=440 | |||||
|   if (NOT BINARY64) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -q32") | |||||
|   else () | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -q64") | |||||
|     if (INTERFACE64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8") | |||||
|     endif () | |||||
|   endif () | |||||
|   if (USE_OPENMP) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") | |||||
|   endif () | |||||
| endif () | |||||
| # PGI Fortran: interface and profiling defines, target selection, optional OpenMP. | |||||
| if (${F_COMPILER} STREQUAL "PGI") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI") | |||||
|   set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER") | |||||
|   if (NOT BINARY64) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7") | |||||
|   else () | |||||
|     # -i8 (when requested) precedes the target flag. | |||||
|     if (INTERFACE64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -i8") | |||||
|     endif () | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64") | |||||
|   endif () | |||||
|   if (USE_OPENMP) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -mp") | |||||
|   endif () | |||||
| endif () | |||||
| # PathScale Fortran: interface define, optional 8-byte integers, bitness/ABI, optional OpenMP. | |||||
| if (${F_COMPILER} STREQUAL "PATHSCALE") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE") | |||||
|   if (BINARY64 AND INTERFACE64) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -i8") | |||||
|   endif () | |||||
|   if (${ARCH} STREQUAL "mips64") | |||||
|     # mips64 selects the ABI instead of -m32/-m64. | |||||
|     if (BINARY64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") | |||||
|     else () | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") | |||||
|     endif () | |||||
|   else () | |||||
|     if (BINARY64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||||
|     else () | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||||
|     endif () | |||||
|   endif () | |||||
|   if (USE_OPENMP) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -mp") | |||||
|   endif () | |||||
| endif () | |||||
| # Open64 Fortran: interface define, optional 8-byte integers, bitness/ABI, optional OpenMP. | |||||
| if (${F_COMPILER} STREQUAL "OPEN64") | |||||
|   set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64") | |||||
|   if (BINARY64 AND INTERFACE64) | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -i8") | |||||
|   endif () | |||||
|   if (${ARCH} STREQUAL "mips64") | |||||
|     if (BINARY64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -n64") | |||||
|     else () | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -n32") | |||||
|     endif () | |||||
|     # Both Loongson cores get the same extra flags. | |||||
|     if (${CORE} STREQUAL "LOONGSON3A") | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") | |||||
|     endif () | |||||
|     if (${CORE} STREQUAL "LOONGSON3B") | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") | |||||
|     endif () | |||||
|   else () | |||||
|     if (BINARY64) | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||||
|     else () | |||||
|       set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||||
|     endif () | |||||
|   endif () | |||||
|   if (USE_OPENMP) | |||||
|     set(FEXTRALIB "${FEXTRALIB} -lstdc++") | |||||
|     set(FCOMMON_OPT "${FCOMMON_OPT} -mp") | |||||
|   endif () | |||||
| endif () | |||||
| # Flags used when F_COMPILER is SUN: interface macro, word size and OpenMP options. | |||||
| # F_COMPILER and ARCH are quoted so that empty or undefined values compare as false instead of breaking the if() syntax. | |||||
| if ("${F_COMPILER}" STREQUAL "SUN") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") | |||||
| # only ARCH x86 is built with -m32; every other ARCH value gets -m64 | |||||
| if ("${ARCH}" STREQUAL "x86") | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||||
| else () | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||||
| endif () | |||||
| if (USE_OPENMP) | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel") | |||||
| endif () | |||||
| endif () | |||||
| # Flags used when F_COMPILER is COMPAQ: interface macro and OpenMP option. | |||||
| # F_COMPILER is quoted so that an empty or undefined value compares as false instead of breaking the if() syntax. | |||||
| if ("${F_COMPILER}" STREQUAL "COMPAQ") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ") | |||||
| if (USE_OPENMP) | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") | |||||
| endif () | |||||
| endif () | |||||
| # from the root Makefile - this is for lapack-netlib to compile the correct secnd file. | |||||
| # TIMER becomes part of the second_*.f / dsecnd_*.f file names in the lapack-netlib source lists. | |||||
| # F_COMPILER is quoted so that an empty or undefined value falls through to NONE instead of breaking the if() syntax. | |||||
| if ("${F_COMPILER}" STREQUAL "GFORTRAN") | |||||
| set(TIMER "INT_ETIME") | |||||
| else () | |||||
| set(TIMER "NONE") | |||||
| endif () | |||||
| @@ -0,0 +1,165 @@ | |||||
| # helper functions for the kernel CMakeLists.txt | |||||
| # SetDefaultL1: assign the default source file of every Level-1 kernel variable. | |||||
| # Most of these are expected to be overridden by the appropriate KERNEL file. | |||||
| macro(SetDefaultL1) | |||||
| # S, D and Q variables all point at the plain kernel sources | |||||
| foreach (_l1_prec S D Q) | |||||
| set(${_l1_prec}AMAXKERNEL amax.S) | |||||
| set(${_l1_prec}AMINKERNEL amin.S) | |||||
| set(${_l1_prec}MAXKERNEL max.S) | |||||
| set(${_l1_prec}MINKERNEL min.S) | |||||
| set(I${_l1_prec}AMAXKERNEL iamax.S) | |||||
| set(I${_l1_prec}AMINKERNEL iamin.S) | |||||
| set(I${_l1_prec}MAXKERNEL iamax.S) | |||||
| set(I${_l1_prec}MINKERNEL iamin.S) | |||||
| set(${_l1_prec}ASUMKERNEL asum.S) | |||||
| set(${_l1_prec}AXPYKERNEL axpy.S) | |||||
| set(${_l1_prec}COPYKERNEL copy.S) | |||||
| set(${_l1_prec}DOTKERNEL dot.S) | |||||
| set(${_l1_prec}NRM2KERNEL nrm2.S) | |||||
| set(${_l1_prec}ROTKERNEL rot.S) | |||||
| set(${_l1_prec}SCALKERNEL scal.S) | |||||
| set(${_l1_prec}SWAPKERNEL swap.S) | |||||
| set(${_l1_prec}GEMVNKERNEL gemv_n.S) | |||||
| set(${_l1_prec}GEMVTKERNEL gemv_t.S) | |||||
| set(${_l1_prec}CABS_KERNEL ../generic/cabs.c) | |||||
| endforeach () | |||||
| # C, Z and X variables all point at the z-prefixed kernel sources | |||||
| foreach (_l1_prec C Z X) | |||||
| set(${_l1_prec}AMAXKERNEL zamax.S) | |||||
| set(${_l1_prec}AMINKERNEL zamin.S) | |||||
| set(I${_l1_prec}AMAXKERNEL izamax.S) | |||||
| set(I${_l1_prec}AMINKERNEL izamin.S) | |||||
| set(${_l1_prec}ASUMKERNEL zasum.S) | |||||
| set(${_l1_prec}AXPYKERNEL zaxpy.S) | |||||
| set(${_l1_prec}COPYKERNEL zcopy.S) | |||||
| set(${_l1_prec}DOTKERNEL zdot.S) | |||||
| set(${_l1_prec}NRM2KERNEL znrm2.S) | |||||
| set(${_l1_prec}ROTKERNEL zrot.S) | |||||
| set(${_l1_prec}SCALKERNEL zscal.S) | |||||
| set(${_l1_prec}SWAPKERNEL zswap.S) | |||||
| set(${_l1_prec}GEMVNKERNEL zgemv_n.S) | |||||
| set(${_l1_prec}GEMVTKERNEL zgemv_t.S) | |||||
| endforeach () | |||||
| # kernels that have no per-precision pattern above | |||||
| set(LSAME_KERNEL ../generic/lsame.c) | |||||
| set(SAXPBYKERNEL ../arm/axpby.c) | |||||
| set(DAXPBYKERNEL ../arm/axpby.c) | |||||
| set(CAXPBYKERNEL ../arm/zaxpby.c) | |||||
| set(ZAXPBYKERNEL ../arm/zaxpby.c) | |||||
| endmacro () | |||||
| # SetDefaultL2: assign the default source file of every Level-2 kernel variable (GEMV, GER, SYMV, HEMV). | |||||
| macro(SetDefaultL2) | |||||
| # S, D and Q variables | |||||
| foreach (_l2_prec S D Q) | |||||
| set(${_l2_prec}GEMVNKERNEL gemv_n.S) | |||||
| set(${_l2_prec}GEMVTKERNEL gemv_t.S) | |||||
| set(${_l2_prec}GERKERNEL ../generic/ger.c) | |||||
| set(${_l2_prec}SYMV_U_KERNEL ../generic/symv_k.c) | |||||
| set(${_l2_prec}SYMV_L_KERNEL ../generic/symv_k.c) | |||||
| endforeach () | |||||
| # C, Z and X variables; only these have GERU/GERC and HEMV entries | |||||
| foreach (_l2_prec C Z X) | |||||
| set(${_l2_prec}GEMVNKERNEL zgemv_n.S) | |||||
| set(${_l2_prec}GEMVTKERNEL zgemv_t.S) | |||||
| set(${_l2_prec}GERUKERNEL ../generic/zger.c) | |||||
| set(${_l2_prec}GERCKERNEL ../generic/zger.c) | |||||
| set(${_l2_prec}SYMV_U_KERNEL ../generic/zsymv_k.c) | |||||
| set(${_l2_prec}SYMV_L_KERNEL ../generic/zsymv_k.c) | |||||
| set(${_l2_prec}HEMV_U_KERNEL ../generic/zhemv_k.c) | |||||
| set(${_l2_prec}HEMV_L_KERNEL ../generic/zhemv_k.c) | |||||
| set(${_l2_prec}HEMV_V_KERNEL ../generic/zhemv_k.c) | |||||
| set(${_l2_prec}HEMV_M_KERNEL ../generic/zhemv_k.c) | |||||
| endforeach () | |||||
| endmacro () | |||||
| # SetDefaultL3: assign the default GEADD kernel sources (S/D use geadd.c, C/Z use zgeadd.c). | |||||
| macro(SetDefaultL3) | |||||
| foreach (_l3_prec S D) | |||||
| set(${_l3_prec}GEADD_KERNEL ../generic/geadd.c) | |||||
| endforeach () | |||||
| foreach (_l3_prec C Z) | |||||
| set(${_l3_prec}GEADD_KERNEL ../generic/zgeadd.c) | |||||
| endforeach () | |||||
| endmacro () | |||||
| @@ -0,0 +1,347 @@ | |||||
| # Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. | |||||
| # ALLAUX: sources that are always part of the build; LA_REL_SRC is initialised from this list. | |||||
| # Paths are relative to the SRC directory of lapack-netlib (the ../INSTALL entries step out of it). | |||||
| set(ALLAUX | |||||
| ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f | |||||
| ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f | |||||
| ../INSTALL/ilaver.f ../INSTALL/slamch.f | |||||
| ) | |||||
| # SCLAUX: sources appended to LA_REL_SRC for both BUILD_SINGLE and BUILD_COMPLEX. | |||||
| # The last entry depends on TIMER, which is expanded here, so TIMER has to be set before this list is evaluated. | |||||
| set(SCLAUX | |||||
| sbdsdc.f | |||||
| sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f | |||||
| slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f | |||||
| slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f | |||||
| slagts.f slamrg.f slanst.f | |||||
| slapy2.f slapy3.f slarnv.f | |||||
| slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f | |||||
| slarrk.f slarrr.f slaneg.f | |||||
| slartg.f slaruv.f slas2.f slascl.f | |||||
| slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f | |||||
| slasd7.f slasd8.f slasda.f slasdq.f slasdt.f | |||||
| slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f | |||||
| slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f | |||||
| ssteqr.f ssterf.f slaisnan.f sisnan.f | |||||
| slartgp.f slartgs.f | |||||
| ../INSTALL/second_${TIMER}.f | |||||
| ) | |||||
| # DZLAUX: sources appended to LA_REL_SRC for both BUILD_DOUBLE and BUILD_COMPLEX16. | |||||
| # The last entry depends on TIMER, which is expanded here, so TIMER has to be set before this list is evaluated. | |||||
| set(DZLAUX | |||||
| dbdsdc.f | |||||
| dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f | |||||
| dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f | |||||
| dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f | |||||
| dlagts.f dlamrg.f dlanst.f | |||||
| dlapy2.f dlapy3.f dlarnv.f | |||||
| dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f | |||||
| dlarrk.f dlarrr.f dlaneg.f | |||||
| dlartg.f dlaruv.f dlas2.f dlascl.f | |||||
| dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f | |||||
| dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f | |||||
| dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f | |||||
| dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f | |||||
| dsteqr.f dsterf.f dlaisnan.f disnan.f | |||||
| dlartgp.f dlartgs.f | |||||
| ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f | |||||
| ) | |||||
| # SLASRC: sources appended to LA_REL_SRC only when BUILD_SINGLE is set. | |||||
| set(SLASRC | |||||
| sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f | |||||
| sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f | |||||
| sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f | |||||
| sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f | |||||
| sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f | |||||
| sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f | |||||
| sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f | |||||
| sgetc2.f sgetri.f | |||||
| sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f | |||||
| sggglm.f sgghrd.f sgglse.f sggqrf.f | |||||
| sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f | |||||
| sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f | |||||
| shsein.f shseqr.f slabrd.f slacon.f slacn2.f | |||||
| slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f | |||||
| slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f | |||||
| slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f | |||||
| slansy.f slantb.f slantp.f slantr.f slanv2.f | |||||
| slapll.f slapmt.f | |||||
| slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f | |||||
| slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f | |||||
| slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f | |||||
| slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f | |||||
| slarrv.f slartv.f | |||||
| slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f | |||||
| slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f | |||||
| sopgtr.f sopmtr.f sorg2l.f sorg2r.f | |||||
| sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f | |||||
| sorgrq.f sorgtr.f sorm2l.f sorm2r.f | |||||
| sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f | |||||
| sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f | |||||
| spbstf.f spbsv.f spbsvx.f | |||||
| spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f | |||||
| sposvx.f spstrf.f spstf2.f | |||||
| sppcon.f sppequ.f | |||||
| spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f | |||||
| spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f | |||||
| ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f | |||||
| ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f | |||||
| sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f | |||||
| ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f | |||||
| sstevx.f | |||||
| ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f | |||||
| ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f | |||||
| ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f | |||||
| ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f | |||||
| ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f | |||||
| ssytri_rook.f ssycon_rook.f ssysv_rook.f | |||||
| stbcon.f | |||||
| stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f | |||||
| stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f | |||||
| stptrs.f | |||||
| strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f | |||||
| strtrs.f stzrqf.f stzrzf.f sstemr.f | |||||
| slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f | |||||
| stfttr.f stpttf.f stpttr.f strttf.f strttp.f | |||||
| sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f | |||||
| sgeequb.f ssyequb.f spoequb.f sgbequb.f | |||||
| sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f | |||||
| sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f | |||||
| sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f | |||||
| stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f | |||||
| ) | |||||
| # DSLASRC: appended to LA_REL_SRC for both BUILD_SINGLE and BUILD_DOUBLE. | |||||
| set(DSLASRC spotrs.f) | |||||
| # CLASRC: sources appended to LA_REL_SRC only when BUILD_COMPLEX is set. | |||||
| set(CLASRC | |||||
| cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f | |||||
| cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f | |||||
| cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f | |||||
| cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f | |||||
| cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f | |||||
| cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f | |||||
| cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f | |||||
| cgesvx.f cgetc2.f cgetri.f | |||||
| cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f | |||||
| cgghrd.f cgglse.f cggqrf.f cggrqf.f | |||||
| cggsvd.f cggsvp.f | |||||
| cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f | |||||
| chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f | |||||
| checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f | |||||
| chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f | |||||
| chetf2.f chetrd.f | |||||
| chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f | |||||
| chetrs.f chetrs2.f | |||||
| chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f | |||||
| chgeqz.f chpcon.f chpev.f chpevd.f | |||||
| chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f | |||||
| chpsvx.f | |||||
| chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f | |||||
| clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f | |||||
| claed0.f claed7.f claed8.f | |||||
| claein.f claesy.f claev2.f clags2.f clagtm.f | |||||
| clahef.f clahef_rook.f clahqr.f | |||||
| clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f | |||||
| clanhb.f clanhe.f | |||||
| clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f | |||||
| clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f | |||||
| claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f | |||||
| claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f | |||||
| claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f | |||||
| clarf.f clarfb.f clarfg.f clarft.f clarfgp.f | |||||
| clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f | |||||
| clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f | |||||
| clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f | |||||
| clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f | |||||
| cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f | |||||
| cposv.f cposvx.f cpstrf.f cpstf2.f | |||||
| cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f | |||||
| cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f | |||||
| crot.f cspcon.f csprfs.f cspsv.f | |||||
| cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f | |||||
| cstegr.f cstein.f csteqr.f | |||||
| csycon.f | |||||
| csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f | |||||
| csyswapr.f csytrs.f csytrs2.f csyconv.f | |||||
| csytf2_rook.f csytrf_rook.f csytrs_rook.f | |||||
| csytri_rook.f csycon_rook.f csysv_rook.f | |||||
| ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f | |||||
| ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f | |||||
| ctprfs.f ctptri.f | |||||
| ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f | |||||
| ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f | |||||
| cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f | |||||
| cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f | |||||
| cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f | |||||
| cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f | |||||
| chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f | |||||
| ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f | |||||
| cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f | |||||
| cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f | |||||
| cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f | |||||
| cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f | |||||
| ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f | |||||
| ) | |||||
| # ZCLASRC: appended to LA_REL_SRC for both BUILD_COMPLEX and BUILD_COMPLEX16. | |||||
| set(ZCLASRC cpotrs.f) | |||||
| # DLASRC: sources appended to LA_REL_SRC only when BUILD_DOUBLE is set. | |||||
| set(DLASRC | |||||
| dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f | |||||
| dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f | |||||
| dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f | |||||
| dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f | |||||
| dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f | |||||
| dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f | |||||
| dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f | |||||
| dgetc2.f dgetri.f | |||||
| dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f | |||||
| dggglm.f dgghrd.f dgglse.f dggqrf.f | |||||
| dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f | |||||
| dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f | |||||
| dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f | |||||
| dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f | |||||
| dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f | |||||
| dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f | |||||
| dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f | |||||
| dlapll.f dlapmt.f | |||||
| dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f | |||||
| dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f | |||||
| dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f | |||||
| dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f | |||||
| dlargv.f dlarrv.f dlartv.f | |||||
| dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f | |||||
| dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f | |||||
| dopgtr.f dopmtr.f dorg2l.f dorg2r.f | |||||
| dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f | |||||
| dorgrq.f dorgtr.f dorm2l.f dorm2r.f | |||||
| dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f | |||||
| dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f | |||||
| dpbstf.f dpbsv.f dpbsvx.f | |||||
| dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f | |||||
| dposvx.f dpotrs.f dpstrf.f dpstf2.f | |||||
| dppcon.f dppequ.f | |||||
| dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f | |||||
| dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f | |||||
| dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f | |||||
| dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f | |||||
| dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f | |||||
| dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f | |||||
| dstevx.f | |||||
| dsycon.f dsyev.f dsyevd.f dsyevr.f | |||||
| dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f | |||||
| dsysv.f dsysvx.f | |||||
| dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f | |||||
| dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f | |||||
| dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f | |||||
| dsytri_rook.f dsycon_rook.f dsysv_rook.f | |||||
| dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f | |||||
| dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f | |||||
| dtptrs.f | |||||
| dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f | |||||
| dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f | |||||
| dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f | |||||
| dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f | |||||
| dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f | |||||
| dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f | |||||
| dgeequb.f dsyequb.f dpoequb.f dgbequb.f | |||||
| dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f | |||||
| dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f | |||||
| dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f | |||||
| dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f | |||||
| ) | |||||
| # ZLASRC: sources appended to LA_REL_SRC only when BUILD_COMPLEX16 is set. | |||||
| set(ZLASRC | |||||
| zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f | |||||
| zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f | |||||
| zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f | |||||
| zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f | |||||
| zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f | |||||
| zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f | |||||
| zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f | |||||
| zgetri.f | |||||
| zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f | |||||
| zgghrd.f zgglse.f zggqrf.f zggrqf.f | |||||
| zggsvd.f zggsvp.f | |||||
| zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f | |||||
| zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f | |||||
| zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f | |||||
| zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f | |||||
| zhetf2.f zhetrd.f | |||||
| zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f | |||||
| zhetrs.f zhetrs2.f | |||||
| zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f | |||||
| zhgeqz.f zhpcon.f zhpev.f zhpevd.f | |||||
| zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f | |||||
| zhpsvx.f | |||||
| zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f | |||||
| zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f | |||||
| zlaed0.f zlaed7.f zlaed8.f | |||||
| zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f | |||||
| zlahef.f zlahef_rook.f zlahqr.f | |||||
| zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f | |||||
| zlangt.f zlanhb.f | |||||
| zlanhe.f | |||||
| zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f | |||||
| zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f | |||||
| zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f | |||||
| zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f | |||||
| zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f | |||||
| zlarcm.f zlarf.f zlarfb.f | |||||
| zlarfg.f zlarft.f zlarfgp.f | |||||
| zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f | |||||
| zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f | |||||
| zlassq.f zlasyf.f zlasyf_rook.f | |||||
| zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f | |||||
| zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f | |||||
| zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f | |||||
| zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f | |||||
| zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f | |||||
| zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f | |||||
| zrot.f zspcon.f zsprfs.f zspsv.f | |||||
| zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f | |||||
| zstegr.f zstein.f zsteqr.f | |||||
| zsycon.f | |||||
| zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f | |||||
| zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f | |||||
| zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f | |||||
| zsytri_rook.f zsycon_rook.f zsysv_rook.f | |||||
| ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f | |||||
| ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f | |||||
| ztprfs.f ztptri.f | |||||
| ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f | |||||
| ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f | |||||
| zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f | |||||
| zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f | |||||
| zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f | |||||
| zunmtr.f zupgtr.f | |||||
| zupmtr.f izmax1.f dzsum1.f zstemr.f | |||||
| zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f | |||||
| zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f | |||||
| ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f | |||||
| zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f | |||||
| zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f | |||||
| zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f | |||||
| zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f | |||||
| ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f | |||||
| ) | |||||
| # Collect the lapack-netlib sources for the enabled precisions (paths relative to SRC). | |||||
| set(LA_REL_SRC ${ALLAUX}) | |||||
| if (BUILD_SINGLE) | |||||
| list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX}) | |||||
| endif () | |||||
| if (BUILD_DOUBLE) | |||||
| list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX}) | |||||
| endif () | |||||
| if (BUILD_COMPLEX) | |||||
| list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX}) | |||||
| endif () | |||||
| if (BUILD_COMPLEX16) | |||||
| list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX}) | |||||
| endif () | |||||
| # DSLASRC, ZCLASRC, SCLAUX and DZLAUX are each appended from two branches above, | |||||
| # so drop the repeated entries when more than one precision is enabled. | |||||
| list(REMOVE_DUPLICATES LA_REL_SRC) | |||||
| # add lapack-netlib folder to the sources | |||||
| set(LA_SOURCES "") | |||||
| foreach (LA_FILE ${LA_REL_SRC}) | |||||
| list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}") | |||||
| endforeach () | |||||
| # every collected source is compiled with LAPACK_FFLAGS | |||||
| set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") | |||||
| @@ -0,0 +1,104 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
| ## Detects the OS and sets appropriate variables. | |||||
| # Per-OS basics: MD5SUM is set on Darwin, FreeBSD and NetBSD; Linux and AIX add -lm to EXTRALIB. | |||||
| # CMAKE_SYSTEM_NAME holds a single value, so the cases are written as one exclusive chain. | |||||
| if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") | |||||
| # TODO: should be exported as an env var | |||||
| set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") | |||||
| set(MD5SUM "md5 -r") | |||||
| elseif (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") | |||||
| set(MD5SUM "md5 -r") | |||||
| elseif (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD") | |||||
| set(MD5SUM "md5 -n") | |||||
| elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") | |||||
| set(EXTRALIB "${EXTRALIB} -lm") | |||||
| set(NO_EXPRECISION 1) | |||||
| elseif (${CMAKE_SYSTEM_NAME} STREQUAL "AIX") | |||||
| set(EXTRALIB "${EXTRALIB} -lm") | |||||
| endif () | |||||
| # TODO: this is probably meant for mingw, not other windows compilers | |||||
| # Windows settings: no PIC, no extended precision, advapi32, MS_ABI detection and Win32 stack alignment. | |||||
| # Variable expansions inside if() are quoted so that an empty value compares as false instead of breaking the if() syntax. | |||||
| if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") | |||||
| set(NEED_PIC 0) | |||||
| set(NO_EXPRECISION 1) | |||||
| set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32") | |||||
| # probably not going to use these | |||||
| set(SUFFIX "obj") | |||||
| set(PSUFFIX "pobj") | |||||
| set(LIBSUFFIX "a") | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") | |||||
| endif () | |||||
| if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") | |||||
| # Test for supporting MS_ABI | |||||
| # removed string parsing in favor of CMake's version comparison -hpa | |||||
| # strip the trailing newline that -dumpversion prints so GCC_VERSION holds only the version number | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) | |||||
| # "NOT VERSION_LESS" is ">= 4.7"; an empty GCC_VERSION (failed query) simply does not enable MS_ABI | |||||
| if (NOT "${GCC_VERSION}" VERSION_LESS 4.7) | |||||
| # GCC Version >=4.7 | |||||
| # It is compatible with MSVC ABI. | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") | |||||
| endif () | |||||
| endif () | |||||
| # Ensure the correct stack alignment on Win32 | |||||
| # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 | |||||
| if ("${ARCH}" STREQUAL "x86") | |||||
| # the C flag is skipped for MSVC and Clang; the Fortran flag is always added | |||||
| if (NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") | |||||
| endif () | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2") | |||||
| endif () | |||||
| endif () | |||||
| # Interix settings: no PIC, no extended precision, and the location of the Interix GCC tools. | |||||
| if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Interix") | |||||
| set(NEED_PIC 0) | |||||
| set(NO_EXPRECISION 1) | |||||
| # plain path value; a stray STREQUAL keyword here used to turn the variable into the list "STREQUAL;<path>" | |||||
| set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin") | |||||
| endif () | |||||
| # Cygwin builds use neither PIC nor extended precision. | |||||
| if (CYGWIN) | |||||
| set(NO_EXPRECISION 1) | |||||
| set(NEED_PIC 0) | |||||
| endif () | |||||
| # SMP builds link against pthread everywhere except on Windows and Interix. | |||||
| if (SMP AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix") | |||||
| set(EXTRALIB "${EXTRALIB} -lpthread") | |||||
| endif () | |||||
| # Precision and self-check switches that apply on every OS. | |||||
| if (QUAD_PRECISION) | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION") | |||||
| set(NO_EXPRECISION 1) | |||||
| endif () | |||||
| # ARCH is quoted so that an empty or undefined value compares as false instead of breaking the if() syntax. | |||||
| if ("${ARCH}" STREQUAL "x86") | |||||
| set(NO_EXPRECISION 1) | |||||
| endif () | |||||
| # UTEST_CHECK implies SANITY_CHECK, so this test has to stay ahead of the SANITY_CHECK one | |||||
| if (UTEST_CHECK) | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") | |||||
| set(SANITY_CHECK 1) | |||||
| endif () | |||||
| if (SANITY_CHECK) | |||||
| # TODO: need some way to get $(*F) (target filename) | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}") | |||||
| endif () | |||||
| @@ -0,0 +1,113 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Description: Ported from OpenBLAS/Makefile.prebuild | |||||
| ## This is triggered by system.cmake and runs before any of the code is built. | |||||
| ## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files). | |||||
| ## Next it runs f_check and appends some fortran information to the files. | |||||
| ## Finally it runs getarch and getarch_2nd for even more environment information. | |||||
| # CMake vars set by this file: | |||||
| # CORE | |||||
| # LIBCORE | |||||
| # NUM_CORES | |||||
| # HAVE_MMX | |||||
| # HAVE_SSE | |||||
| # HAVE_SSE2 | |||||
| # HAVE_SSE3 | |||||
| # MAKE | |||||
| # SGEMM_UNROLL_M | |||||
| # SGEMM_UNROLL_N | |||||
| # DGEMM_UNROLL_M | |||||
| # DGEMM_UNROLL_N | |||||
| # QGEMM_UNROLL_M | |||||
| # QGEMM_UNROLL_N | |||||
| # CGEMM_UNROLL_M | |||||
| # CGEMM_UNROLL_N | |||||
| # ZGEMM_UNROLL_M | |||||
| # ZGEMM_UNROLL_N | |||||
| # XGEMM_UNROLL_M | |||||
| # XGEMM_UNROLL_N | |||||
| # CGEMM3M_UNROLL_M | |||||
| # CGEMM3M_UNROLL_N | |||||
| # ZGEMM3M_UNROLL_M | |||||
| # ZGEMM3M_UNROLL_N | |||||
| # XGEMM3M_UNROLL_M | |||||
| # XGEMM3M_UNROLL_N | |||||
| # CPUIDEMU = ../../cpuid/table.o | |||||
| # With CPUIDEMU defined, the extra defines below are passed to the getarch compile steps via EXFLAGS. | |||||
| if (DEFINED CPUIDEMU) | |||||
| set(EXFLAGS "-DCPUIDEMU -DVENDOR=99") | |||||
| endif () | |||||
| # Output file names: start from the regular names and switch to the kernel variants when TARGET_CORE is defined. | |||||
| set(TARGET_MAKE "Makefile.conf") | |||||
| set(TARGET_CONF "config.h") | |||||
| if (DEFINED TARGET_CORE) | |||||
| # set the C flags for just this file | |||||
| set(GETARCH2_FLAGS "-DBUILD_KERNEL") | |||||
| set(TARGET_MAKE "Makefile_kernel.conf") | |||||
| set(TARGET_CONF "config_kernel.h") | |||||
| endif () | |||||
| # the C check always runs; the Fortran check is skipped when NOFORTRAN is set | |||||
| include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") | |||||
| if (NOT NOFORTRAN) | |||||
| include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") | |||||
| endif () | |||||
# Compile getarch at configure time, run it, and import its results.
# CPUIDEMU (when defined) is added to the source list alongside getarch.c.
set(GETARCH_SRC
  ${CMAKE_SOURCE_DIR}/getarch.c
  ${CPUIDEMU}
)

if (MSVC)
  # Use generic for MSVC now: cpuid.S is not added and the generic target is forced.
  set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
else ()
  list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif ()

set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
try_compile(GETARCH_RESULT ${GETARCH_DIR}
  SOURCES ${GETARCH_SRC}
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)

# Without the getarch binary none of the variables below can be detected, so
# stop here and show the compiler output instead of failing later.
if (NOT GETARCH_RESULT)
  message(FATAL_ERROR "Compiling getarch failed:\n${GETARCH_LOG}")
endif ()

message(STATUS "Running getarch")

# Run the freshly built binary twice: the output of mode 0 is parsed as
# VAR=VALUE pairs, the output of mode 1 is appended to ${TARGET_CONF}.
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)

message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")

# append config data from getarch to the TARGET file and read in CMake vars
# (quoted so the text is passed through as a single, unsplit argument)
file(APPEND ${TARGET_CONF} "${GETARCH_CONF_OUT}")
ParseGetArchVars("${GETARCH_MAKE_OUT}")
# Compile getarch_2nd at configure time, run it, and import its results.
# It is built with the getarch flags plus GETARCH2_FLAGS.
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
  SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH2_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)

# Fail with the compiler output rather than continuing without the binary.
if (NOT GETARCH2_RESULT)
  message(FATAL_ERROR "Compiling getarch_2nd failed:\n${GETARCH2_LOG}")
endif ()

# Run the binary twice: the output of mode 0 is parsed as VAR=VALUE pairs,
# the output of mode 1 is appended to ${TARGET_CONF}.
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)

# append config data from getarch_2nd to the TARGET file and read in CMake vars
# (quoted so the text is passed through as a single, unsplit argument)
file(APPEND ${TARGET_CONF} "${GETARCH2_CONF_OUT}")
ParseGetArchVars("${GETARCH2_MAKE_OUT}")
| @@ -0,0 +1,552 @@ | |||||
| ## | |||||
| ## Author: Hank Anderson <hank@statease.com> | |||||
| ## Description: Ported from OpenBLAS/Makefile.system | |||||
| ## | |||||
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")

# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
# http://stackoverflow.com/questions/714100/os-detecting-makefile

# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa

# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
if (DEFINED TARGET_CORE)
  set(TARGET ${TARGET_CORE})
endif ()

# Force fallbacks for 32bit: AVX is disabled and targets are replaced by the
# fallback chosen below. ${TARGET} is quoted so that an empty value compares
# as a string instead of producing a malformed if().
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
  message(STATUS "Compiling a ${BINARY}-bit binary.")
  set(NO_AVX 1)
  if ("${TARGET}" STREQUAL "HASWELL" OR "${TARGET}" STREQUAL "SANDYBRIDGE")
    set(TARGET "NEHALEM")
  endif ()
  if ("${TARGET}" STREQUAL "BULLDOZER" OR "${TARGET}" STREQUAL "PILEDRIVER")
    set(TARGET "BARCELONA")
  endif ()
endif ()

# An explicit TARGET makes getarch report that core via -DFORCE_<TARGET>.
# Note this starts GETARCH_FLAGS afresh; later settings append to it.
if (DEFINED TARGET)
  message(STATUS "Targetting the ${TARGET} architecture.")
  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif ()
# Assemble the remaining preprocessor/compiler flags handed to getarch.
# GETARCH_FLAGS is a single space-separated string; append order is kept.

if (INTERFACE64)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
  message(STATUS "Using 64-bit integers.")
endif ()

# Default the GEMM multithreading threshold to 4 unless the user chose one.
if (DEFINED GEMM_MULTITHREAD_THRESHOLD)
else ()
  set(GEMM_MULTITHREAD_THRESHOLD 4)
endif ()
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")

if (NO_AVX)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
  message(STATUS "Disabling Advanced Vector Extensions (AVX).")
endif ()

if (NO_AVX2)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
  message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
endif ()

# Debug builds of the project also build getarch with -g.
if (CMAKE_BUILD_TYPE STREQUAL Debug)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
endif ()

# TODO: let CMake handle this? -hpa
#if (${QUIET_MAKE})
#  set(MAKE "${MAKE} -s")
#endif()

# NO_PARALLEL_MAKE defaults to 0 and is always forwarded to getarch.
if (DEFINED NO_PARALLEL_MAKE)
else ()
  set(NO_PARALLEL_MAKE 0)
endif ()
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")

if (CMAKE_CXX_COMPILER STREQUAL loongcc)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
endif ()

#if don't use Fortran, it will only compile CBLAS.
if (NOT ONLY_CBLAS)
  set(ONLY_CBLAS 0)
else ()
  set(NO_LAPACK 1)
endif ()
# Run the prebuild step (c_check / f_check / getarch); NUM_CORES used below
# is one of the variables it is documented to set.
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")

# Default the thread count to the detected core count.
if (NOT DEFINED NUM_THREADS)
  set(NUM_THREADS ${NUM_CORES})
endif ()

# A single thread implies a non-threaded build. The value is quoted so that an
# empty NUM_THREADS (e.g. NUM_CORES was not detected) compares false instead
# of producing a malformed if().
if ("${NUM_THREADS}" EQUAL 1)
  set(USE_THREAD 0)
endif ()

# SMP is set to 1 for threaded builds and left unset otherwise.
if (DEFINED USE_THREAD)
  if (USE_THREAD)
    set(SMP 1)
  else ()
    unset(SMP)
  endif ()
else ()
  # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
  # USE_THREAD is still undefined here, so NUM_THREADS is not 1 (the check
  # above would have set USE_THREAD to 0); the build is therefore threaded.
  set(SMP 1)
endif ()

if (SMP)
  message(STATUS "SMP enabled.")
endif ()

# Position-independent code is requested unless the user decided otherwise.
if (NOT DEFINED NEED_PIC)
  set(NEED_PIC 1)
endif ()
# TODO: I think CMake should be handling all this stuff -hpa
unset(ARFLAGS)
set(CPP "${COMPILER} -E")

# Each tool variable (AR, AS, LD, ...) is the lower-case tool name prefixed
# with CROSS_SUFFIX.
foreach (binutil ar as ld ranlib nm dllwrap objcopy objconv)
  string(TOUPPER ${binutil} binutil_var)
  set(${binutil_var} "${CROSS_SUFFIX}${binutil}")
endforeach ()

# OS dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")

# Architecture dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")

# C Compiler dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")

# Fortran Compiler dependent settings (skipped when NOFORTRAN is set)
if (NOT NOFORTRAN)
  include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
endif ()
# Placeholder kept from Makefile.system: nothing is set here at present.
if (BINARY64)
  if (INTERFACE64)
    # CCOMMON_OPT += -DUSE64BITINT
  endif ()
endif ()

# Position-independent code flags for the C and Fortran compilers.
# The compiler variables are quoted so that an empty value (e.g. F_COMPILER
# when Fortran is disabled) compares as a string instead of breaking if().
if (NEED_PIC)
  # NOTE(review): CMAKE_C_COMPILER normally holds the compiler's path, so this
  # comparison with "IBM" looks unlikely to match -- confirm the intended variable.
  if ("${CMAKE_C_COMPILER}" STREQUAL "IBM")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()

if (DYNAMIC_ARCH)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
endif ()

if (NO_LAPACK)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
  #Disable LAPACK C interface
  set(NO_LAPACKE 1)
endif ()

if (NO_LAPACKE)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE")
endif ()

if (NO_AVX)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()

# 32-bit x86 builds always get -DNO_AVX.
if ("${ARCH}" STREQUAL "x86")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()

if (NO_AVX2)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
endif ()
# Threading-related definitions; only evaluated for SMP builds.
if (SMP)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")

  # mips64 cores other than LOONGSON3B use the simple threaded level-3 driver.
  if (${ARCH} STREQUAL "mips64")
    if (NOT ${CORE} STREQUAL "LOONGSON3B")
      set(USE_SIMPLE_THREADED_LEVEL3 1)
    endif ()
  endif ()

  # Each enabled option contributes a -D<OPTION> definition.
  # (for USE_OPENMP, Makefile.system also had, commented out:
  #  USE_SIMPLE_THREADED_LEVEL3 = 1 and NO_AFFINITY = 1)
  foreach (smp_option USE_OPENMP BIGNUMA)
    if (${smp_option})
      set(CCOMMON_OPT "${CCOMMON_OPT} -D${smp_option}")
    endif ()
  endforeach ()
endif ()

# Simple on/off options that map directly onto a -D<OPTION> definition.
foreach (plain_option NO_WARMUP CONSISTENT_FPCSR)
  if (${plain_option})
    set(CCOMMON_OPT "${CCOMMON_OPT} -D${plain_option}")
  endif ()
endforeach ()

# Only for development
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
# set(USE_PAPI 1)

# PAPI adds a definition and two extra link libraries.
if (USE_PAPI)
  set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
endif ()

if (DYNAMIC_THREADS)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
endif ()

# MAX_CPU_NUMBER is always defined, from NUM_THREADS.
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")

if (USE_SIMPLE_THREADED_LEVEL3)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()
# Library file name prefix, optionally carrying a user-supplied suffix.
if (DEFINED LIBNAMESUFFIX)
  set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
else ()
  set(LIBPREFIX "libopenblas")
endif ()

# Symbol prefix/suffix default to empty strings.
if (NOT DEFINED SYMBOLPREFIX)
  set(SYMBOLPREFIX "")
endif ()

if (NOT DEFINED SYMBOLSUFFIX)
  set(SYMBOLSUFFIX "")
endif ()

set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")

# TODO: need to convert these Makefiles
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake

# CORE / ARCH / CMAKE_SYSTEM_NAME are quoted below so that an empty value
# compares as a string instead of producing a malformed if().
if ("${CORE}" STREQUAL "PPC440")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
endif ()

if ("${CORE}" STREQUAL "PPC440FP2")
  set(STATIC_ALLOCATION 1)
endif ()

# Affinity stays enabled only on Linux, and there only for x86, x86_64 or
# the LOONGSON3B core.
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
  set(NO_AFFINITY 1)
endif ()

if (NOT "${ARCH}" STREQUAL "x86_64" AND NOT "${ARCH}" STREQUAL "x86" AND NOT "${CORE}" STREQUAL "LOONGSON3B")
  set(NO_AFFINITY 1)
endif ()

if (NO_AFFINITY)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
endif ()

if (FUNCTION_PROFILE)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
endif ()

# Memory allocation strategy definitions.
if (HUGETLB_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
endif ()

if (DEFINED HUGETLBFILE_ALLOCATION)
  # The value of HUGETLBFILE_ALLOCATION becomes HUGETLB_FILE_NAME verbatim
  # (no trailing ")" -- that was a leftover of the Makefile's $(...) syntax).
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()

if (STATIC_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
endif ()

if (DEVICEDRIVER_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
endif ()

if (MIXED_MEMORY_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
endif ()
# Command-line helper tools: SunOS uses the g-prefixed names.
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
  set(TAR tar)
  set(PATCH patch)
  set(GREP grep)
else ()
  set(TAR gtar)
  set(PATCH gpatch)
  set(GREP ggrep)
endif ()

if (DEFINED MD5SUM)
else ()
  set(MD5SUM md5sum)
endif ()

set(AWK awk)

# Version information derived from the project version variables.
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
set(REVISION "-r${OpenBLAS_VERSION}")

# Order matters here: appending -g for DEBUG defines COMMON_OPT, which in turn
# suppresses the -O2 default below.
if (DEBUG)
  set(COMMON_OPT "${COMMON_OPT} -g")
endif ()

if (DEFINED COMMON_OPT)
else ()
  set(COMMON_OPT "-O2")
endif ()

#For x86 32-bit
if (DEFINED BINARY AND BINARY EQUAL 32 AND NOT MSVC)
  set(COMMON_OPT "${COMMON_OPT} -m32")
endif ()
# Fold the common options collected so far into the C flags (and into the
# assembler flags for every compiler except MSVC).
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
if (NOT MSVC)
  set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
endif ()

# TODO: not sure what PFLAGS is -hpa
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")

set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")

# TODO: not sure what FPFLAGS is -hpa
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")

#For LAPACK Fortran codes.
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")

#Disable -fopenmp for LAPACK Fortran codes on Windows.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  foreach (FILTER_FLAG ${FILTER_FLAGS})
    # Inputs are quoted so each flag string is handled as one argument.
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FFLAGS "${LAPACK_FFLAGS}")
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FPFLAGS "${LAPACK_FPFLAGS}")
  endforeach ()
endif ()

if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  # lapack-netlib is rife with uninitialized warnings -hpa
  set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
endif ()

# LAPACK C sources are built with the project's C flags plus
# LAPACK-specific definitions.
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")
if (INTERFACE64)
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
endif ()

if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
endif ()

if ("${CMAKE_C_COMPILER}" STREQUAL "LSB" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif ()
# Default file suffixes for objects, profiled objects and static libraries.
if (NOT DEFINED SUFFIX)
  set(SUFFIX o)
endif ()

if (NOT DEFINED PSUFFIX)
  set(PSUFFIX po)
endif ()

if (NOT DEFINED LIBSUFFIX)
  set(LIBSUFFIX a)
endif ()

# Build the library name without its file suffix:
#   <prefix>[_<libcore>][p]<revision>
# "_<libcore>" is left out for DYNAMIC_ARCH builds; "p" marks an SMP build.
if (DYNAMIC_ARCH)
  set(libname_stem "${LIBPREFIX}")
else ()
  set(libname_stem "${LIBPREFIX}_${LIBCORE}")
endif ()
if (DEFINED SMP)
  set(libname_stem "${libname_stem}p")
endif ()
set(libname_stem "${libname_stem}${REVISION}")

set(LIBNAME "${libname_stem}.${LIBSUFFIX}")
set(LIBNAME_P "${libname_stem}_p.${LIBSUFFIX}")

set(LIBDLLNAME "${LIBPREFIX}.dll")

# Makefile.system derives these names by replacing LIBNAME's ".<LIBSUFFIX>"
# ending, so the new extension goes directly onto the stem.
set(LIBSONAME "${libname_stem}.so")
set(LIBDYNNAME "${libname_stem}.dylib")
set(LIBDEFNAME "${libname_stem}.def")
set(LIBEXPNAME "${libname_stem}.exp")
set(LIBZIPNAME "${libname_stem}.zip")

set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
# LIB_COMPONENTS is a space-separated string naming the parts of the library
# that get built. ONLY_CBLAS reduces it to CBLAS alone.
if (ONLY_CBLAS)
  set(LIB_COMPONENTS CBLAS)
else ()
  set(LIB_COMPONENTS BLAS)
  if (NOT NO_CBLAS)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
  endif ()
  if (NOT NO_LAPACK)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
    # LAPACKE is only listed together with LAPACK.
    if (NOT NO_LAPACKE)
      set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
    endif ()
  endif ()
endif ()

# For GEMM3M: off by default, switched on for the listed architectures and
# switched off again for the "generic" core.
set(USE_GEMM3M 0)

if (DEFINED ARCH)
  if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif ()

  if (${CORE} STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif ()
endif ()
| #export OSNAME | |||||
| #export ARCH | |||||
| #export CORE | |||||
| #export LIBCORE | |||||
| #export PGCPATH | |||||
| #export CONFIG | |||||
| #export CC | |||||
| #export FC | |||||
| #export BU | |||||
| #export FU | |||||
| #export NEED2UNDERSCORES | |||||
| #export USE_THREAD | |||||
| #export NUM_THREADS | |||||
| #export NUM_CORES | |||||
| #export SMP | |||||
| #export MAKEFILE_RULE | |||||
| #export NEED_PIC | |||||
| #export BINARY | |||||
| #export BINARY32 | |||||
| #export BINARY64 | |||||
| #export F_COMPILER | |||||
| #export C_COMPILER | |||||
| #export USE_OPENMP | |||||
| #export CROSS | |||||
| #export CROSS_SUFFIX | |||||
| #export NOFORTRAN | |||||
| #export NO_FBLAS | |||||
| #export EXTRALIB | |||||
| #export CEXTRALIB | |||||
| #export FEXTRALIB | |||||
| #export HAVE_SSE | |||||
| #export HAVE_SSE2 | |||||
| #export HAVE_SSE3 | |||||
| #export HAVE_SSSE3 | |||||
| #export HAVE_SSE4_1 | |||||
| #export HAVE_SSE4_2 | |||||
| #export HAVE_SSE4A | |||||
| #export HAVE_SSE5 | |||||
| #export HAVE_AVX | |||||
| #export HAVE_VFP | |||||
| #export HAVE_VFPV3 | |||||
| #export HAVE_VFPV4 | |||||
| #export HAVE_NEON | |||||
| #export KERNELDIR | |||||
| #export FUNCTION_PROFILE | |||||
| #export TARGET_CORE | |||||
| # | |||||
| #export SGEMM_UNROLL_M | |||||
| #export SGEMM_UNROLL_N | |||||
| #export DGEMM_UNROLL_M | |||||
| #export DGEMM_UNROLL_N | |||||
| #export QGEMM_UNROLL_M | |||||
| #export QGEMM_UNROLL_N | |||||
| #export CGEMM_UNROLL_M | |||||
| #export CGEMM_UNROLL_N | |||||
| #export ZGEMM_UNROLL_M | |||||
| #export ZGEMM_UNROLL_N | |||||
| #export XGEMM_UNROLL_M | |||||
| #export XGEMM_UNROLL_N | |||||
| #export CGEMM3M_UNROLL_M | |||||
| #export CGEMM3M_UNROLL_N | |||||
| #export ZGEMM3M_UNROLL_M | |||||
| #export ZGEMM3M_UNROLL_N | |||||
| #export XGEMM3M_UNROLL_M | |||||
| #export XGEMM3M_UNROLL_N | |||||
| #if (USE_CUDA) | |||||
| # export CUDADIR | |||||
| # export CUCC | |||||
| # export CUFLAGS | |||||
| # export CULIB | |||||
| #endif | |||||
| #.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f | |||||
| # | |||||
| #.f.$(SUFFIX): | |||||
| # $(FC) $(FFLAGS) -c $< -o $(@F) | |||||
| # | |||||
| #.f.$(PSUFFIX): | |||||
| # $(FC) $(FPFLAGS) -pg -c $< -o $(@F) | |||||
| # these are not cross-platform | |||||
| #ifdef BINARY64 | |||||
| #PATHSCALEPATH = /opt/pathscale/lib/3.1 | |||||
| #PGIPATH = /opt/pgi/linux86-64/7.1-5/lib | |||||
| #else | |||||
| #PATHSCALEPATH = /opt/pathscale/lib/3.1/32 | |||||
| #PGIPATH = /opt/pgi/linux86/7.1-5/lib | |||||
| #endif | |||||
| #ACMLPATH = /opt/acml/4.3.0 | |||||
| #ifneq ($(OSNAME), Darwin) | |||||
| #MKLPATH = /opt/intel/mkl/10.2.2.025/lib | |||||
| #else | |||||
| #MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib | |||||
| #endif | |||||
| #ATLASPATH = /opt/atlas/3.9.17/opteron | |||||
| #FLAMEPATH = $(HOME)/flame/lib | |||||
| #ifneq ($(OSNAME), SunOS) | |||||
| #SUNPATH = /opt/sunstudio12.1 | |||||
| #else | |||||
| #SUNPATH = /opt/SUNWspro | |||||
| #endif | |||||
| @@ -0,0 +1,346 @@ | |||||
| # Functions to help with the OpenBLAS build | |||||
# Imports getarch output into CMake variables of the calling scope.
# @param GETARCH_IN text containing NAME=VALUE pairs; names and values are runs
#        of letters, digits and underscores. Anything else in the text is ignored.
function(ParseGetArchVars GETARCH_IN)
  # collect every NAME=VALUE occurrence in the input
  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" assignments "${GETARCH_IN}")
  foreach (assignment ${assignments})
    # capture both sides of the '=' and define NAME in the caller's scope
    string(REGEX MATCH "^([0-9_a-zA-Z]+)=([0-9_a-zA-Z]+)$" whole_match "${assignment}")
    set(${CMAKE_MATCH_1} ${CMAKE_MATCH_2} PARENT_SCOPE)
  endforeach ()
endfunction ()
# Reads the variable assignments of a Makefile into CMake variables.
# Every line containing "NAME = VALUE" defines NAME; $(OTHER) references inside
# VALUE are replaced by the current value of the CMake variable OTHER.
# Lines of the form "include $(KERNELDIR)/<file>" are followed recursively.
# This is a macro, so every variable it sets lands in the caller's scope.
# @param MAKEFILE_IN path of the Makefile to read
macro(ParseMakefileVars MAKEFILE_IN)
  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
  file(STRINGS ${MAKEFILE_IN} makefile_contents)
  foreach (makefile_line ${makefile_contents})
    string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
    if ("${line_match}" STREQUAL "")
      # not an assignment: check whether the line includes another kernel Makefile
      string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
      if (NOT "${line_match}" STREQUAL "")
        ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
      endif ()
    else ()
      # save the captures first; the regex calls below overwrite CMAKE_MATCH_<n>
      set(var_name ${CMAKE_MATCH_1})
      set(var_value ${CMAKE_MATCH_2})
      # check for Makefile variables in the string, e.g. $(TSUFFIX)
      string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value})
      foreach (make_var ${make_var_matches})
        # strip out Makefile $() markup
        string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var ${make_var})
        # now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote)
        string(REPLACE "$(${make_var})" "${${make_var}}" var_value ${var_value})
      endforeach ()
      set(${var_name} ${var_value})
    endif ()
  endforeach ()
endmacro ()
# Returns all combinations of the input list, as a list with colon-separated combinations.
# Combinations are produced in binary counting order and include the empty set,
# e.g. input of A B C returns " " A B A:B C A:C B:C A:B:C (the empty set is a single space).
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")).
# @param list_in the list of items to combine
# @param absent_codes_in codes to use when an element is absent from a combination. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
# @returns LIST_OUT a list of combinations
#          CODES_OUT a list of codes corresponding to each combination: for each item, the first letter of the item if it is present, otherwise the matching entry of absent_codes_in
function(AllCombinations list_in absent_codes_in)
  list(LENGTH list_in list_count)
  # there are 2^list_count combinations; subtract 1 since foreach(RANGE) includes its upper bound
  math(EXPR num_combos "(1 << ${list_count}) - 1")
  # index of the last list element, computed once for the inner loop
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${num_combos})
    set(current_combo "")
    set(current_code "")

    # bit i of c decides whether element i takes part in this combination
    foreach (list_index RANGE 0 ${last_list_index})
      math(EXPR bit "1 << ${list_index}")
      math(EXPR combo_has_bit "${c} & ${bit}")
      list(GET list_in ${list_index} list_elem)
      if (combo_has_bit)
        # compare against the empty string rather than testing truthiness, so
        # that elements such as N, NO or OFF do not look like an empty combination
        if (current_combo STREQUAL "")
          set(current_combo ${list_elem})
        else ()
          set(current_combo "${current_combo}:${list_elem}")
        endif ()
        string(SUBSTRING ${list_elem} 0 1 code_char)
      else ()
        list(GET absent_codes_in ${list_index} code_char)
      endif ()
      set(current_code "${current_code}${code_char}")
    endforeach ()

    if (current_combo STREQUAL "")
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
    else ()
      list(APPEND LIST_OUT ${current_combo})
    endif ()
    list(APPEND CODES_OUT ${current_code})
  endforeach ()

  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
# Generates one wrapper source file per (float type, source) pair, using the BLAS naming scheme to pass the function name as a preprocessor definition.
# Each wrapper #defines the names and options for one object and then #includes the real source; the wrappers are appended to OPENBLAS_SRC in the caller's scope.
# @param sources_in the source files to build from
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
#                           e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param use_cblas (optional) if true, the object name is prefixed with "cblas_" and CBLAS is added to the definitions
# @param replace_last_with (optional) replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
# @param append_with (optional) appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) some routines have separate source files for complex and non-complex float types.
#                               0 - compiles for all types
#                               1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
#                               2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
#                               3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
#                               4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
#                               STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)

  # Optional arguments are detected through ARGC: an ARGV<n> that was not
  # passed to this call may still be visible from a calling function's scope.
  # Defaults are set explicitly for the same reason.
  set(defines_in "")
  if (ARGC GREATER 1)
    set(defines_in ${ARGV1})
  endif ()

  set(name_in "")
  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
    set(name_in ${ARGV2})
    # strip off extension for kernel files that pass in the object name.
    get_filename_component(name_in ${name_in} NAME_WE)
  endif ()

  set(use_cblas false)
  if (ARGC GREATER 3)
    set(use_cblas ${ARGV3})
  endif ()

  set(replace_last_with "")
  if (ARGC GREATER 4)
    set(replace_last_with ${ARGV4})
  endif ()

  set(append_with "")
  if (ARGC GREATER 5)
    set(append_with ${ARGV5})
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type ${ARGV6})
  endif ()

  if (no_float_type)
    set(float_list "DUMMY") # still need to loop once
  else ()
    set(float_list "${FLOAT_TYPES}")
  endif ()

  # decode complex_filename_scheme (see the header comment for the values)
  set(real_only false)
  set(complex_only false)
  set(mangle_complex_sources false)
  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
    if ("${ARGV7}" EQUAL 1)
      set(real_only true)
    elseif ("${ARGV7}" EQUAL 2)
      set(complex_only true)
    elseif ("${ARGV7}" EQUAL 3)
      set(mangle_complex_sources true)
    elseif ("${ARGV7}" EQUAL 4)
      set(mangle_complex_sources true)
      set(complex_only true)
    elseif (NOT "${ARGV7}" EQUAL 0)
      # anything else is taken as the single float type to build
      set(float_list ${ARGV7})
    endif ()
  endif ()

  if (complex_only)
    list(REMOVE_ITEM float_list "SINGLE")
    list(REMOVE_ITEM float_list "DOUBLE")
  elseif (real_only)
    list(REMOVE_ITEM float_list "COMPLEX")
    list(REMOVE_ITEM float_list "ZCOMPLEX")
  endif ()

  set(float_char "")
  # start from an empty list so entries of a same-named variable in the
  # calling scope cannot end up in OPENBLAS_SRC
  set(SRC_LIST_OUT "")
  foreach (float_type ${float_list})

    # the name character is the lower-cased first letter of the float type
    if (NOT no_float_type)
      string(SUBSTRING ${float_type} 0 1 float_char)
      string(TOLOWER ${float_char} float_char)
    endif ()

    foreach (source_file ${sources_in})

      if (NOT name_in)
        get_filename_component(source_name ${source_file} NAME_WE)
        set(obj_name "${float_char}${source_name}")
      else ()
        # replace * with float_char (quoted: float_char is empty when no_float_type is set)
        if ("${name_in}" MATCHES "\\*")
          string(REPLACE "*" "${float_char}" obj_name "${name_in}")
        else ()
          set(obj_name "${float_char}${name_in}")
        endif ()
      endif ()

      if (replace_last_with)
        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
      else ()
        set(obj_name "${obj_name}${append_with}")
      endif ()

      # now add the object and set the defines
      set(obj_defines ${defines_in})

      if (use_cblas)
        set(obj_name "cblas_${obj_name}")
        list(APPEND obj_defines "CBLAS")
      endif ()

      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
      if ("${float_type}" STREQUAL "DOUBLE" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "DOUBLE")
      endif ()
      if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "COMPLEX")
        if (mangle_complex_sources)
          # add a z to the filename
          get_filename_component(source_name ${source_file} NAME)
          string(REPLACE ${source_name} "z${source_name}" source_file ${source_file})
        endif ()
      endif ()

      if (VERBOSE_GEN)
        message(STATUS "${obj_name}:${source_file}")
        message(STATUS "${obj_defines}")
      endif ()

      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
      get_filename_component(source_extension ${source_file} EXT)
      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
      if (IS_ABSOLUTE ${source_file})
        set(old_source_file ${source_file})
      else ()
        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
      endif ()

      # turn "A=B;C" into "#define A B" / "#define C" lines ahead of the #include
      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
      string(REPLACE "=" " " define_source "${define_source}")
      file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
      list(APPEND SRC_LIST_OUT ${new_source_file})

    endforeach ()
  endforeach ()

  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
| # Generates object files for each of the sources, once for every combination of the preprocessor definitions passed in. | |||||
| # The combinations and their name codes come from AllCombinations (read back through LIST_OUT and CODES_OUT); | |||||
| # each (source, combination) pair is then handed to GenerateNamedObjects. | |||||
| # @param sources_in the source files to build from | |||||
| # @param defines_in the preprocessor definitions that will be combined to create the object files | |||||
| # @param absent_codes_in forwarded to AllCombinations together with defines_in (presumably the code letters used when a define is absent from a combination; confirm against AllCombinations) | |||||
| # @param all_defines_in preprocessor definitions that will be applied to all objects (may be empty) | |||||
| # @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU. | |||||
| # If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU. | |||||
| # If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU. | |||||
| # If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects). | |||||
| # If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel | |||||
| # @param alternate_name (optional, ARGV5) replaces the source name as the object name (define codes are still appended) | |||||
| # @param no_float_type (optional, ARGV6) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) | |||||
| # @param complex_filename_scheme (optional, ARGV7) see GenerateNamedObjects | |||||
| function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme) | |||||
|   # optional positional arguments fall back to these defaults when not supplied | |||||
|   set(alternate_name_in "") | |||||
|   if (DEFINED ARGV5) | |||||
|     set(alternate_name_in ${ARGV5}) | |||||
|   endif () | |||||
|   set(no_float_type false) | |||||
|   if (DEFINED ARGV6) | |||||
|     set(no_float_type ${ARGV6}) | |||||
|   endif () | |||||
|   set(complex_filename_scheme "") | |||||
|   if (DEFINED ARGV7) | |||||
|     set(complex_filename_scheme ${ARGV7}) | |||||
|   endif () | |||||
|   AllCombinations("${defines_in}" "${absent_codes_in}") | |||||
|   set(define_combos ${LIST_OUT}) | |||||
|   set(define_codes ${CODES_OUT}) | |||||
|   # foreach(RANGE) is inclusive, so iterate up to the last valid index | |||||
|   list(LENGTH define_combos num_combos) | |||||
|   math(EXPR num_combos "${num_combos} - 1") | |||||
|   foreach (c RANGE 0 ${num_combos}) | |||||
|     list(GET define_combos ${c} define_combo) | |||||
|     list(GET define_codes ${c} define_code) | |||||
|     # Replace the colon separated list with semicolons, turning it into a regular CMake list. | |||||
|     # This is done once per combination and with a quoted input: string(REPLACE) concatenates | |||||
|     # multiple input arguments, so re-running it unquoted on an already converted list | |||||
|     # (as happened for every source after the first) would glue the defines together. | |||||
|     string(REPLACE ":" ";" define_combo "${define_combo}") | |||||
|     # a combination consisting of a single space means "no defines"; only the common defines apply then | |||||
|     set(cur_defines ${define_combo}) | |||||
|     if ("${cur_defines}" STREQUAL " ") | |||||
|       set(cur_defines ${all_defines_in}) | |||||
|     else () | |||||
|       list(APPEND cur_defines ${all_defines_in}) | |||||
|     endif () | |||||
|     foreach (source_file ${sources_in}) | |||||
|       # schemes 3 and 4 derive the name per source, so start from the caller's value each time | |||||
|       set(alternate_name ${alternate_name_in}) | |||||
|       set(replace_code "") | |||||
|       set(append_code "") | |||||
|       if (replace_scheme EQUAL 1) | |||||
|         set(replace_code ${define_code}) | |||||
|       else () | |||||
|         if (replace_scheme EQUAL 2) | |||||
|           set(append_code "_${define_code}") | |||||
|         elseif (replace_scheme EQUAL 3) | |||||
|           # first extract the last letter of the name (the letter right before the extension for a source file) | |||||
|           if ("${alternate_name}" STREQUAL "") | |||||
|             string(REGEX MATCH "[a-zA-Z]\\." last_letter ${source_file}) | |||||
|           else () | |||||
|             string(REGEX MATCH "[a-zA-Z]$" last_letter ${alternate_name}) | |||||
|           endif () | |||||
|           string(SUBSTRING ${last_letter} 0 1 last_letter) # remove period from match | |||||
|           # break the code up into the first letter and the remaining (should only be 2 anyway) | |||||
|           string(SUBSTRING ${define_code} 0 1 define_code_first) | |||||
|           string(SUBSTRING ${define_code} 1 -1 define_code_second) | |||||
|           set(replace_code "${define_code_first}${last_letter}${define_code_second}") | |||||
|         elseif (replace_scheme EQUAL 4) | |||||
|           # insert code before the last underscore and pass that in as the alternate_name | |||||
|           if ("${alternate_name}" STREQUAL "") | |||||
|             get_filename_component(alternate_name ${source_file} NAME_WE) | |||||
|           endif () | |||||
|           set(extra_underscore "") | |||||
|           # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel) | |||||
|           string(REGEX MATCH "_[a-zA-Z]+_" underscores ${alternate_name}) | |||||
|           string(LENGTH "${underscores}" underscores) | |||||
|           if (underscores EQUAL 0) | |||||
|             set(extra_underscore "_") | |||||
|           endif () | |||||
|           string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name ${alternate_name}) | |||||
|         else() | |||||
|           set(append_code ${define_code}) # replace_scheme should be 0 | |||||
|         endif () | |||||
|       endif () | |||||
|       # now add the object and set the defines | |||||
|       GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}") | |||||
|     endforeach () | |||||
|   endforeach () | |||||
|   # re-export the source list accumulated by GenerateNamedObjects to the caller | |||||
|   set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE) | |||||
| endfunction () | |||||
| @@ -82,7 +82,10 @@ extern "C" { | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <string.h> | #include <string.h> | ||||
| #if !defined(_MSC_VER) | |||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #endif | |||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| #include <malloc.h> | #include <malloc.h> | ||||
| @@ -95,6 +98,10 @@ extern "C" { | |||||
| #ifdef OS_ANDROID | #ifdef OS_ANDROID | ||||
| #define NO_SYSV_IPC | #define NO_SYSV_IPC | ||||
| //Android NDK only supports complex.h since Android 5.0 | |||||
| #if __ANDROID_API__ < 21 | |||||
| #define FORCE_OPENBLAS_COMPLEX_STRUCT | |||||
| #endif | |||||
| #endif | #endif | ||||
| #ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
| @@ -114,6 +121,7 @@ extern "C" { | |||||
| #include <sys/shm.h> | #include <sys/shm.h> | ||||
| #endif | #endif | ||||
| #include <sys/time.h> | #include <sys/time.h> | ||||
| #include <time.h> | |||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #include <math.h> | #include <math.h> | ||||
| #ifdef SMP | #ifdef SMP | ||||
| @@ -293,13 +301,6 @@ typedef int blasint; | |||||
| #define COMPSIZE 2 | #define COMPSIZE 2 | ||||
| #endif | #endif | ||||
| #if defined(C_PGI) || defined(C_SUN) | |||||
| #define CREAL(X) (*((FLOAT *)&X + 0)) | |||||
| #define CIMAG(X) (*((FLOAT *)&X + 1)) | |||||
| #else | |||||
| #define CREAL __real__ | |||||
| #define CIMAG __imag__ | |||||
| #endif | |||||
| #define Address_H(x) (((x)+(1<<15))>>16) | #define Address_H(x) (((x)+(1<<15))>>16) | ||||
| #define Address_L(x) ((x)-((Address_H(x))<<16)) | #define Address_L(x) ((x)-((Address_H(x))<<16)) | ||||
| @@ -313,8 +314,12 @@ typedef int blasint; | |||||
| #endif | #endif | ||||
| #if defined(OS_WINDOWS) | #if defined(OS_WINDOWS) | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| #define YIELDING YieldProcessor() | |||||
| #else | |||||
| #define YIELDING SwitchToThread() | #define YIELDING SwitchToThread() | ||||
| #endif | #endif | ||||
| #endif | |||||
| #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) | #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) | ||||
| #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); | #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); | ||||
| @@ -500,18 +505,52 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
| /* C99 supports complex floating numbers natively, which GCC also offers as an | /* C99 supports complex floating numbers natively, which GCC also offers as an | ||||
| extension since version 3.0. If neither are available, use a compatible | extension since version 3.0. If neither are available, use a compatible | ||||
| structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | ||||
| #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | |||||
| (__GNUC__ >= 3 && !defined(__cplusplus))) | |||||
| #if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | |||||
| (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) | |||||
| #define OPENBLAS_COMPLEX_C99 | #define OPENBLAS_COMPLEX_C99 | ||||
| #ifndef __cplusplus | |||||
| #include <complex.h> | |||||
| #endif | |||||
| typedef float _Complex openblas_complex_float; | typedef float _Complex openblas_complex_float; | ||||
| typedef double _Complex openblas_complex_double; | typedef double _Complex openblas_complex_double; | ||||
| typedef xdouble _Complex openblas_complex_xdouble; | typedef xdouble _Complex openblas_complex_xdouble; | ||||
| #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
| #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
| #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
| #else | #else | ||||
| #define OPENBLAS_COMPLEX_STRUCT | #define OPENBLAS_COMPLEX_STRUCT | ||||
| typedef struct { float real, imag; } openblas_complex_float; | typedef struct { float real, imag; } openblas_complex_float; | ||||
| typedef struct { double real, imag; } openblas_complex_double; | typedef struct { double real, imag; } openblas_complex_double; | ||||
| typedef struct { xdouble real, imag; } openblas_complex_xdouble; | typedef struct { xdouble real, imag; } openblas_complex_xdouble; | ||||
| #define openblas_make_complex_float(real, imag) {(real), (imag)} | |||||
| #define openblas_make_complex_double(real, imag) {(real), (imag)} | |||||
| #define openblas_make_complex_xdouble(real, imag) {(real), (imag)} | |||||
| #endif | |||||
| #ifdef XDOUBLE | |||||
| #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble | |||||
| #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i) | |||||
| #elif defined(DOUBLE) | |||||
| #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double | |||||
| #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i) | |||||
| #else | |||||
| #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float | |||||
| #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i) | |||||
| #endif | #endif | ||||
| #if defined(C_PGI) || defined(C_SUN) | |||||
| #define CREAL(X) (*((FLOAT *)&X + 0)) | |||||
| #define CIMAG(X) (*((FLOAT *)&X + 1)) | |||||
| #else | |||||
| #ifdef OPENBLAS_COMPLEX_STRUCT | |||||
| #define CREAL(Z) ((Z).real) | |||||
| #define CIMAG(Z) ((Z).imag) | |||||
| #else | |||||
| #define CREAL __real__ | |||||
| #define CIMAG __imag__ | |||||
| #endif | |||||
| #endif | |||||
| #endif // ASSEMBLER | #endif // ASSEMBLER | ||||
| #ifndef IFLUSH | #ifndef IFLUSH | ||||
| @@ -528,6 +567,10 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(C_MSVC) | |||||
| #define inline __inline | |||||
| #endif | |||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #ifndef MIN | #ifndef MIN | ||||
| @@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
| double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); | double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
| xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
| float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
| float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
| double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
| double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
| xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
| xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
| openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
| openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
| openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
| openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
| openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
| openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
| int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float, | int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float, | ||||
| float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
| @@ -830,56 +830,56 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||||
| int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | ||||
| int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
| int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
| int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
| int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
| int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
| int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | ||||
| int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | ||||
| int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | ||||
| @@ -56,11 +56,23 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
| do { | do { | ||||
| while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| // use intrinsic instead of inline assembly | |||||
| ret = _InterlockedExchange(address, 1); | |||||
| // inline assembly | |||||
| /*__asm { | |||||
| mov eax, address | |||||
| mov ebx, 1 | |||||
| xchg [eax], ebx | |||||
| mov ret, ebx | |||||
| }*/ | |||||
| #else | |||||
| __asm__ __volatile__( | __asm__ __volatile__( | ||||
| "xchgl %0, %1\n" | "xchgl %0, %1\n" | ||||
| : "=r"(ret), "=m"(*address) | : "=r"(ret), "=m"(*address) | ||||
| : "0"(1), "m"(*address) | : "0"(1), "m"(*address) | ||||
| : "memory"); | : "memory"); | ||||
| #endif | |||||
| } while (ret); | } while (ret); | ||||
| @@ -68,31 +80,43 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
| #define BLAS_LOCK_DEFINED | #define BLAS_LOCK_DEFINED | ||||
| static __inline unsigned long long rpcc(void){ | static __inline unsigned long long rpcc(void){ | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| return __rdtsc(); // use MSVC intrinsic | |||||
| #else | |||||
| unsigned int a, d; | unsigned int a, d; | ||||
| __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | ||||
| return ((unsigned long long)a + ((unsigned long long)d << 32)); | return ((unsigned long long)a + ((unsigned long long)d << 32)); | ||||
| #endif | |||||
| }; | }; | ||||
| #define RPCC_DEFINED | #define RPCC_DEFINED | ||||
| static __inline unsigned long getstackaddr(void){ | static __inline unsigned long getstackaddr(void){ | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| return (unsigned long)_ReturnAddress(); // use MSVC intrinsic | |||||
| #else | |||||
| unsigned long addr; | unsigned long addr; | ||||
| __asm__ __volatile__ ("mov %%esp, %0" | __asm__ __volatile__ ("mov %%esp, %0" | ||||
| : "=r"(addr) : : "memory"); | : "=r"(addr) : : "memory"); | ||||
| return addr; | return addr; | ||||
| #endif | |||||
| }; | }; | ||||
| static __inline long double sqrt_long(long double val) { | static __inline long double sqrt_long(long double val) { | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| return sqrt(val); // not sure if this will use fsqrt | |||||
| #else | |||||
| long double result; | long double result; | ||||
| __asm__ __volatile__ ("fldt %1\n" | __asm__ __volatile__ ("fldt %1\n" | ||||
| "fsqrt\n" | "fsqrt\n" | ||||
| "fstpt %0\n" : "=m" (result) : "m"(val)); | "fstpt %0\n" : "=m" (result) : "m"(val)); | ||||
| return result; | return result; | ||||
| #endif | |||||
| } | } | ||||
| #define SQRT(a) sqrt_long(a) | #define SQRT(a) sqrt_long(a) | ||||
| @@ -102,7 +126,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); | |||||
| #define WHEREAMI | #define WHEREAMI | ||||
| static inline int WhereAmI(void){ | |||||
| static __inline int WhereAmI(void){ | |||||
| int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
| int apicid; | int apicid; | ||||
| @@ -148,9 +172,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||||
| y = blas_quick_divide_table[y]; | y = blas_quick_divide_table[y]; | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| (void*)result; | |||||
| return x*y; | |||||
| #else | |||||
| __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | ||||
| return result; | return result; | ||||
| #endif | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -286,8 +315,12 @@ REALNAME: | |||||
| #define PROFCODE | #define PROFCODE | ||||
| #ifdef __clang__ | |||||
| #define EPILOGUE .end | |||||
| #else | |||||
| #define EPILOGUE .end REALNAME | #define EPILOGUE .end REALNAME | ||||
| #endif | #endif | ||||
| #endif | |||||
| #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) | #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) | ||||
| #define PROLOGUE \ | #define PROLOGUE \ | ||||
| @@ -41,6 +41,10 @@ | |||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #ifdef C_MSVC | |||||
| #include <intrin.h> | |||||
| #endif | |||||
| #ifdef C_SUN | #ifdef C_SUN | ||||
| #define __asm__ __asm | #define __asm__ __asm | ||||
| #define __volatile__ | #define __volatile__ | ||||
| @@ -61,32 +65,45 @@ | |||||
| static void __inline blas_lock(volatile BLASULONG *address){ | static void __inline blas_lock(volatile BLASULONG *address){ | ||||
| #ifndef C_MSVC | |||||
| int ret; | int ret; | ||||
| #else | |||||
| BLASULONG ret; | |||||
| #endif | |||||
| do { | do { | ||||
| while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
| #ifndef C_MSVC | |||||
| __asm__ __volatile__( | __asm__ __volatile__( | ||||
| "xchgl %0, %1\n" | "xchgl %0, %1\n" | ||||
| : "=r"(ret), "=m"(*address) | : "=r"(ret), "=m"(*address) | ||||
| : "0"(1), "m"(*address) | : "0"(1), "m"(*address) | ||||
| : "memory"); | : "memory"); | ||||
| #else | |||||
| ret=InterlockedExchange64((volatile LONG64 *)(address), 1); | |||||
| #endif | |||||
| } while (ret); | } while (ret); | ||||
| } | } | ||||
| #define BLAS_LOCK_DEFINED | #define BLAS_LOCK_DEFINED | ||||
| static __inline BLASULONG rpcc(void){ | static __inline BLASULONG rpcc(void){ | ||||
| #ifdef C_MSVC | |||||
| return __rdtsc(); | |||||
| #else | |||||
| BLASULONG a, d; | BLASULONG a, d; | ||||
| __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | ||||
| return ((BLASULONG)a + ((BLASULONG)d << 32)); | return ((BLASULONG)a + ((BLASULONG)d << 32)); | ||||
| #endif | |||||
| } | } | ||||
| #define RPCC_DEFINED | #define RPCC_DEFINED | ||||
| #define RPCC64BIT | #define RPCC64BIT | ||||
| #ifndef C_MSVC | |||||
| static __inline BLASULONG getstackaddr(void){ | static __inline BLASULONG getstackaddr(void){ | ||||
| BLASULONG addr; | BLASULONG addr; | ||||
| @@ -95,22 +112,32 @@ static __inline BLASULONG getstackaddr(void){ | |||||
| return addr; | return addr; | ||||
| } | } | ||||
| #endif | |||||
| static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | ||||
| #ifdef C_MSVC | |||||
| int cpuinfo[4]; | |||||
| __cpuid(cpuinfo, op); | |||||
| *eax=cpuinfo[0]; | |||||
| *ebx=cpuinfo[1]; | |||||
| *ecx=cpuinfo[2]; | |||||
| *edx=cpuinfo[3]; | |||||
| #else | |||||
| __asm__ __volatile__("cpuid" | __asm__ __volatile__("cpuid" | ||||
| : "=a" (*eax), | : "=a" (*eax), | ||||
| "=b" (*ebx), | "=b" (*ebx), | ||||
| "=c" (*ecx), | "=c" (*ecx), | ||||
| "=d" (*edx) | "=d" (*edx) | ||||
| : "0" (op)); | : "0" (op)); | ||||
| #endif | |||||
| } | } | ||||
| /* | /* | ||||
| #define WHEREAMI | #define WHEREAMI | ||||
| */ | */ | ||||
| static inline int WhereAmI(void){ | |||||
| static __inline int WhereAmI(void){ | |||||
| int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
| int apicid; | int apicid; | ||||
| @@ -152,10 +179,14 @@ static inline int WhereAmI(void){ | |||||
| #define GET_IMAGE_CANCEL | #define GET_IMAGE_CANCEL | ||||
| #ifdef SMP | #ifdef SMP | ||||
| #ifdef USE64BITINT | |||||
| #if defined(USE64BITINT) | |||||
| static __inline blasint blas_quickdivide(blasint x, blasint y){ | static __inline blasint blas_quickdivide(blasint x, blasint y){ | ||||
| return x / y; | return x / y; | ||||
| } | } | ||||
| #elif defined (C_MSVC) | |||||
| static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){ | |||||
| return x / y; | |||||
| } | |||||
| #else | #else | ||||
| extern unsigned int blas_quick_divide_table[]; | extern unsigned int blas_quick_divide_table[]; | ||||
| @@ -39,6 +39,10 @@ | |||||
| #ifndef CPUID_H | #ifndef CPUID_H | ||||
| #define CPUID_H | #define CPUID_H | ||||
| #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) | |||||
| #define INTEL_AMD | |||||
| #endif | |||||
| #define VENDOR_INTEL 1 | #define VENDOR_INTEL 1 | ||||
| #define VENDOR_UMC 2 | #define VENDOR_UMC 2 | ||||
| #define VENDOR_AMD 3 | #define VENDOR_AMD 3 | ||||
| @@ -59,7 +63,7 @@ | |||||
| #define FAMILY_PM 7 | #define FAMILY_PM 7 | ||||
| #define FAMILY_IA64 8 | #define FAMILY_IA64 8 | ||||
| #if defined(__i386__) || defined(__x86_64__) | |||||
| #ifdef INTEL_AMD | |||||
| #define GET_EXFAMILY 1 | #define GET_EXFAMILY 1 | ||||
| #define GET_EXMODEL 2 | #define GET_EXMODEL 2 | ||||
| #define GET_TYPE 3 | #define GET_TYPE 3 | ||||
| @@ -40,6 +40,12 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "cpuid.h" | #include "cpuid.h" | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| #define C_INLINE __inline | |||||
| #else | |||||
| #define C_INLINE inline | |||||
| #endif | |||||
| /* | /* | ||||
| #ifdef NO_AVX | #ifdef NO_AVX | ||||
| #define CPUTYPE_HASWELL CPUTYPE_NEHALEM | #define CPUTYPE_HASWELL CPUTYPE_NEHALEM | ||||
| @@ -53,12 +59,26 @@ | |||||
| #endif | #endif | ||||
| */ | */ | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) | |||||
| { | |||||
| int cpuInfo[4] = {-1}; | |||||
| __cpuid(cpuInfo, op); | |||||
| *eax = cpuInfo[0]; | |||||
| *ebx = cpuInfo[1]; | |||||
| *ecx = cpuInfo[2]; | |||||
| *edx = cpuInfo[3]; | |||||
| } | |||||
| #else | |||||
| #ifndef CPUIDEMU | #ifndef CPUIDEMU | ||||
| #if defined(__APPLE__) && defined(__i386__) | #if defined(__APPLE__) && defined(__i386__) | ||||
| void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); | void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); | ||||
| #else | #else | ||||
| static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | |||||
| static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | |||||
| #if defined(__i386__) && defined(__PIC__) | #if defined(__i386__) && defined(__PIC__) | ||||
| __asm__ __volatile__ | __asm__ __volatile__ | ||||
| ("mov %%ebx, %%edi;" | ("mov %%ebx, %%edi;" | ||||
| @@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * | |||||
| #endif | #endif | ||||
| static inline int have_cpuid(void){ | |||||
| #endif // _MSC_VER | |||||
| static C_INLINE int have_cpuid(void){ | |||||
| int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
| cpuid(0, &eax, &ebx, &ecx, &edx); | cpuid(0, &eax, &ebx, &ecx, &edx); | ||||
| return eax; | return eax; | ||||
| } | } | ||||
| static inline int have_excpuid(void){ | |||||
| static C_INLINE int have_excpuid(void){ | |||||
| int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
| cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | ||||
| @@ -130,10 +152,14 @@ static inline int have_excpuid(void){ | |||||
| } | } | ||||
| #ifndef NO_AVX | #ifndef NO_AVX | ||||
| static inline void xgetbv(int op, int * eax, int * edx){ | |||||
| static C_INLINE void xgetbv(int op, int * eax, int * edx){ | |||||
| //Use binary code for xgetbv | //Use binary code for xgetbv | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| *eax = __xgetbv(op); | |||||
| #else | |||||
| __asm__ __volatile__ | __asm__ __volatile__ | ||||
| (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); | (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); | ||||
| #endif | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,46 @@ | |||||
| # Builds the CBLAS test drivers (x<t>cblat1/2/3, one set per entry of FLOAT_TYPES) and registers them as tests. | |||||
| include_directories(${CMAKE_SOURCE_DIR}) | |||||
| enable_language(Fortran) | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS") | |||||
| # The level 2 and 3 drivers read their parameters from stdin; this helper script runs "$1" with "$2" redirected into it. | |||||
| file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" | |||||
|   "$1 < $2\n" | |||||
| ) | |||||
| foreach(float_type ${FLOAT_TYPES}) | |||||
|   # the lower-cased first letter of the float type selects the matching test sources and input files | |||||
|   string(SUBSTRING ${float_type} 0 1 float_char_upper) | |||||
|   string(TOLOWER ${float_char_upper} float_char) | |||||
|   #level1 | |||||
|   add_executable(x${float_char}cblat1 | |||||
|     c_${float_char}blat1.f | |||||
|     c_${float_char}blas1.c) | |||||
|   target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static) | |||||
|   # Naming the target lets CMake substitute the real location of the built executable, | |||||
|   # which a hard-coded binary-dir path gets wrong for multi-config generators and executable suffixes. | |||||
|   add_test(NAME "x${float_char}cblat1" | |||||
|     COMMAND x${float_char}cblat1) | |||||
|   #level2 and level3 share the same layout, differing only in the level number | |||||
|   foreach(blas_level 2 3) | |||||
|     set(cblat_target x${float_char}cblat${blas_level}) | |||||
|     add_executable(${cblat_target} | |||||
|       c_${float_char}blat${blas_level}.f | |||||
|       c_${float_char}blas${blas_level}.c | |||||
|       c_${float_char}${blas_level}chke.c | |||||
|       auxiliary.c | |||||
|       c_xerbla.c | |||||
|       constant.c) | |||||
|     target_link_libraries(${cblat_target} ${OpenBLAS_LIBNAME}_static) | |||||
|     add_test(NAME "${cblat_target}" | |||||
|       COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "$<TARGET_FILE:${cblat_target}>" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in${blas_level}") | |||||
|   endforeach() | |||||
| endforeach() | |||||
| @@ -0,0 +1,203 @@ | |||||
| include_directories(${CMAKE_SOURCE_DIR}) | |||||
| # sources that need to be compiled twice, once with no flags and once with LOWER | |||||
| set(UL_SOURCES | |||||
| sbmv_k.c | |||||
| spmv_k.c | |||||
| spr_k.c | |||||
| spr2_k.c | |||||
| syr_k.c | |||||
| syr2_k.c | |||||
| ) | |||||
| # sources that need to be compiled several times, for UNIT, TRANSA | |||||
| set(U_SOURCES | |||||
| trmv_U.c | |||||
| tbmv_U.c | |||||
| tbsv_U.c | |||||
| tpmv_U.c | |||||
| tpsv_U.c | |||||
| trsv_U.c | |||||
| ) | |||||
| set(L_SOURCES | |||||
| trmv_L.c | |||||
| tbmv_L.c | |||||
| tbsv_L.c | |||||
| tpmv_L.c | |||||
| tpsv_L.c | |||||
| trsv_L.c | |||||
| ) | |||||
| set(UL_SMP_SOURCES | |||||
| symv_thread.c | |||||
| syr_thread.c | |||||
| syr2_thread.c | |||||
| spr_thread.c | |||||
| spr2_thread.c | |||||
| spmv_thread.c | |||||
| sbmv_thread.c | |||||
| ) | |||||
| set(NU_SMP_SOURCES | |||||
| trmv_thread.c | |||||
| tpmv_thread.c | |||||
| tbmv_thread.c | |||||
| ) | |||||
| set(ULVM_COMPLEX_SOURCES | |||||
| hbmv_k.c | |||||
| hpmv_k.c | |||||
| hpr_k.c | |||||
| hpr2_k.c | |||||
| her_k.c | |||||
| her2_k.c | |||||
| ) | |||||
| # objects that need LOWER set | |||||
| GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) | |||||
| # gbmv uses a lowercase n and t | |||||
| GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) | |||||
| GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) | |||||
| # c/zgbmv | |||||
| GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2) | |||||
| GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2) | |||||
| GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2) | |||||
| GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2) | |||||
| GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2) | |||||
| GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2) | |||||
| # special defines for complex | |||||
| foreach (float_type ${FLOAT_TYPES}) | |||||
| if (SMP) | |||||
| GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false ${float_type}) | |||||
| endif () | |||||
| if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||||
| foreach (u_source ${U_SOURCES}) | |||||
| string(REGEX MATCH "[a-z]+" op_name ${u_source}) | |||||
| GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NU" false ${float_type}) | |||||
| GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TL" false ${float_type}) | |||||
| GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RU" false ${float_type}) | |||||
| GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CL" false ${float_type}) | |||||
| endforeach () | |||||
| foreach (l_source ${L_SOURCES}) | |||||
| string(REGEX MATCH "[a-z]+" op_name ${l_source}) | |||||
| GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NL" false ${float_type}) | |||||
| GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TU" false ${float_type}) | |||||
| GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RL" false ${float_type}) | |||||
| GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) | |||||
| endforeach () | |||||
| foreach (ulvm_source ${ULVM_COMPLEX_SOURCES}) | |||||
| string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source}) | |||||
| GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type}) | |||||
| endforeach() | |||||
| if (SMP) | |||||
| GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gemv_thread.c" "XCONJ;TRANSA" "gemv_thread_u" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ" "gemv_thread_s" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ;TRANSA" "gemv_thread_d" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "CONJ" "gbmv_thread_r" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "CONJ;TRANSA" "gbmv_thread_c" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "XCONJ" "gbmv_thread_o" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "XCONJ;TRANSA" "gbmv_thread_u" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ" "gbmv_thread_s" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ;TRANSA" "gbmv_thread_d" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("ger_thread.c" "" "ger_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("ger_thread.c" "CONJ" "ger_thread_C" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr_thread.c" "HERREV" "her_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr_thread.c" "LOWER;HERREV" "her_thread_M" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr2_thread.c" "HER" "her2_thread_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr2_thread.c" "HER;LOWER" "her2_thread_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr2_thread.c" "HERREV" "her2_thread_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr2_thread.c" "LOWER;HERREV" "her2_thread_M" false "" "" false ${float_type}) | |||||
| foreach (nu_smp_src ${NU_SMP_SOURCES}) | |||||
| string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) | |||||
| GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) | |||||
| GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type}) | |||||
| GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type}) | |||||
| GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "${op_name}_C" false ${float_type}) | |||||
| endforeach () | |||||
| endif () | |||||
| else () | |||||
| # For real number functions | |||||
| foreach (u_source ${U_SOURCES}) | |||||
| string(REGEX MATCH "[a-z]+" op_name ${u_source}) | |||||
| GenerateCombinationObjects("${u_source}" "UNIT" "N" "" 0 "${op_name}_NU" false ${float_type}) | |||||
| GenerateCombinationObjects("${u_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TL" false ${float_type}) | |||||
| endforeach () | |||||
| foreach (l_source ${L_SOURCES}) | |||||
| string(REGEX MATCH "[a-z]+" op_name ${l_source}) | |||||
| GenerateCombinationObjects("${l_source}" "UNIT" "N" "" 0 "${op_name}_NL" false ${float_type}) | |||||
| GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type}) | |||||
| endforeach () | |||||
| if (SMP) | |||||
| GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type}) | |||||
| foreach(nu_smp_source ${NU_SMP_SOURCES}) | |||||
| string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source}) | |||||
| GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "" 0 "${op_name}_N" false ${float_type}) | |||||
| GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "TRANSA" 0 "${op_name}_T" false ${float_type}) | |||||
| endforeach() | |||||
| endif () | |||||
| endif () | |||||
| endforeach () | |||||
| if (SMP) | |||||
| GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) | |||||
| endif () | |||||
| add_library(driver_level2 OBJECT ${OPENBLAS_SRC}) | |||||
| @@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| a = (FLOAT *)args -> a; | a = (FLOAT *)args -> a; | ||||
| @@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| a = (FLOAT *)args -> a; | a = (FLOAT *)args -> a; | ||||
| @@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA | |||||
| FLOAT *bufferY = gemvbuffer; | FLOAT *bufferY = gemvbuffer; | ||||
| FLOAT *bufferX = gemvbuffer; | FLOAT *bufferX = gemvbuffer; | ||||
| #ifdef TRANS | #ifdef TRANS | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| if (incy != 1) { | if (incy != 1) { | ||||
| @@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *bufferX = sbmvbuffer; | FLOAT *bufferX = sbmvbuffer; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | ||||
| @@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *bufferX = gemvbuffer; | FLOAT *bufferX = gemvbuffer; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | ||||
| @@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #ifndef HEMVREV | #ifndef HEMVREV | ||||
| #ifndef LOWER | #ifndef LOWER | ||||
| if (i > 0) { | if (i > 0) { | ||||
| FLOAT _Complex result = DOTC_K(i, a, 1, X, 1); | |||||
| result = DOTC_K(i, a, 1, X, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #else | #else | ||||
| if (m - i > 1) { | if (m - i > 1) { | ||||
| FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #else | #else | ||||
| #ifndef LOWER | #ifndef LOWER | ||||
| if (i > 0) { | if (i > 0) { | ||||
| FLOAT _Complex result = DOTU_K(i, a, 1, X, 1); | |||||
| result = DOTU_K(i, a, 1, X, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #else | #else | ||||
| if (m - i > 1) { | if (m - i > 1) { | ||||
| FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *bufferY = sbmvbuffer; | FLOAT *bufferY = sbmvbuffer; | ||||
| FLOAT *bufferX = sbmvbuffer; | FLOAT *bufferX = sbmvbuffer; | ||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | ||||
| @@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); | a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| a, 1, Y + i * COMPSIZE, 1, NULL, 0); | a, 1, Y + i * COMPSIZE, 1, NULL, 0); | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | FLOAT *gemvbuffer = (FLOAT *)buffer; | ||||
| FLOAT *bufferY = gemvbuffer; | FLOAT *bufferY = gemvbuffer; | ||||
| FLOAT *bufferX = gemvbuffer; | FLOAT *bufferX = gemvbuffer; | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -0,0 +1,115 @@ | |||||
| include_directories(${CMAKE_SOURCE_DIR}) | |||||
| # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa | |||||
| # loop through gemm.c defines | |||||
| set(GEMM_DEFINES NN NT TN TT) | |||||
| set(GEMM_COMPLEX_DEFINES RN CN RT CT NR TR RR CR NC TC RC CC) | |||||
| foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||||
| string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) | |||||
| GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) | |||||
| if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
| GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0) | |||||
| endif () | |||||
| endforeach () | |||||
| set(TRMM_TRSM_SOURCES | |||||
| trmm_L.c | |||||
| trmm_R.c | |||||
| trsm_L.c | |||||
| trsm_R.c) | |||||
| foreach(trmm_trsm_source ${TRMM_TRSM_SOURCES}) | |||||
| string(REGEX MATCH "[a-z]+_[A-Z]+" op_name ${trmm_trsm_source}) | |||||
| GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "" 0 "${op_name}N") | |||||
| GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "TRANSA" 0 "${op_name}T") | |||||
| endforeach() | |||||
| GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1) | |||||
| GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "" 1) | |||||
| GenerateCombinationObjects("syr2k_k.c" "LOWER;TRANS" "U;N" "" 1) | |||||
| GenerateCombinationObjects("syrk_kernel.c" "LOWER" "U" "" 2) | |||||
| GenerateCombinationObjects("syr2k_kernel.c" "LOWER" "U" "" 2) | |||||
| if (SMP) | |||||
| # N.B. these do NOT have a float type (e.g. DOUBLE) defined! | |||||
| GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" 0 "" "" 1) | |||||
| if (NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
| GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "THREADED_LEVEL3" 2 "syrk_thread") | |||||
| GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "THREADED_LEVEL3;NN" 2 "symm_thread") | |||||
| endif () | |||||
| endif () | |||||
| # Objects that only exist for the complex float types (COMPLEX / ZCOMPLEX). | |||||
| foreach (float_type ${FLOAT_TYPES}) | |||||
|   if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||||
|     # herk | |||||
|     GenerateCombinationObjects("zherk_kernel.c" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type}) | |||||
|     # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination | |||||
|     GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) | |||||
|     GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) | |||||
|     # Need to set CONJ for trmm and trsm | |||||
|     GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) | |||||
|     GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) | |||||
|     GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_RR" false ${float_type}) | |||||
|     GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_RC" false ${float_type}) | |||||
|     GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_LR" false ${float_type}) | |||||
|     GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_LC" false ${float_type}) | |||||
|     GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) | |||||
|     GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_RC" false ${float_type}) | |||||
|     # hemm | |||||
|     GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type}) | |||||
|     GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type}) | |||||
|     # her2k | |||||
|     GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type}) | |||||
|     GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) | |||||
|     # THREADED_LEVEL3 objects are only meaningful for SMP builds that do not use the | |||||
|     # simple threaded level3 path; this is the same guard used for syrk_thread/symm_thread. | |||||
|     if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
|       # herk (previously generated unconditionally, even for non-SMP builds) | |||||
|       GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) | |||||
|       GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) | |||||
|       # hemm | |||||
|       GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) | |||||
|       GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) | |||||
|       # her2k: no THREADED_LEVEL3 variant is generated. The her2k_* calls that used to be | |||||
|       # repeated here were byte-identical to the ones above and only duplicated those objects. | |||||
|     endif() | |||||
|     # special gemm defines for complex | |||||
|     foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) | |||||
|       # Object names use the lower-cased define as suffix. | |||||
|       string(TOLOWER "${gemm_define}" gemm_define_LC) | |||||
|       GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) | |||||
|       if(USE_GEMM3M) | |||||
|         GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type}) | |||||
|       endif() | |||||
|       if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
|         GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) | |||||
|         if(USE_GEMM3M) | |||||
|           GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type}) | |||||
|         endif() | |||||
|       endif () | |||||
|     endforeach () | |||||
|   endif () | |||||
| endforeach () | |||||
| # The commented block below is Makefile text (HPLOBJS) kept for reference only; | |||||
| # it is inert here and no HPL objects are generated by this listfile. | |||||
| #HPLOBJS = | |||||
| # dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c | |||||
| # dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c | |||||
| # dtrsm_LTUU.c dtrsm_LTUN.c dtrsm_LTLU.c dtrsm_LTLN.c | |||||
| # dtrsm_RNUU.c dtrsm_RNUN.c dtrsm_RNLU.c dtrsm_RNLN.c | |||||
| # dtrsm_RTUU.c dtrsm_RTUN.c dtrsm_RTLU.c dtrsm_RTLN.c | |||||
| # | |||||
| #if (USE_SIMPLE_THREADED_LEVEL3) | |||||
| # HPLOBJS += dgemm_thread_nn.c dgemm_thread_nt.c | |||||
| # dgemm_thread_tn.c dgemm_thread_tt.c | |||||
| #endif | |||||
| # | |||||
| # Build every source listed in OPENBLAS_SRC as the driver_level3 OBJECT library. | |||||
| # NOTE(review): OPENBLAS_SRC is presumably filled by the Generate* calls above - confirm in cmake/utils.cmake. | |||||
| add_library(driver_level3 OBJECT ${OPENBLAS_SRC}) | |||||
| @@ -47,7 +47,7 @@ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { | |||||
| static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { | |||||
| BLASLONG i; | BLASLONG i; | ||||
| @@ -49,7 +49,7 @@ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { | |||||
| static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { | |||||
| BLASLONG i; | BLASLONG i; | ||||
| @@ -70,6 +70,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| BLASLONG ls, is, js; | BLASLONG ls, is, js; | ||||
| BLASLONG min_l, min_i, min_j; | BLASLONG min_l, min_i, min_j; | ||||
| BLASLONG jjs, min_jj; | BLASLONG jjs, min_jj; | ||||
| #if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) | |||||
| BLASLONG start_ls; | |||||
| #endif | |||||
| m = args -> m; | m = args -> m; | ||||
| n = args -> n; | n = args -> n; | ||||
| @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| } | } | ||||
| #else | #else | ||||
| BLASLONG start_ls; | |||||
| for(js = n; js > 0; js -= GEMM_R){ | for(js = n; js > 0; js -= GEMM_R){ | ||||
| min_j = js; | min_j = js; | ||||
| if (min_j > GEMM_R) min_j = GEMM_R; | if (min_j > GEMM_R) min_j = GEMM_R; | ||||
| @@ -76,6 +76,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| BLASLONG ls, is, js; | BLASLONG ls, is, js; | ||||
| BLASLONG min_l, min_i, min_j; | BLASLONG min_l, min_i, min_j; | ||||
| BLASLONG jjs, min_jj; | BLASLONG jjs, min_jj; | ||||
| #if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) | |||||
| BLASLONG start_is; | |||||
| #endif | |||||
| m = args -> m; | m = args -> m; | ||||
| n = args -> n; | n = args -> n; | ||||
| @@ -178,8 +181,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| } | } | ||||
| } | } | ||||
| #else | #else | ||||
| BLASLONG start_is; | |||||
| for(ls = m; ls > 0; ls -= GEMM_Q){ | for(ls = m; ls > 0; ls -= GEMM_Q){ | ||||
| min_l = ls; | min_l = ls; | ||||
| if (min_l > GEMM_Q) min_l = GEMM_Q; | if (min_l > GEMM_Q) min_l = GEMM_Q; | ||||
| @@ -75,6 +75,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| BLASLONG ls, is, js; | BLASLONG ls, is, js; | ||||
| BLASLONG min_l, min_i, min_j; | BLASLONG min_l, min_i, min_j; | ||||
| BLASLONG jjs, min_jj; | BLASLONG jjs, min_jj; | ||||
| #if !((defined(UPPER) && !defined(TRANSA)) || (!defined(UPPER) && defined(TRANSA))) | |||||
| BLASLONG start_ls; | |||||
| #endif | |||||
| m = args -> m; | m = args -> m; | ||||
| n = args -> n; | n = args -> n; | ||||
| @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| } | } | ||||
| #else | #else | ||||
| BLASLONG start_ls; | |||||
| for(js = n; js > 0; js -= GEMM_R){ | for(js = n; js > 0; js -= GEMM_R){ | ||||
| min_j = js; | min_j = js; | ||||
| if (min_j > GEMM_R) min_j = GEMM_R; | if (min_j > GEMM_R) min_j = GEMM_R; | ||||
| @@ -0,0 +1,75 @@ | |||||
| # Put the OpenBLAS project root on the include path. PROJECT_SOURCE_DIR (rather than | |||||
| # CMAKE_SOURCE_DIR) keeps this correct when OpenBLAS is built as a subproject. | |||||
| include_directories(${PROJECT_SOURCE_DIR}) | |||||
| # Select the memory allocator source. The expansion is quoted so the test stays | |||||
| # well-formed when CORE is unset or empty. | |||||
| if ("${CORE}" STREQUAL "PPC440") | |||||
|   set(MEMORY memory_qalloc.c) | |||||
| else () | |||||
|   set(MEMORY memory.c) | |||||
| endif () | |||||
| # Threading support sources, only for SMP builds. | |||||
| if (SMP) | |||||
|   # OpenMP takes precedence over the Windows thread server. | |||||
|   if (USE_OPENMP) | |||||
|     set(BLAS_SERVER blas_server_omp.c) | |||||
|   elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") | |||||
|     set(BLAS_SERVER blas_server_win32.c) | |||||
|   endif () | |||||
|   # Fall back to the default thread server when nothing was chosen above. | |||||
|   if (NOT DEFINED BLAS_SERVER) | |||||
|     set(BLAS_SERVER blas_server.c) | |||||
|   endif () | |||||
|   set(SMP_SOURCES | |||||
|     ${BLAS_SERVER} | |||||
|     divtable.c # TODO: Makefile has -UDOUBLE | |||||
|     blas_l1_thread.c | |||||
|   ) | |||||
|   # init.c is built unless NO_AFFINITY is set. | |||||
|   if (NOT NO_AFFINITY) | |||||
|     list(APPEND SMP_SOURCES init.c) | |||||
|   endif () | |||||
| endif () | |||||
| # Plain sources that are handed to add_library directly. | |||||
| set(COMMON_SOURCES xerbla.c openblas_set_num_threads.c openblas_error_handle.c openblas_get_num_procs.c openblas_get_num_threads.c) | |||||
| # these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling | |||||
| GenerateNamedObjects("abs.c" "" "c_abs" 0 "" "" 1) | |||||
| GenerateNamedObjects("abs.c" "DOUBLE" "z_abs" 0 "" "" 1) | |||||
| GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c" "" "" 0 "" "" 1) | |||||
| # Exactly one of parameter.c / dynamic.c is added, depending on DYNAMIC_ARCH. | |||||
| if (NOT DYNAMIC_ARCH) | |||||
|   list(APPEND COMMON_SOURCES parameter.c) | |||||
| else () | |||||
|   list(APPEND COMMON_SOURCES dynamic.c) | |||||
| endif () | |||||
| # The commented lines below are Makefile text kept for reference only. They name optional | |||||
| # objects (EXPRECISION, QUAD_PRECISION, USE_CUDA, FUNCTION_PROFILE, HPL) that this listfile does not build. | |||||
| #ifdef EXPRECISION | |||||
| #COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX) | |||||
| #endif | |||||
| # | |||||
| #ifdef QUAD_PRECISION | |||||
| #COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX) | |||||
| #endif | |||||
| # | |||||
| #ifdef USE_CUDA | |||||
| #COMMONOBJS += cuda_init.$(SUFFIX) | |||||
| #endif | |||||
| # | |||||
| #ifdef FUNCTION_PROFILE | |||||
| #COMMONOBJS += profile.$(SUFFIX) | |||||
| #endif | |||||
| #LIBOTHERS = libothers.$(LIBSUFFIX) | |||||
| #ifeq ($(DYNAMIC_ARCH), 1) | |||||
| #HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||||
| #else | |||||
| #HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||||
| #endif | |||||
| # Build the driver_others OBJECT library from OPENBLAS_SRC plus the MEMORY, SMP_SOURCES and | |||||
| # COMMON_SOURCES lists. SMP_SOURCES is only set for SMP builds and expands to nothing otherwise. | |||||
| # NOTE(review): OPENBLAS_SRC is presumably filled by the GenerateNamedObjects calls - confirm in cmake/utils.cmake. | |||||
| add_library(driver_others OBJECT ${OPENBLAS_SRC} ${MEMORY} ${SMP_SOURCES} ${COMMON_SOURCES}) | |||||
| @@ -70,9 +70,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include "common.h" | #include "common.h" | ||||
| #ifdef OS_LINUX | |||||
| #if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #include <dlfcn.h> | #include <dlfcn.h> | ||||
| #include <signal.h> | |||||
| #include <sys/resource.h> | #include <sys/resource.h> | ||||
| #include <sys/time.h> | |||||
| #endif | #endif | ||||
| #ifndef likely | #ifndef likely | ||||
| @@ -265,7 +267,7 @@ int get_node(void); | |||||
| static int increased_threads = 0; | static int increased_threads = 0; | ||||
| static int blas_thread_server(void *arg){ | |||||
| static void* blas_thread_server(void *arg){ | |||||
| /* Thread identifier */ | /* Thread identifier */ | ||||
| BLASLONG cpu = (BLASLONG)arg; | BLASLONG cpu = (BLASLONG)arg; | ||||
| @@ -458,7 +460,7 @@ static int blas_thread_server(void *arg){ | |||||
| //pthread_exit(NULL); | //pthread_exit(NULL); | ||||
| return 0; | |||||
| return NULL; | |||||
| } | } | ||||
| #ifdef MONITOR | #ifdef MONITOR | ||||
| @@ -565,14 +567,23 @@ int blas_thread_init(void){ | |||||
| #ifdef NEED_STACKATTR | #ifdef NEED_STACKATTR | ||||
| ret=pthread_create(&blas_threads[i], &attr, | ret=pthread_create(&blas_threads[i], &attr, | ||||
| (void *)&blas_thread_server, (void *)i); | |||||
| &blas_thread_server, (void *)i); | |||||
| #else | #else | ||||
| ret=pthread_create(&blas_threads[i], NULL, | ret=pthread_create(&blas_threads[i], NULL, | ||||
| (void *)&blas_thread_server, (void *)i); | |||||
| &blas_thread_server, (void *)i); | |||||
| #endif | #endif | ||||
| if(ret!=0){ | if(ret!=0){ | ||||
| fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. Error code:%d\n",ret); | |||||
| exit(1); | |||||
| struct rlimit rlim; | |||||
| const char *msg = strerror(ret); | |||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg); | |||||
| if(0 == getrlimit(RLIMIT_NPROC, &rlim)) { | |||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC " | |||||
| "%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max)); | |||||
| } | |||||
| if(0 != raise(SIGINT)) { | |||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n"); | |||||
| exit(EXIT_FAILURE); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -832,10 +843,10 @@ void goto_set_num_threads(int num_threads) { | |||||
| #ifdef NEED_STACKATTR | #ifdef NEED_STACKATTR | ||||
| pthread_create(&blas_threads[i], &attr, | pthread_create(&blas_threads[i], &attr, | ||||
| (void *)&blas_thread_server, (void *)i); | |||||
| &blas_thread_server, (void *)i); | |||||
| #else | #else | ||||
| pthread_create(&blas_threads[i], NULL, | pthread_create(&blas_threads[i], NULL, | ||||
| (void *)&blas_thread_server, (void *)i); | |||||
| &blas_thread_server, (void *)i); | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -139,8 +139,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) | #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| #define CONSTRUCTOR __cdecl | |||||
| #define DESTRUCTOR __cdecl | |||||
| #elif defined(OS_DARWIN) && defined(C_GCC) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | #define CONSTRUCTOR __attribute__ ((constructor)) | ||||
| #define DESTRUCTOR __attribute__ ((destructor)) | #define DESTRUCTOR __attribute__ ((destructor)) | ||||
| #else | |||||
| #define CONSTRUCTOR __attribute__ ((constructor(101))) | |||||
| #define DESTRUCTOR __attribute__ ((destructor(101))) | |||||
| #endif | |||||
| #ifdef DYNAMIC_ARCH | #ifdef DYNAMIC_ARCH | ||||
| gotoblas_t *gotoblas = NULL; | gotoblas_t *gotoblas = NULL; | ||||
| @@ -795,12 +803,12 @@ static void *alloc_hugetlb(void *address){ | |||||
| if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { | if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { | ||||
| CloseHandle(hToken); | CloseHandle(hToken); | ||||
| return -1; | |||||
| return (void*)-1; | |||||
| } | } | ||||
| if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) { | if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) { | ||||
| CloseHandle(hToken); | CloseHandle(hToken); | ||||
| return -1; | |||||
| return (void*)-1; | |||||
| } | } | ||||
| map_address = (void *)VirtualAlloc(address, | map_address = (void *)VirtualAlloc(address, | ||||
| @@ -1402,6 +1410,28 @@ void DESTRUCTOR gotoblas_quit(void) { | |||||
| #endif | #endif | ||||
| } | } | ||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| /* DLL entry point for MSVC (non-clang) builds. | |||||
|  * Calls gotoblas_init() on DLL_PROCESS_ATTACH and gotoblas_quit() on | |||||
|  * DLL_PROCESS_DETACH; thread attach/detach notifications and any other | |||||
|  * reason code are ignored. Always returns TRUE. | |||||
|  * NOTE(review): presumably needed because CONSTRUCTOR/DESTRUCTOR expand to a | |||||
|  * plain __cdecl under this same condition, so nothing else would run the | |||||
|  * init/quit routines automatically - confirm. */ | |||||
| BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) | |||||
| { | |||||
| switch (ul_reason_for_call) | |||||
| { | |||||
| case DLL_PROCESS_ATTACH: | |||||
| gotoblas_init(); | |||||
| break; | |||||
| case DLL_THREAD_ATTACH: | |||||
| break; | |||||
| case DLL_THREAD_DETACH: | |||||
| break; | |||||
| case DLL_PROCESS_DETACH: | |||||
| gotoblas_quit(); | |||||
| break; | |||||
| default: | |||||
| break; | |||||
| } | |||||
| return TRUE; | |||||
| } | |||||
| #endif | |||||
| #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) | #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) | ||||
| /* Don't call me; this is just work around for PGI / Sun bug */ | /* Don't call me; this is just work around for PGI / Sun bug */ | ||||
| void gotoblas_dummy_for_PGI(void) { | void gotoblas_dummy_for_PGI(void) { | ||||
| @@ -69,10 +69,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64) | |||||
| #define OS_WINDOWS | #define OS_WINDOWS | ||||
| #endif | #endif | ||||
| #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) | |||||
| #define INTEL_AMD | |||||
| #endif | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <string.h> | #include <string.h> | ||||
| #ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
| @@ -750,7 +754,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ARCHITECTURE "ARM" | #define ARCHITECTURE "ARM" | ||||
| #define SUBARCHITECTURE "CORTEXA9" | #define SUBARCHITECTURE "CORTEXA9" | ||||
| #define SUBDIRNAME "arm" | #define SUBDIRNAME "arm" | ||||
| #define ARCHCONFIG "-DCORTEXA9 " \ | |||||
| #define ARCHCONFIG "-DCORTEXA9 -DARMV7 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ | "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ | ||||
| "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ | "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | ||||
| @@ -765,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ARCHITECTURE "ARM" | #define ARCHITECTURE "ARM" | ||||
| #define SUBARCHITECTURE "CORTEXA15" | #define SUBARCHITECTURE "CORTEXA15" | ||||
| #define SUBDIRNAME "arm" | #define SUBDIRNAME "arm" | ||||
| #define ARCHCONFIG "-DCORTEXA15 " \ | |||||
| #define ARCHCONFIG "-DCORTEXA15 -DARMV7 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ | "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ | ||||
| "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ | "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | ||||
| @@ -830,7 +834,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
| #endif | #endif | ||||
| #if defined(__i386__) || (__x86_64__) | |||||
| #ifdef INTEL_AMD | |||||
| #include "cpuid_x86.c" | #include "cpuid_x86.c" | ||||
| #define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
| #endif | #endif | ||||
| @@ -925,7 +929,7 @@ int main(int argc, char *argv[]){ | |||||
| #ifdef FORCE | #ifdef FORCE | ||||
| printf("CORE=%s\n", CORENAME); | printf("CORE=%s\n", CORENAME); | ||||
| #else | #else | ||||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) | |||||
| #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) | |||||
| printf("CORE=%s\n", get_corename()); | printf("CORE=%s\n", get_corename()); | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -945,7 +949,7 @@ int main(int argc, char *argv[]){ | |||||
| #endif | #endif | ||||
| #if defined(__i386__) || defined(__x86_64__) | |||||
| #ifdef INTEL_AMD | |||||
| #ifndef FORCE | #ifndef FORCE | ||||
| get_sse(); | get_sse(); | ||||
| #else | #else | ||||
| @@ -1025,7 +1029,7 @@ int main(int argc, char *argv[]){ | |||||
| #ifdef FORCE | #ifdef FORCE | ||||
| printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | ||||
| #else | #else | ||||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) | |||||
| #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) | |||||
| printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,166 @@ | |||||
| # Put the OpenBLAS project root on the include path. PROJECT_SOURCE_DIR (rather than | |||||
| # CMAKE_SOURCE_DIR) keeps this correct when OpenBLAS is built as a subproject. | |||||
| include_directories(${PROJECT_SOURCE_DIR}) | |||||
| # Level 1 sources. | |||||
| set(BLAS1_SOURCES | |||||
|   copy.c | |||||
|   nrm2.c | |||||
| ) | |||||
| set(BLAS1_REAL_ONLY_SOURCES | |||||
|   rotm.c rotmg.c # N.B. these do not have complex counterparts | |||||
|   rot.c | |||||
|   asum.c | |||||
| ) | |||||
| # these will have 'z' prepended for the complex version | |||||
| set(BLAS1_MANGLED_SOURCES | |||||
|   axpy.c swap.c | |||||
|   scal.c | |||||
|   dot.c | |||||
|   rotg.c | |||||
|   axpby.c | |||||
| ) | |||||
| # TODO: USE_NETLIB_GEMV should switch gemv.c to netlib/*gemv.f | |||||
| # these all have 'z' sources for complex versions | |||||
| set(BLAS2_SOURCES | |||||
|   gemv.c ger.c | |||||
|   trsv.c trmv.c symv.c | |||||
|   syr.c syr2.c gbmv.c | |||||
|   sbmv.c spmv.c | |||||
|   spr.c spr2.c | |||||
|   tbsv.c tbmv.c | |||||
|   tpsv.c tpmv.c | |||||
| ) | |||||
| set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES | |||||
|   hemv.c hbmv.c | |||||
|   her.c her2.c | |||||
|   hpmv.c hpr.c | |||||
|   hpr2.c | |||||
| ) | |||||
| # these do not have separate 'z' sources | |||||
| set(BLAS3_SOURCES | |||||
|   gemm.c symm.c | |||||
|   trsm.c syrk.c syr2k.c | |||||
| ) | |||||
| set(BLAS3_MANGLED_SOURCES | |||||
|   omatcopy.c imatcopy.c | |||||
|   geadd.c | |||||
| ) | |||||
| # generate the BLAS objs once with and once without cblas | |||||
| # CBLAS_FLAGS collects the passes to run: 0 unless NO_FBLAS is defined, 1 unless NO_CBLAS is defined. | |||||
| set (CBLAS_FLAGS "") | |||||
| if (NOT DEFINED NO_FBLAS) | |||||
|   list(APPEND CBLAS_FLAGS 0) | |||||
| endif () | |||||
| if (NOT DEFINED NO_CBLAS) | |||||
|   list(APPEND CBLAS_FLAGS 1) | |||||
| endif () | |||||
| foreach (CBLAS_FLAG ${CBLAS_FLAGS}) | |||||
|   # Both passes currently use the same complex handling. An earlier draft switched the | |||||
|   # cblas pass (CBLAS_FLAG EQUAL 1) to DISABLE_COMPLEX 1 / MANGLE_COMPLEX 1, but that | |||||
|   # override was commented out, so it is not applied. | |||||
|   set(DISABLE_COMPLEX 0) | |||||
|   set(MANGLE_COMPLEX 3) | |||||
|   GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) | |||||
|   GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) | |||||
|   GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||||
|   GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||||
|   GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4) | |||||
|   GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) | |||||
|   GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||||
|   #sdsdot, dsdot | |||||
|   GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||||
|   GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||||
|   # trmm is trsm with a compiler flag set | |||||
|   GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) | |||||
|   # max and imax are compiled 4 times | |||||
|   GenerateNamedObjects("max.c" "" "" ${CBLAS_FLAG}) | |||||
|   GenerateNamedObjects("max.c" "USE_ABS" "amax" ${CBLAS_FLAG}) | |||||
|   GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG}) | |||||
|   GenerateNamedObjects("max.c" "USE_MIN" "min" ${CBLAS_FLAG}) | |||||
|   GenerateNamedObjects("imax.c" "" "i*max" ${CBLAS_FLAG}) | |||||
|   GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" ${CBLAS_FLAG}) | |||||
|   GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG}) | |||||
|   GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG}) | |||||
|   # complex-specific sources | |||||
|   foreach (float_type ${FLOAT_TYPES}) | |||||
|     if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||||
|       GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       GenerateNamedObjects("zdot.c" "" "dotu" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       GenerateNamedObjects("symm.c" "HEMM" "hemm" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       GenerateNamedObjects("syrk.c" "HEMM" "herk" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       GenerateNamedObjects("syr2k.c" "HEMM" "her2k" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       if (USE_GEMM3M) | |||||
|         # Pass the current pass flag like every other call in this loop. A hard-coded | |||||
|         # `false` registered the same object once per pass and never produced the | |||||
|         # cblas variant. | |||||
|         GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" ${CBLAS_FLAG} "" "" false ${float_type}) | |||||
|       endif() | |||||
|     endif () | |||||
|     # Mixed real/complex routines for single-precision complex. | |||||
|     if (${float_type} STREQUAL "COMPLEX") | |||||
|       GenerateNamedObjects("zscal.c" "SSCAL" "sscal" ${CBLAS_FLAG} "" "" false "COMPLEX") | |||||
|       GenerateNamedObjects("nrm2.c" "" "scnrm2" ${CBLAS_FLAG} "" "" true "COMPLEX") | |||||
|       GenerateNamedObjects("zrot.c" "" "csrot" ${CBLAS_FLAG} "" "" true "COMPLEX") | |||||
|       GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" ${CBLAS_FLAG} "" "" true "COMPLEX") | |||||
|       GenerateNamedObjects("max.c" "USE_ABS" "scamax" ${CBLAS_FLAG} "" "" true "COMPLEX") | |||||
|       GenerateNamedObjects("asum.c" "" "scasum" ${CBLAS_FLAG} "" "" true "COMPLEX") | |||||
|     endif () | |||||
|     # Mixed real/complex routines for double-precision complex. | |||||
|     if (${float_type} STREQUAL "ZCOMPLEX") | |||||
|       GenerateNamedObjects("zscal.c" "SSCAL" "dscal" ${CBLAS_FLAG} "" "" false "ZCOMPLEX") | |||||
|       GenerateNamedObjects("nrm2.c" "" "dznrm2" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") | |||||
|       GenerateNamedObjects("zrot.c" "" "zdrot" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") | |||||
|       GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") | |||||
|       GenerateNamedObjects("max.c" "USE_ABS" "dzamax" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") | |||||
|       GenerateNamedObjects("asum.c" "" "dzasum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") | |||||
|     endif () | |||||
|   endforeach () | |||||
| endforeach () | |||||
| # CBLAS-only entry points: cblas_dotu_sub / cblas_dotc_sub for the complex types. | |||||
| if (NOT DEFINED NO_CBLAS) | |||||
|   foreach (float_type ${FLOAT_TYPES}) | |||||
|     if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||||
|       GenerateNamedObjects("zdot.c" "FORCE_USE_STACK" "dotu_sub" 1 "" "" false ${float_type}) | |||||
|       GenerateNamedObjects("zdot.c" "FORCE_USE_STACK;CONJ" "dotc_sub" 1 "" "" false ${float_type}) | |||||
|     endif() | |||||
|   endforeach () | |||||
| endif() | |||||
| # LAPACK interface objects, skipped when NO_LAPACK is defined. | |||||
| if (NOT DEFINED NO_LAPACK) | |||||
|   set(LAPACK_SOURCES lapack/gesv.c) | |||||
|   # prepend z for complex versions | |||||
|   set(LAPACK_MANGLED_SOURCES | |||||
|     lapack/getrf.c | |||||
|     lapack/getrs.c | |||||
|     lapack/potrf.c | |||||
|     lapack/getf2.c | |||||
|     lapack/potf2.c | |||||
|     lapack/laswp.c | |||||
|     lapack/lauu2.c | |||||
|     lapack/lauum.c | |||||
|     lapack/trti2.c | |||||
|     lapack/trtri.c | |||||
|   ) | |||||
|   GenerateNamedObjects("${LAPACK_SOURCES}") | |||||
|   GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) | |||||
| endif () | |||||
| # Everything listed in OPENBLAS_SRC becomes the interface OBJECT library. | |||||
| add_library(interface OBJECT ${OPENBLAS_SRC}) | |||||
| @@ -121,6 +121,9 @@ void NAME(char *TRANSA, char *TRANSB, | |||||
| FLOAT *sa, *sb; | FLOAT *sa, *sb; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| #ifdef XDOUBLE | #ifdef XDOUBLE | ||||
| int mode = BLAS_XDOUBLE | BLAS_REAL; | int mode = BLAS_XDOUBLE | BLAS_REAL; | ||||
| @@ -237,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS | |||||
| XFLOAT *sa, *sb; | XFLOAT *sa, *sb; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| #ifdef XDOUBLE | #ifdef XDOUBLE | ||||
| int mode = BLAS_XDOUBLE | BLAS_REAL; | int mode = BLAS_XDOUBLE | BLAS_REAL; | ||||
| @@ -400,15 +406,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS | |||||
| mode |= (transa << BLAS_TRANSA_SHIFT); | mode |= (transa << BLAS_TRANSA_SHIFT); | ||||
| mode |= (transb << BLAS_TRANSB_SHIFT); | mode |= (transb << BLAS_TRANSB_SHIFT); | ||||
| int nthreads_max = num_cpu_avail(3); | |||||
| int nthreads_avail = nthreads_max; | |||||
| nthreads_max = num_cpu_avail(3); | |||||
| nthreads_avail = nthreads_max; | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| double MNK = (double) args.m * (double) args.n * (double) args.k; | |||||
| MNK = (double) args.m * (double) args.n * (double) args.k; | |||||
| if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) | if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) | ||||
| nthreads_max = 1; | nthreads_max = 1; | ||||
| #else | #else | ||||
| double MNK = (double) args.m * (double) args.n * (double) args.k; | |||||
| MNK = (double) args.m * (double) args.n * (double) args.k; | |||||
| if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) | if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) | ||||
| nthreads_max = 1; | nthreads_max = 1; | ||||
| #endif | #endif | ||||
| @@ -81,6 +81,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | ||||
| @@ -135,6 +138,9 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| blasint info, t; | blasint info, t; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | ||||
| @@ -235,10 +241,10 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads_max = num_cpu_avail(2); | |||||
| int nthreads_avail = nthreads_max; | |||||
| nthreads_max = num_cpu_avail(2); | |||||
| nthreads_avail = nthreads_max; | |||||
| double MNK = (double) m * (double) n; | |||||
| MNK = (double) m * (double) n; | |||||
| if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) ) | if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) ) | ||||
| nthreads_max = 1; | nthreads_max = 1; | ||||
| @@ -136,6 +136,8 @@ blasint NAME(blasint *N, FLOAT *x, blasint *INCX){ | |||||
| ret = (blasint)MAX_K(n, x, incx); | ret = (blasint)MAX_K(n, x, incx); | ||||
| if(ret > n) ret=n; | |||||
| FUNCTION_PROFILE_END(COMPSIZE, n, 0); | FUNCTION_PROFILE_END(COMPSIZE, n, 0); | ||||
| IDEBUG_END; | IDEBUG_END; | ||||
| @@ -159,6 +161,8 @@ CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){ | |||||
| ret = MAX_K(n, x, incx); | ret = MAX_K(n, x, incx); | ||||
| if (ret > n) ret=n; | |||||
| if (ret) ret --; | if (ret) ret --; | ||||
| FUNCTION_PROFILE_END(COMPSIZE, n, 0); | FUNCTION_PROFILE_END(COMPSIZE, n, 0); | ||||
| @@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||||
| #endif | #endif | ||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) | |||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) | |||||
| long double da = *DA; | long double da = *DA; | ||||
| long double db = *DB; | long double db = *DB; | ||||
| @@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT * | |||||
| #endif | #endif | ||||
| if (n <= 0) return; | |||||
| FLOAT alpha_r = *(ALPHA + 0); | FLOAT alpha_r = *(ALPHA + 0); | ||||
| FLOAT alpha_i = *(ALPHA + 1); | FLOAT alpha_i = *(ALPHA + 1); | ||||
| FLOAT beta_r = *(BETA + 0); | FLOAT beta_r = *(BETA + 0); | ||||
| FLOAT beta_i = *(BETA + 1); | FLOAT beta_i = *(BETA + 1); | ||||
| if (n <= 0) return; | |||||
| FUNCTION_PROFILE_START(); | FUNCTION_PROFILE_START(); | ||||
| if (incx < 0) x -= (n - 1) * incx * 2; | if (incx < 0) x -= (n - 1) * incx * 2; | ||||
| @@ -57,21 +57,25 @@ | |||||
| #ifdef RETURN_BY_STRUCT | #ifdef RETURN_BY_STRUCT | ||||
| MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | ||||
| #elif defined RETURN_BY_STACK | #elif defined RETURN_BY_STACK | ||||
| void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| #else | #else | ||||
| FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| #endif | #endif | ||||
| BLASLONG n = *N; | BLASLONG n = *N; | ||||
| BLASLONG incx = *INCX; | BLASLONG incx = *INCX; | ||||
| BLASLONG incy = *INCY; | BLASLONG incy = *INCY; | ||||
| #ifndef RETURN_BY_STACK | #ifndef RETURN_BY_STACK | ||||
| FLOAT _Complex ret; | |||||
| OPENBLAS_COMPLEX_FLOAT ret; | |||||
| #endif | #endif | ||||
| #ifdef RETURN_BY_STRUCT | #ifdef RETURN_BY_STRUCT | ||||
| MYTYPE myret; | MYTYPE myret; | ||||
| #endif | #endif | ||||
| #ifndef RETURN_BY_STRUCT | |||||
| OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); | |||||
| #endif | |||||
| PRINT_DEBUG_NAME; | PRINT_DEBUG_NAME; | ||||
| if (n <= 0) { | if (n <= 0) { | ||||
| @@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, | |||||
| myret.i = 0.; | myret.i = 0.; | ||||
| return myret; | return myret; | ||||
| #elif defined RETURN_BY_STACK | #elif defined RETURN_BY_STACK | ||||
| *result = ZERO; | |||||
| *result = zero; | |||||
| return; | return; | ||||
| #else | #else | ||||
| return ZERO; | |||||
| return zero; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -144,21 +148,24 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, | |||||
| #else | #else | ||||
| #ifdef FORCE_USE_STACK | #ifdef FORCE_USE_STACK | ||||
| void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ | |||||
| void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ | |||||
| #else | #else | ||||
| FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||||
| OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||||
| FLOAT _Complex ret; | |||||
| OPENBLAS_COMPLEX_FLOAT ret; | |||||
| OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); | |||||
| #endif | #endif | ||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| if (n <= 0) { | if (n <= 0) { | ||||
| #ifdef FORCE_USE_STACK | #ifdef FORCE_USE_STACK | ||||
| *result = ZERO; | |||||
| //*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); | |||||
| CREAL(*result) = 0.0; | |||||
| CIMAG(*result) = 0.0; | |||||
| return; | return; | ||||
| #else | #else | ||||
| return ZERO; | |||||
| return zero; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | ||||
| @@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| blasint lenx, leny; | blasint lenx, leny; | ||||
| blasint i; | blasint i; | ||||
| PRINT_DEBUG_NAME; | |||||
| FLOAT alpha_r = *(ALPHA + 0); | FLOAT alpha_r = *(ALPHA + 0); | ||||
| FLOAT alpha_i = *(ALPHA + 1); | FLOAT alpha_i = *(ALPHA + 1); | ||||
| FLOAT beta_r = *(BETA + 0); | FLOAT beta_r = *(BETA + 0); | ||||
| FLOAT beta_i = *(BETA + 1); | FLOAT beta_i = *(BETA + 1); | ||||
| PRINT_DEBUG_NAME; | |||||
| TOUPPER(trans); | TOUPPER(trans); | ||||
| info = 0; | info = 0; | ||||
| @@ -145,6 +148,9 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| blasint info, t; | blasint info, t; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | ||||
| @@ -153,14 +159,14 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| GEMV_O, GEMV_U, GEMV_S, GEMV_D, | GEMV_O, GEMV_U, GEMV_S, GEMV_D, | ||||
| }; | }; | ||||
| PRINT_DEBUG_CNAME; | |||||
| FLOAT alpha_r = *(ALPHA + 0); | FLOAT alpha_r = *(ALPHA + 0); | ||||
| FLOAT alpha_i = *(ALPHA + 1); | FLOAT alpha_i = *(ALPHA + 1); | ||||
| FLOAT beta_r = *(BETA + 0); | FLOAT beta_r = *(BETA + 0); | ||||
| FLOAT beta_i = *(BETA + 1); | FLOAT beta_i = *(BETA + 1); | ||||
| PRINT_DEBUG_CNAME; | |||||
| trans = -1; | trans = -1; | ||||
| info = 0; | info = 0; | ||||
| @@ -234,10 +240,10 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads_max = num_cpu_avail(2); | |||||
| int nthreads_avail = nthreads_max; | |||||
| nthreads_max = num_cpu_avail(2); | |||||
| nthreads_avail = nthreads_max; | |||||
| double MNK = (double) m * (double) n; | |||||
| MNK = (double) m * (double) n; | |||||
| if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) | if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) | ||||
| nthreads_max = 1; | nthreads_max = 1; | ||||
| @@ -6,13 +6,7 @@ | |||||
| void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | ||||
| PRINT_DEBUG_NAME; | |||||
| IDEBUG_START; | |||||
| FUNCTION_PROFILE_START(); | |||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) | |||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) | |||||
| long double da_r = *(DA + 0); | long double da_r = *(DA + 0); | ||||
| long double da_i = *(DA + 1); | long double da_i = *(DA + 1); | ||||
| @@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||||
| long double ada = fabs(da_r) + fabs(da_i); | long double ada = fabs(da_r) + fabs(da_i); | ||||
| PRINT_DEBUG_NAME; | |||||
| IDEBUG_START; | |||||
| FUNCTION_PROFILE_START(); | |||||
| if (ada == ZERO) { | if (ada == ZERO) { | ||||
| *C = ZERO; | *C = ZERO; | ||||
| *(S + 0) = ONE; | *(S + 0) = ONE; | ||||
| @@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||||
| FLOAT ada = fabs(da_r) + fabs(da_i); | FLOAT ada = fabs(da_r) + fabs(da_i); | ||||
| FLOAT adb; | FLOAT adb; | ||||
| PRINT_DEBUG_NAME; | |||||
| IDEBUG_START; | |||||
| FUNCTION_PROFILE_START(); | |||||
| if (ada == ZERO) { | if (ada == ZERO) { | ||||
| *C = ZERO; | *C = ZERO; | ||||
| *(S + 0) = ONE; | *(S + 0) = ONE; | ||||
| @@ -121,6 +121,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int trans, uplo; | int trans, uplo; | ||||
| blasint info; | blasint info; | ||||
| FLOAT * ALPHA = &alpha; | |||||
| FLOAT alpha_r = ALPHA[0]; | |||||
| FLOAT alpha_i = ALPHA[1]; | |||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,428 @@ | |||||
| # Put the top-level source directory on the include search path and load the kernel helper script. | |||||
| include_directories(${CMAKE_SOURCE_DIR}) | |||||
| include("${CMAKE_SOURCE_DIR}/cmake/kernel.cmake") | |||||
| # Makefile | |||||
| # KDIR is empty regardless of how TARGET_CORE is resolved below. | |||||
| set(KDIR "") | |||||
| if (NOT DEFINED TARGET_CORE) | |||||
|   # No explicit core was requested: fall back to CORE and use no object-name suffix. | |||||
|   set(TARGET_CORE ${CORE}) | |||||
|   set(TSUFFIX "") | |||||
| else () | |||||
|   # Makefile equivalent: override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) | |||||
|   set(BUILD_KERNEL 1) | |||||
|   set(TSUFFIX "_${TARGET_CORE}") | |||||
| endif () | |||||
| # Install the default kernel selections first, then read the KERNEL files. | |||||
| # NOTE(review): SetDefaultL1/L2/L3 and ParseMakefileVars are defined outside this file | |||||
| # (presumably in the included cmake scripts) -- confirm their semantics there. | |||||
| SetDefaultL1() | |||||
| SetDefaultL2() | |||||
| SetDefaultL3() | |||||
| # The core-specific file is parsed after the common one, so presumably its values win -- TODO confirm. | |||||
| ParseMakefileVars("${KERNELDIR}/KERNEL") | |||||
| ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") | |||||
| # The cpuid helper is only built for x86: cpuid_win.c under MSVC, cpuid.S otherwise. | |||||
| # ARCH is quoted so that an empty or unset value compares as "" instead of producing | |||||
| # a malformed if() expression at configure time. | |||||
| if ("${ARCH}" STREQUAL "x86") | |||||
|   if (MSVC) | |||||
|     GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true) | |||||
|   else () | |||||
|     GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) | |||||
|   endif () | |||||
| endif () | |||||
| # lsame, scabs1 and dcabs1 are registered under fixed names: no float type name mangling | |||||
| # is applied to these three objects. | |||||
| GenerateNamedObjects( | |||||
|   "${KERNELDIR}/${LSAME_KERNEL}" | |||||
|   "F_INTERFACE" | |||||
|   "lsame" false "" "" true) | |||||
| GenerateNamedObjects( | |||||
|   "${KERNELDIR}/${SCABS_KERNEL}" | |||||
|   "COMPLEX;F_INTERFACE" | |||||
|   "scabs1" false "" "" true) | |||||
| GenerateNamedObjects( | |||||
|   "${KERNELDIR}/${DCABS_KERNEL}" | |||||
|   "DOUBLE;COMPLEX;F_INTERFACE" | |||||
|   "dcabs1" false "" "" true) | |||||
| # Makefile.L1 | |||||
| # Level-1 kernels: for every entry of FLOAT_TYPES, register one object per routine. | |||||
| # Each call names a kernel source file, a ';'-separated option list, an object name and the | |||||
| # float type. NOTE(review): the option list looks like preprocessor defines -- confirm | |||||
| # against the definition of GenerateNamedObjects, which is not part of this file. | |||||
| foreach (float_type ${FLOAT_TYPES}) | |||||
| # a bit of metaprogramming here to pull out the appropriate KERNEL var | |||||
| # float_char is the first character of the float type; it is prepended to names such as | |||||
| # AMAXKERNEL to form the variable that holds the kernel file name for this type. | |||||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) | |||||
| # max/min kernels are optional: they are only registered when the matching variable is defined. | |||||
| if (DEFINED ${float_char}MAXKERNEL) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) | |||||
| endif () | |||||
| if (DEFINED ${float_char}MINKERNEL) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) | |||||
| endif () | |||||
| # Index-returning variants. NOTE(review): the '*' in the object name is presumably replaced | |||||
| # by the type letter inside GenerateNamedObjects -- confirm. | |||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) | |||||
| if (DEFINED I${float_char}MAXKERNEL) | |||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) | |||||
| endif () | |||||
| if (DEFINED I${float_char}MINKERNEL) | |||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) | |||||
| endif () | |||||
| # Kernels registered unconditionally for every float type. | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) | |||||
| # Complex types get a conjugated axpy plus dotu/dotc; real types get a single dot kernel. | |||||
| if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dotu_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "CONJ" "dotc_k" false "" "" false ${float_type}) | |||||
| else () | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) | |||||
| endif () | |||||
| # For the two complex types the same ROT kernel source is registered a second time | |||||
| # under the srot_k / drot_k object name. | |||||
| if (${float_type} STREQUAL "COMPLEX") | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type}) | |||||
| endif() | |||||
| if (${float_type} STREQUAL "ZCOMPLEX") | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type}) | |||||
| endif() | |||||
| endforeach () | |||||
| #dsdot,sdsdot | |||||
| # Both objects are built from the same DSDOTKERNEL source with the DSDOT option, for the | |||||
| # SINGLE float type only; they differ just in the object name passed in. | |||||
| # NOTE(review): per the comment above the resulting objects are presumably dsdot_k and | |||||
| # sdsdot_k, which depends on how GenerateNamedObjects expands '*' and prefixes names -- confirm. | |||||
| GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") | |||||
| GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") | |||||
| # Makefile.L2 | |||||
| # symv and ger come from the generic sources and are registered once, outside the per-type loop. | |||||
| # NOTE(review): the meaning of the trailing argument 3 is not visible in this file -- confirm. | |||||
| GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) | |||||
| GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) | |||||
| # Per-type level-2 kernels: real types only need gemv_n/gemv_t, complex types additionally | |||||
| # need the ger, conjugated gemv and hemv variants. | |||||
| foreach (float_type ${FLOAT_TYPES}) | |||||
|   string(SUBSTRING ${float_type} 0 1 float_char) | |||||
|   if (NOT ${float_type} STREQUAL "COMPLEX" AND NOT ${float_type} STREQUAL "ZCOMPLEX") | |||||
|     # Real types: plain and transposed gemv. | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) | |||||
|   else () | |||||
|     # Complex ger: the GERU and GERC kernels, each with and without XCONJ. | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type}) | |||||
|     # Complex gemv: the N and T kernels combined with CONJ and XCONJ give eight variants. | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) | |||||
|     # Hermitian matrix-vector kernels: U/L, and V/M which add HEMVREV. | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type}) | |||||
|     GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type}) | |||||
|   endif () | |||||
| endforeach () | |||||
| # Makefile.L3 | |||||
| # USE_TRMM decides, further down, whether the TRMM objects are built from the dedicated | |||||
| # <type>TRMMKERNEL source (true) or from the <type>GEMMKERNEL source (false). | |||||
| # It is enabled for the arm/arm64 architectures and for the LOONGSON3B, GENERIC and HASWELL | |||||
| # targets/cores. Every operand is quoted so an empty or unset variable compares as "" | |||||
| # instead of producing a malformed if() expression. | |||||
| set(USE_TRMM false) | |||||
| if ("${ARCH}" STREQUAL "arm" | |||||
|     OR "${ARCH}" STREQUAL "arm64" | |||||
|     OR "${TARGET}" STREQUAL "LOONGSON3B" | |||||
|     OR "${TARGET}" STREQUAL "GENERIC" | |||||
|     OR "${CORE}" STREQUAL "generic" | |||||
|     OR "${TARGET}" STREQUAL "HASWELL" | |||||
|     OR "${CORE}" STREQUAL "haswell") | |||||
|   set(USE_TRMM true) | |||||
| endif () | |||||
| foreach (float_type ${FLOAT_TYPES}) | |||||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) | |||||
| if (${float_char}GEMMINCOPY) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type}) | |||||
| endif () | |||||
| if (${float_char}GEMMITCOPY) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "${float_type}" "${${float_char}GEMMITCOPYOBJ}" false "" "" true ${float_type}) | |||||
| endif () | |||||
| if (${float_char}GEMMONCOPY) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "${float_type}" "${${float_char}GEMMONCOPYOBJ}" false "" "" true ${float_type}) | |||||
| endif () | |||||
| if (${float_char}GEMMOTCOPY) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type}) | |||||
| endif () | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "gemm_beta" false "" "" false ${float_type}) | |||||
| if (USE_TRMM) | |||||
| set(TRMM_KERNEL "${${float_char}TRMMKERNEL}") | |||||
| else () | |||||
| set(TRMM_KERNEL "${${float_char}GEMMKERNEL}") | |||||
| endif () | |||||
| if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||||
| # just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. CN NC NN) that I don't really want to work into GenerateCombinationObjects | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;NN" "trmm_kernel_RN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) | |||||
| #hemm | |||||
| GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) | |||||
| # symm for c and z | |||||
| GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) | |||||
| else () #For real | |||||
| GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) | |||||
| # symm for s and d | |||||
| GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) | |||||
| # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. | |||||
| # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. | |||||
| GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) | |||||
| endif () | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) | |||||
| # omatcopy / imatcopy kernels. | |||||
| # A ${float_char}OMATCOPY_<variant> or ${float_char}IMATCOPY_<variant> variable that is already DEFINED is | |||||
| # left untouched; otherwise it falls back to ../arm/[z]omatcopy_<variant>.c or ../generic/[z]imatcopy_<variant>.c | |||||
| # (variant in lower case). The C and Z precisions use the z-prefixed sources and additionally build the | |||||
| # four *C variants, which are generated with CONJ. | |||||
| # The STREQUAL operands are quoted so that an empty ${float_char} cannot produce a malformed if(). | |||||
| set(matcopy_is_complex FALSE) | |||||
| set(matcopy_src_prefix "") | |||||
| if ("${float_char}" STREQUAL "Z" OR "${float_char}" STREQUAL "C") | |||||
|   set(matcopy_is_complex TRUE) | |||||
|   set(matcopy_src_prefix "z") | |||||
| endif () | |||||
| # omatcopy: defaults for the four plain variants, then their objects. | |||||
| foreach (matcopy_variant CN RN CT RT) | |||||
|   if (NOT DEFINED ${float_char}OMATCOPY_${matcopy_variant}) | |||||
|     string(TOLOWER "${matcopy_variant}" matcopy_suffix) | |||||
|     set(${float_char}OMATCOPY_${matcopy_variant} ../arm/${matcopy_src_prefix}omatcopy_${matcopy_suffix}.c) | |||||
|   endif () | |||||
| endforeach () | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type}) | |||||
| # omatcopy, complex only: defaults and objects for the conjugating variants. | |||||
| if (matcopy_is_complex) | |||||
|   foreach (matcopy_variant CNC RNC CTC RTC) | |||||
|     if (NOT DEFINED ${float_char}OMATCOPY_${matcopy_variant}) | |||||
|       string(TOLOWER "${matcopy_variant}" matcopy_suffix) | |||||
|       set(${float_char}OMATCOPY_${matcopy_variant} ../arm/zomatcopy_${matcopy_suffix}.c) | |||||
|     endif () | |||||
|   endforeach () | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type}) | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type}) | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type}) | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) | |||||
| endif () | |||||
| #imatcopy | |||||
| # Same scheme as omatcopy, but the fallback sources live under ../generic. | |||||
| foreach (matcopy_variant CN RN CT RT) | |||||
|   if (NOT DEFINED ${float_char}IMATCOPY_${matcopy_variant}) | |||||
|     string(TOLOWER "${matcopy_variant}" matcopy_suffix) | |||||
|     set(${float_char}IMATCOPY_${matcopy_variant} ../generic/${matcopy_src_prefix}imatcopy_${matcopy_suffix}.c) | |||||
|   endif () | |||||
| endforeach () | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CN}" "" "imatcopy_k_cn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RN}" "ROWM" "imatcopy_k_rn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CT}" "" "imatcopy_k_ct" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RT}" "ROWM" "imatcopy_k_rt" false "" "" false ${float_type}) | |||||
| if (matcopy_is_complex) | |||||
|   foreach (matcopy_variant CNC RNC CTC RTC) | |||||
|     if (NOT DEFINED ${float_char}IMATCOPY_${matcopy_variant}) | |||||
|       string(TOLOWER "${matcopy_variant}" matcopy_suffix) | |||||
|       set(${float_char}IMATCOPY_${matcopy_variant} ../generic/zimatcopy_${matcopy_suffix}.c) | |||||
|     endif () | |||||
|   endforeach () | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CNC}" "CONJ" "imatcopy_k_cnc" false "" "" false ${float_type}) | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RNC}" "CONJ;ROWM" "imatcopy_k_rnc" false "" "" false ${float_type}) | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CTC}" "CONJ" "imatcopy_k_ctc" false "" "" false ${float_type}) | |||||
|   GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RTC}" "CONJ;ROWM" "imatcopy_k_rtc" false "" "" false ${float_type}) | |||||
| endif () | |||||
| #geadd | |||||
| # One geadd_k object per precision, built from the source named by ${float_char}GEADD_KERNEL. | |||||
| # NOTE(review): unlike the matcopy kernels, no fallback for ${float_char}GEADD_KERNEL is visible here -- | |||||
| # presumably it is set elsewhere before this point; confirm, otherwise the path collapses to "${KERNELDIR}/". | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) | |||||
| endforeach () | |||||
| # Makefile.LA | |||||
| #DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) | |||||
| # Bundle the sources listed in OPENBLAS_SRC into the "kernel" object library. | |||||
| # NOTE(review): OPENBLAS_SRC is presumably filled by the GenerateNamedObjects calls above -- confirm in the helper. | |||||
| add_library(kernel OBJECT ${OPENBLAS_SRC}) | |||||
| @@ -3640,7 +3640,7 @@ ifndef DGEADD_K | |||||
| DGEADD_K = ../generic/geadd.c | DGEADD_K = ../generic/geadd.c | ||||
| endif | endif | ||||
| $(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) | |||||
| $(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K) | |||||
| $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ | $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ | ||||
| ifndef CGEADD_K | ifndef CGEADD_K | ||||
| @@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL | |||||
| BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
| FLOAT temp; | FLOAT temp; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| ix = 0; | ix = 0; | ||||
| iy = 0; | iy = 0; | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| if ( beta_r == 0.0 && beta_i == 0.0) | if ( beta_r == 0.0 && beta_i == 0.0) | ||||
| { | { | ||||
| @@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||||
| { | { | ||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| if ( da_r == 0.0 && da_i == 0.0 ) return(0); | if ( da_r == 0.0 && da_i == 0.0 ) return(0); | ||||
| @@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||||
| ix = 0; | ix = 0; | ||||
| iy = 0; | iy = 0; | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| { | { | ||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -35,25 +35,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| **************************************************************************************/ | **************************************************************************************/ | ||||
| #include "common.h" | #include "common.h" | ||||
| #include <complex.h> | |||||
| #ifndef _MSC_VER | |||||
| #include <complex.h> | |||||
| FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | ||||
| #else | |||||
| OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| #endif | |||||
| { | { | ||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| FLOAT dot[2]; | FLOAT dot[2]; | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| dot[0]=0.0; | dot[0]=0.0; | ||||
| dot[1]=0.0; | dot[1]=0.0; | ||||
| __real__ result = 0.0 ; | |||||
| __imag__ result = 0.0 ; | |||||
| CREAL(result) = 0.0 ; | |||||
| CIMAG(result) = 0.0 ; | |||||
| if ( n < 1 ) return(result); | if ( n < 1 ) return(result); | ||||
| BLASLONG inc_x2 = 2 * inc_x ; | |||||
| BLASLONG inc_y2 = 2 * inc_y ; | |||||
| inc_x2 = 2 * inc_x ; | |||||
| inc_y2 = 2 * inc_y ; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -69,8 +75,8 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in | |||||
| i++ ; | i++ ; | ||||
| } | } | ||||
| __real__ result = dot[0]; | |||||
| __imag__ result = dot[1]; | |||||
| CREAL(result) = dot[0]; | |||||
| CIMAG(result) = dot[1]; | |||||
| return(result); | return(result); | ||||
| } | } | ||||
| @@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT | |||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n <= 0 ) return(0); | if ( n <= 0 ) return(0); | ||||
| BLASLONG inc_x2 = 2 * inc_x ; | |||||
| BLASLONG inc_y2 = 2 * inc_y ; | |||||
| inc_x2 = 2 * inc_x ; | |||||
| inc_y2 = 2 * inc_y ; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm | |||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -550,6 +550,13 @@ gotoblas_t TABLE_NAME = { | |||||
| zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, | zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, | ||||
| zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, | zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, | ||||
| simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS, | |||||
| dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, | |||||
| cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, | |||||
| cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, | |||||
| zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, | |||||
| zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, | |||||
| sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS | sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS | ||||
| }; | }; | ||||
| @@ -0,0 +1,41 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2015, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #if defined(_MSC_VER) && !defined(__clang__) | |||||
| #include<intrin.h> | |||||
| /* | |||||
|  * cpuid wrapper for MSVC builds (compiled only when _MSC_VER is defined and __clang__ is not). | |||||
|  * Calls the __cpuid intrinsic with leaf `op` and stores the four returned words, | |||||
|  * in order, through the eax, ebx, ecx and edx pointers. | |||||
|  */ | |||||
| void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) | |||||
| { | |||||
|   int regs[4] = {-1};   /* first word starts at -1, the remaining three at 0 */ | |||||
|   __cpuid(regs, op); | |||||
|   *eax = regs[0]; | |||||
|   *ebx = regs[1]; | |||||
|   *ecx = regs[2]; | |||||
|   *edx = regs[3]; | |||||
| } | |||||
| #endif | |||||
| @@ -119,11 +119,11 @@ XCOPYKERNEL = zcopy.S | |||||
| endif | endif | ||||
| ifndef SDOTKERNEL | ifndef SDOTKERNEL | ||||
| SDOTKERNEL = ../generic/dot.c | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| endif | endif | ||||
| ifndef DSDOTKERNEL | ifndef DSDOTKERNEL | ||||
| DSDOTKERNEL = ../generic/dot.c | |||||
| DSDOTKERNEL = ../generic/dot.c | |||||
| endif | endif | ||||
| ifndef DDOTKERNEL | ifndef DDOTKERNEL | ||||
| @@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c | |||||
| ZHEMV_U_KERNEL = ../generic/zhemv_k.c | ZHEMV_U_KERNEL = ../generic/zhemv_k.c | ||||
| ZHEMV_L_KERNEL = ../generic/zhemv_k.c | ZHEMV_L_KERNEL = ../generic/zhemv_k.c | ||||
| LSAME_KERNEL = ../generic/lsame.c | |||||
| SCABS_KERNEL = ../generic/cabs.c | |||||
| DCABS_KERNEL = ../generic/cabs.c | |||||
| QCABS_KERNEL = ../generic/cabs.c | |||||
| #Dump kernel | |||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| @@ -7,7 +7,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA | |||||
| static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOAT *C0, FLOAT *C1, FLOAT *C2,FLOAT *C3, FLOAT *C4, FLOAT *C5,FLOAT *C6, FLOAT *C7) | static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOAT *C0, FLOAT *C1, FLOAT *C2,FLOAT *C3, FLOAT *C4, FLOAT *C5,FLOAT *C6, FLOAT *C7) | ||||
| { | { | ||||
| BLASLONG I = 0; | |||||
| BLASLONG i = 0; | |||||
| BLASLONG temp1 = n * 8; | BLASLONG temp1 = n * 8; | ||||
| __asm__ __volatile__ | __asm__ __volatile__ | ||||
| @@ -110,7 +110,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA | |||||
| : | : | ||||
| : | : | ||||
| "a" (I), // 0 | |||||
| "a" (i), // 0 | |||||
| "r" (temp1), // 1 | "r" (temp1), // 1 | ||||
| "S" (a), // 2 | "S" (a), // 2 | ||||
| "D" (b), // 3 | "D" (b), // 3 | ||||
| @@ -1,5 +1,5 @@ | |||||
| /***************************************************************************** | /***************************************************************************** | ||||
| Copyright (c) 2011, Intel Corp. | |||||
| Copyright (c) 2014, Intel Corp. | |||||
| All rights reserved. | All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
| @@ -33,7 +33,7 @@ | |||||
| #include "lapacke_utils.h" | #include "lapacke_utils.h" | ||||
| lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, | |||||
| lapack_int LAPACKE_cunmlq_work( int matrix_layout, char side, char trans, | |||||
| lapack_int m, lapack_int n, lapack_int k, | lapack_int m, lapack_int n, lapack_int k, | ||||
| const lapack_complex_float* a, lapack_int lda, | const lapack_complex_float* a, lapack_int lda, | ||||
| const lapack_complex_float* tau, | const lapack_complex_float* tau, | ||||
| @@ -41,20 +41,22 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, | |||||
| lapack_complex_float* work, lapack_int lwork ) | lapack_complex_float* work, lapack_int lwork ) | ||||
| { | { | ||||
| lapack_int info = 0; | lapack_int info = 0; | ||||
| if( matrix_order == LAPACK_COL_MAJOR ) { | |||||
| lapack_int r; | |||||
| if( matrix_layout == LAPACK_COL_MAJOR ) { | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_cunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | LAPACK_cunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | ||||
| &lwork, &info ); | &lwork, &info ); | ||||
| if( info < 0 ) { | if( info < 0 ) { | ||||
| info = info - 1; | info = info - 1; | ||||
| } | } | ||||
| } else if( matrix_order == LAPACK_ROW_MAJOR ) { | |||||
| } else if( matrix_layout == LAPACK_ROW_MAJOR ) { | |||||
| r = LAPACKE_lsame( side, 'l' ) ? m : n; | |||||
| lapack_int lda_t = MAX(1,k); | lapack_int lda_t = MAX(1,k); | ||||
| lapack_int ldc_t = MAX(1,m); | lapack_int ldc_t = MAX(1,m); | ||||
| lapack_complex_float* a_t = NULL; | lapack_complex_float* a_t = NULL; | ||||
| lapack_complex_float* c_t = NULL; | lapack_complex_float* c_t = NULL; | ||||
| /* Check leading dimension(s) */ | /* Check leading dimension(s) */ | ||||
| if( lda < m ) { | |||||
| if( lda < r ) { | |||||
| info = -8; | info = -8; | ||||
| LAPACKE_xerbla( "LAPACKE_cunmlq_work", info ); | LAPACKE_xerbla( "LAPACKE_cunmlq_work", info ); | ||||
| return info; | return info; | ||||
| @@ -84,8 +86,8 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, | |||||
| goto exit_level_1; | goto exit_level_1; | ||||
| } | } | ||||
| /* Transpose input matrices */ | /* Transpose input matrices */ | ||||
| LAPACKE_cge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_cge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); | |||||
| LAPACKE_cge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_cunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | LAPACK_cunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | ||||
| work, &lwork, &info ); | work, &lwork, &info ); | ||||
| @@ -1,5 +1,5 @@ | |||||
| /***************************************************************************** | /***************************************************************************** | ||||
| Copyright (c) 2011, Intel Corp. | |||||
| Copyright (c) 2014, Intel Corp. | |||||
| All rights reserved. | All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
| @@ -33,27 +33,29 @@ | |||||
| #include "lapacke_utils.h" | #include "lapacke_utils.h" | ||||
| lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, | |||||
| lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans, | |||||
| lapack_int m, lapack_int n, lapack_int k, | lapack_int m, lapack_int n, lapack_int k, | ||||
| const double* a, lapack_int lda, | const double* a, lapack_int lda, | ||||
| const double* tau, double* c, lapack_int ldc, | const double* tau, double* c, lapack_int ldc, | ||||
| double* work, lapack_int lwork ) | double* work, lapack_int lwork ) | ||||
| { | { | ||||
| lapack_int info = 0; | lapack_int info = 0; | ||||
| lapack_int r; | |||||
| lapack_int lda_t, ldc_t; | lapack_int lda_t, ldc_t; | ||||
| double *a_t = NULL, *c_t = NULL; | double *a_t = NULL, *c_t = NULL; | ||||
| if( matrix_order == LAPACK_COL_MAJOR ) { | |||||
| if( matrix_layout == LAPACK_COL_MAJOR ) { | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_dormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | LAPACK_dormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | ||||
| &lwork, &info ); | &lwork, &info ); | ||||
| if( info < 0 ) { | if( info < 0 ) { | ||||
| info = info - 1; | info = info - 1; | ||||
| } | } | ||||
| } else if( matrix_order == LAPACK_ROW_MAJOR ) { | |||||
| } else if( matrix_layout == LAPACK_ROW_MAJOR ) { | |||||
| r = LAPACKE_lsame( side, 'l' ) ? m : n; | |||||
| lda_t = MAX(1,k); | lda_t = MAX(1,k); | ||||
| ldc_t = MAX(1,m); | ldc_t = MAX(1,m); | ||||
| /* Check leading dimension(s) */ | /* Check leading dimension(s) */ | ||||
| if( lda < m ) { | |||||
| if( lda < r ) { | |||||
| info = -8; | info = -8; | ||||
| LAPACKE_xerbla( "LAPACKE_dormlq_work", info ); | LAPACKE_xerbla( "LAPACKE_dormlq_work", info ); | ||||
| return info; | return info; | ||||
| @@ -81,8 +83,8 @@ lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, | |||||
| goto exit_level_1; | goto exit_level_1; | ||||
| } | } | ||||
| /* Transpose input matrices */ | /* Transpose input matrices */ | ||||
| LAPACKE_dge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_dge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); | |||||
| LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | ||||
| work, &lwork, &info ); | work, &lwork, &info ); | ||||
| @@ -1,5 +1,5 @@ | |||||
| /***************************************************************************** | /***************************************************************************** | ||||
| Copyright (c) 2011, Intel Corp. | |||||
| Copyright (c) 2014, Intel Corp. | |||||
| All rights reserved. | All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
| @@ -33,27 +33,29 @@ | |||||
| #include "lapacke_utils.h" | #include "lapacke_utils.h" | ||||
| lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, | |||||
| lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans, | |||||
| lapack_int m, lapack_int n, lapack_int k, | lapack_int m, lapack_int n, lapack_int k, | ||||
| const float* a, lapack_int lda, | const float* a, lapack_int lda, | ||||
| const float* tau, float* c, lapack_int ldc, | const float* tau, float* c, lapack_int ldc, | ||||
| float* work, lapack_int lwork ) | float* work, lapack_int lwork ) | ||||
| { | { | ||||
| lapack_int info = 0; | lapack_int info = 0; | ||||
| lapack_int r; | |||||
| lapack_int lda_t, ldc_t; | lapack_int lda_t, ldc_t; | ||||
| float *a_t = NULL, *c_t = NULL; | float *a_t = NULL, *c_t = NULL; | ||||
| if( matrix_order == LAPACK_COL_MAJOR ) { | |||||
| if( matrix_layout == LAPACK_COL_MAJOR ) { | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_sormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | LAPACK_sormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | ||||
| &lwork, &info ); | &lwork, &info ); | ||||
| if( info < 0 ) { | if( info < 0 ) { | ||||
| info = info - 1; | info = info - 1; | ||||
| } | } | ||||
| } else if( matrix_order == LAPACK_ROW_MAJOR ) { | |||||
| } else if( matrix_layout == LAPACK_ROW_MAJOR ) { | |||||
| r = LAPACKE_lsame( side, 'l' ) ? m : n; | |||||
| lda_t = MAX(1,k); | lda_t = MAX(1,k); | ||||
| ldc_t = MAX(1,m); | ldc_t = MAX(1,m); | ||||
| /* Check leading dimension(s) */ | /* Check leading dimension(s) */ | ||||
| if( lda < m ) { | |||||
| if( lda < r ) { | |||||
| info = -8; | info = -8; | ||||
| LAPACKE_xerbla( "LAPACKE_sormlq_work", info ); | LAPACKE_xerbla( "LAPACKE_sormlq_work", info ); | ||||
| return info; | return info; | ||||
| @@ -81,8 +83,8 @@ lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, | |||||
| goto exit_level_1; | goto exit_level_1; | ||||
| } | } | ||||
| /* Transpose input matrices */ | /* Transpose input matrices */ | ||||
| LAPACKE_sge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_sge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); | |||||
| LAPACKE_sge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_sormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | LAPACK_sormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | ||||
| work, &lwork, &info ); | work, &lwork, &info ); | ||||
| @@ -1,5 +1,5 @@ | |||||
| /***************************************************************************** | /***************************************************************************** | ||||
| Copyright (c) 2011, Intel Corp. | |||||
| Copyright (c) 2014, Intel Corp. | |||||
| All rights reserved. | All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
| @@ -33,7 +33,7 @@ | |||||
| #include "lapacke_utils.h" | #include "lapacke_utils.h" | ||||
| lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, | |||||
| lapack_int LAPACKE_zunmlq_work( int matrix_layout, char side, char trans, | |||||
| lapack_int m, lapack_int n, lapack_int k, | lapack_int m, lapack_int n, lapack_int k, | ||||
| const lapack_complex_double* a, lapack_int lda, | const lapack_complex_double* a, lapack_int lda, | ||||
| const lapack_complex_double* tau, | const lapack_complex_double* tau, | ||||
| @@ -41,20 +41,22 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, | |||||
| lapack_complex_double* work, lapack_int lwork ) | lapack_complex_double* work, lapack_int lwork ) | ||||
| { | { | ||||
| lapack_int info = 0; | lapack_int info = 0; | ||||
| if( matrix_order == LAPACK_COL_MAJOR ) { | |||||
| lapack_int r; | |||||
| if( matrix_layout == LAPACK_COL_MAJOR ) { | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_zunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | LAPACK_zunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, | ||||
| &lwork, &info ); | &lwork, &info ); | ||||
| if( info < 0 ) { | if( info < 0 ) { | ||||
| info = info - 1; | info = info - 1; | ||||
| } | } | ||||
| } else if( matrix_order == LAPACK_ROW_MAJOR ) { | |||||
| } else if( matrix_layout == LAPACK_ROW_MAJOR ) { | |||||
| r = LAPACKE_lsame( side, 'l' ) ? m : n; | |||||
| lapack_int lda_t = MAX(1,k); | lapack_int lda_t = MAX(1,k); | ||||
| lapack_int ldc_t = MAX(1,m); | lapack_int ldc_t = MAX(1,m); | ||||
| lapack_complex_double* a_t = NULL; | lapack_complex_double* a_t = NULL; | ||||
| lapack_complex_double* c_t = NULL; | lapack_complex_double* c_t = NULL; | ||||
| /* Check leading dimension(s) */ | /* Check leading dimension(s) */ | ||||
| if( lda < m ) { | |||||
| if( lda < r ) { | |||||
| info = -8; | info = -8; | ||||
| LAPACKE_xerbla( "LAPACKE_zunmlq_work", info ); | LAPACKE_xerbla( "LAPACKE_zunmlq_work", info ); | ||||
| return info; | return info; | ||||
| @@ -84,8 +86,8 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, | |||||
| goto exit_level_1; | goto exit_level_1; | ||||
| } | } | ||||
| /* Transpose input matrices */ | /* Transpose input matrices */ | ||||
| LAPACKE_zge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_zge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); | |||||
| LAPACKE_zge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); | |||||
| LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_zunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | LAPACK_zunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, | ||||
| work, &lwork, &info ); | work, &lwork, &info ); | ||||
| @@ -0,0 +1,98 @@ | |||||
| include_directories(${CMAKE_SOURCE_DIR}) | |||||
| set(LAPACK_SOURCES | |||||
| getrf/getrf_single.c | |||||
| potrf/potrf_U_single.c | |||||
| potrf/potrf_L_single.c | |||||
| lauum/lauum_U_single.c | |||||
| lauum/lauum_L_single.c | |||||
| ) | |||||
| # add a 'z' to filename for complex version | |||||
| set(LAPACK_MANGLED_SOURCES | |||||
| getf2/getf2_k.c | |||||
| lauu2/lauu2_U.c | |||||
| lauu2/lauu2_L.c | |||||
| potf2/potf2_U.c | |||||
| potf2/potf2_L.c | |||||
| ) | |||||
| # sources that need TRANS set | |||||
| # this has a 'z' version | |||||
| set(TRANS_SOURCES | |||||
| getrs/getrs_single.c | |||||
| ) | |||||
| # sources that need UNIT set | |||||
| # these do NOT have a z version | |||||
| set(UNIT_SOURCES | |||||
| trtri/trtri_U_single.c | |||||
| trtri/trtri_L_single.c | |||||
| ) | |||||
| # these have a 'z' version | |||||
| set(UNIT_SOURCES2 | |||||
| trti2/trti2_U.c | |||||
| trti2/trti2_L.c | |||||
| ) | |||||
| GenerateNamedObjects("${LAPACK_SOURCES}") | |||||
| GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) | |||||
| # TODO: laswp needs arch specific code | |||||
| GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus" false "" "" false 3) | |||||
| GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus" false "" "" false 3) | |||||
# Threaded builds add the parallel drivers on top of the single-threaded ones.
if (SMP)
  # Start from the generic threaded getrf; an OpenMP build swaps in its own variant.
  set(GETRF_SRC getrf/getrf_parallel.c)
  if (USE_OPENMP)
    set(GETRF_SRC getrf/getrf_parallel_omp.c)
  endif ()

  # this has a z version
  list(APPEND TRANS_SOURCES getrs/getrs_parallel.c)

  # these do NOT have a z version
  list(APPEND UNIT_SOURCES
    trtri/trtri_U_parallel.c
    trtri/trtri_L_parallel.c
  )

  # these do not have 'z' versions
  set(PARALLEL_SOURCES
    ${GETRF_SRC}
    lauum/lauum_U_parallel.c
    lauum/lauum_L_parallel.c
    potrf/potrf_U_parallel.c
    potrf/potrf_L_parallel.c
  )
  GenerateNamedObjects("${PARALLEL_SOURCES}")
endif ()
# Generate the TRANS variants of every TRANS source for each enabled float type.
# Complex types are built from the z-prefixed source file with TRANS=1..4
# (object suffixes N, T, R, C); all other types go through
# GenerateCombinationObjects.
foreach (float_type ${FLOAT_TYPES})
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    foreach (trans_src ${TRANS_SOURCES})
      # Split "dir/name_kind.c" into the "name_" prefix and the "kind" suffix,
      # e.g. getrs/getrs_single.c -> "getrs_" and "single".
      string(REGEX MATCH "[a-z]/([a-z]+_)([a-z]+)" op_name "${trans_src}")
      if ("${op_name}" STREQUAL "")
        # Without a match the object names below would be built from stale or
        # empty capture groups; fail loudly instead.
        message(FATAL_ERROR "Cannot derive an object name from TRANS source '${trans_src}'")
      endif ()

      # Copy the capture groups immediately so later regex operations cannot
      # overwrite CMAKE_MATCH_<n> between the four calls below.
      set(op_prefix "${CMAKE_MATCH_1}")
      set(op_suffix "${CMAKE_MATCH_2}")

      # The complex implementation lives in the z-prefixed file of the same directory.
      string(REPLACE "/" "/z" ztrans_src "${trans_src}")

      GenerateNamedObjects("${ztrans_src}" "TRANS=1" "${op_prefix}N_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=2" "${op_prefix}T_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=3" "${op_prefix}R_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=4" "${op_prefix}C_${op_suffix}" false "" "" false ${float_type})
    endforeach ()
  else ()
    GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" false ${float_type})
  endif ()
endforeach ()

# UNIT variants of the triangular-inverse sources.
GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4)
GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3)

# OPENBLAS_SRC is not set in this file; presumably the Generate* helpers
# accumulate the generated sources into it -- confirm in their definition.
add_library(lapack OBJECT ${OPENBLAS_SRC})
| @@ -67,7 +67,7 @@ double sqrt(double); | |||||
| #undef GETRF_FACTOR | #undef GETRF_FACTOR | ||||
| #define GETRF_FACTOR 1.00 | #define GETRF_FACTOR 1.00 | ||||
| static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { | |||||
| static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { | |||||
| double m = (double)(M - IS - BK); | double m = (double)(M - IS - BK); | ||||
| double n = (double)(N - IS - BK); | double n = (double)(N - IS - BK); | ||||
| @@ -373,7 +373,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| BLASLONG num_cpu; | BLASLONG num_cpu; | ||||
| #ifdef _MSC_VER | |||||
| BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE]; | |||||
| #else | |||||
| volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128))); | volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128))); | ||||
| #endif | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| #ifdef XDOUBLE | #ifdef XDOUBLE | ||||
| @@ -59,7 +59,8 @@ typedef int blasint; | |||||
| extension since version 3.0. If neither are available, use a compatible | extension since version 3.0. If neither are available, use a compatible | ||||
| structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | ||||
| #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | ||||
| (__GNUC__ >= 3 && !defined(__cplusplus))) | |||||
| (__GNUC__ >= 3 && !defined(__cplusplus)) || \ | |||||
| _MSC_VER >= 1800) // Visual Studio 2013 supports complex | |||||
| #define OPENBLAS_COMPLEX_C99 | #define OPENBLAS_COMPLEX_C99 | ||||
| #ifndef __cplusplus | #ifndef __cplusplus | ||||
| #include <complex.h> | #include <complex.h> | ||||
| @@ -43,7 +43,7 @@ | |||||
| #if !defined(XDOUBLE) || !defined(QUAD_PRECISION) | #if !defined(XDOUBLE) || !defined(QUAD_PRECISION) | ||||
| static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -141,7 +141,7 @@ static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -232,7 +232,7 @@ static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -362,7 +362,7 @@ static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -486,7 +486,7 @@ static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -613,7 +613,7 @@ static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -735,7 +735,7 @@ static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -862,7 +862,7 @@ static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -984,7 +984,7 @@ static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -1082,7 +1082,7 @@ static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -1180,7 +1180,7 @@ static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -1270,7 +1270,7 @@ static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -1360,7 +1360,7 @@ static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -1490,7 +1490,7 @@ static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -1620,7 +1620,7 @@ static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -1744,7 +1744,7 @@ static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| } | } | ||||
| } | } | ||||
| static inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| static __inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ | |||||
| BLASLONG is, js; | BLASLONG is, js; | ||||
| FLOAT *aa1, *aa2; | FLOAT *aa1, *aa2; | ||||
| @@ -0,0 +1,38 @@ | |||||
# Builds the Fortran BLAS test drivers (levels 1-3 for s, d, c and z) and
# links each one against the static OpenBLAS library.

# Use the OpenBLAS project root rather than CMAKE_SOURCE_DIR so the path stays
# correct when OpenBLAS is embedded in a larger build; this matches the
# PROJECT_SOURCE_DIR-based .dat paths used when the tests are registered.
include_directories("${PROJECT_SOURCE_DIR}")
enable_language(Fortran)

set(OpenBLAS_Tests
  sblat1 sblat2 sblat3
  dblat1 dblat2 dblat3
  cblat1 cblat2 cblat3
  zblat1 zblat2 zblat3)

# One executable per driver, built from the .f file of the same name.
foreach(test_bin ${OpenBLAS_Tests})
  add_executable(${test_bin} ${test_bin}.f)
  target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}_static)
endforeach()
# Writes a small shell wrapper used to run the level 2/3 test drivers.
# $1 exec, $2 input, $3 output_result
#
# The wrapper removes any stale result file, runs the driver with the input
# file on stdin, and fails when
#   - the driver itself exits with a non-zero status,
#   - the result file was never created (e.g. the driver crashed early), or
#   - the result file contains the word FATAL.
# All positional parameters are quoted so paths containing spaces work.
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh"
"rm -f \"$3\"\n"
"\"$1\" < \"$2\"\n"
"if [ $? -ne 0 ]; then\n"
"echo Error\n"
"exit 1\n"
"fi\n"
"if [ ! -f \"$3\" ]; then\n"
"echo Error\n"
"exit 1\n"
"fi\n"
"if grep -q FATAL \"$3\"; then\n"
"echo Error\n"
"exit 1\n"
"fi\n"
"exit 0\n"
)
# Register three CTest tests per precision prefix (s, d, c, z):
#   <p>blas1 runs the level 1 driver directly;
#   <p>blas2 / <p>blas3 run the level 2/3 drivers through test_helper.sh, which
#   feeds them the matching .dat file and checks the <P>BLATn.SUMM result file
#   (a relative name, so it is resolved in the test's working directory).
#
# Executables are referenced by target rather than by a hand-built path so the
# location is right for multi-config generators and platforms that add an
# executable suffix.
set(float_types s d c z)
foreach(float_type ${float_types})
  string(TOUPPER ${float_type} float_type_upper)

  # A target name given to COMMAND is replaced by the built executable's path.
  add_test(NAME "${float_type}blas1"
    COMMAND ${float_type}blat1)

  add_test(NAME "${float_type}blas2"
    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "$<TARGET_FILE:${float_type}blat2>" "${PROJECT_SOURCE_DIR}/test/${float_type}blat2.dat" ${float_type_upper}BLAT2.SUMM)

  add_test(NAME "${float_type}blas3"
    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "$<TARGET_FILE:${float_type}blat3>" "${PROJECT_SOURCE_DIR}/test/${float_type}blat3.dat" ${float_type_upper}BLAT3.SUMM)
endforeach()