From f874465bb81d10e7cdb88a10cff7d62df3fe370c Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 10 Aug 2015 14:10:44 -0500 Subject: [PATCH] Use cmake to build OpenBLAS GENERIC Target on MSVC x86 64-bit. Disable CBLAS and LAPACK. --- CMakeLists.txt | 20 ++++++++--- cmake/export.cmake | 60 +++++++++++++++++++++++++++++++ cmake/f_check.cmake | 3 ++ cmake/kernel.cmake | 15 +++++--- cmake/prebuild.cmake | 9 +++-- cmake/system.cmake | 15 ++++++++ cmake/utils.cmake | 4 +++ common.h | 45 ++++++++++++++++++----- common_x86_64.h | 35 +++++++++++++++--- driver/level2/CMakeLists.txt | 59 ++++++++++++++++++++++++++++++ driver/level2/gbmv_thread.c | 2 +- driver/level2/sbmv_thread.c | 2 +- driver/level2/spmv_thread.c | 2 +- driver/level2/tbmv_thread.c | 2 +- driver/level2/tpmv_thread.c | 2 +- driver/level2/trmv_thread.c | 2 +- driver/level2/zgbmv_k.c | 2 +- driver/level2/zhbmv_k.c | 10 +++--- driver/level2/zhpmv_k.c | 10 +++--- driver/level2/zsbmv_k.c | 6 ++-- driver/level2/zspmv_k.c | 3 +- driver/level2/ztbmv_L.c | 2 +- driver/level2/ztbmv_U.c | 2 +- driver/level2/ztbsv_L.c | 2 +- driver/level2/ztbsv_U.c | 2 +- driver/level2/ztpmv_L.c | 2 +- driver/level2/ztpmv_U.c | 2 +- driver/level2/ztpsv_L.c | 2 +- driver/level2/ztpsv_U.c | 2 +- driver/level2/ztrmv_L.c | 2 +- driver/level2/ztrmv_U.c | 2 +- driver/level2/ztrsv_L.c | 2 +- driver/level2/ztrsv_U.c | 2 +- driver/level3/CMakeLists.txt | 37 ++++++++++++++----- driver/others/CMakeLists.txt | 2 ++ interface/CMakeLists.txt | 39 ++++++++++++++++++-- interface/rotg.c | 3 +- interface/zaxpby.c | 4 +-- interface/zdot.c | 24 +++++++------ interface/zgemv.c | 17 +++++---- interface/zrotg.c | 20 +++++++---- kernel/CMakeLists.txt | 70 ++++++++++++++++++++++++++++++------ kernel/Makefile.L3 | 2 +- kernel/arm/zaxpby.c | 7 ++-- kernel/arm/zaxpy.c | 6 ++-- kernel/arm/zcopy.c | 6 ++-- kernel/arm/zdot.c | 18 +++++----- kernel/arm/zrot.c | 6 ++-- kernel/arm/zswap.c | 6 ++-- kernel/x86_64/KERNEL.generic | 6 ++++ openblas_config_template.h | 3 +- 
51 files changed, 488 insertions(+), 120 deletions(-) create mode 100644 cmake/export.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d2e5d3c6..610cc9c90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,11 +15,13 @@ enable_language(C) set(OpenBLAS_LIBNAME openblas) ####### -option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON) +option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) +option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON) option(BUILD_DEBUG "Build Debug Version" OFF) ####### if(BUILD_WITHOUT_LAPACK) set(NO_LAPACK 1) +set(NO_LAPACKE 1) endif() if(BUILD_DEBUG) @@ -27,6 +29,11 @@ set(CMAKE_BUILD_TYPE Debug) else() set(CMAKE_BUILD_TYPE Release) endif() + +if(BUILD_WITHOUT_CBLAS) +set(NO_CBLAS 1) +endif() + ####### @@ -51,7 +58,6 @@ endif () set(SUBDIRS ${BLASDIRS}) if (NOT NO_LAPACK) - message ("error 1") list(APPEND SUBDIRS lapack) endif () @@ -111,15 +117,21 @@ endforeach () # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. 
if (NOT NOFORTRAN AND NOT NO_LAPACK) - message ("error 2") include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") if (NOT NO_LAPACKE) include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") endif () endif () +#Only generate .def for dll on MSVC +if(MSVC) +set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") +endif() + # add objects to the openblas lib -add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) +add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) + +include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") #only build shared library for MSVC if(NOT MSVC) diff --git a/cmake/export.cmake b/cmake/export.cmake new file mode 100644 index 000000000..adf59101f --- /dev/null +++ b/cmake/export.cmake @@ -0,0 +1,60 @@ + +#Only generate .def for dll on MSVC +if(MSVC) + +set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1) + +if (NOT DEFINED ARCH) + set(ARCH_IN "x86_64") +else() + set(ARCH_IN ${ARCH}) +endif() + +if (${CORE} STREQUAL "generic") + set(ARCH_IN "GENERIC") +endif () + +if (NOT DEFINED EXPRECISION) + set(EXPRECISION_IN 0) +else() + set(EXPRECISION_IN ${EXPRECISION}) +endif() + +if (NOT DEFINED NO_CBLAS) + set(NO_CBLAS_IN 0) +else() + set(NO_CBLAS_IN ${NO_CBLAS}) +endif() + +if (NOT DEFINED NO_LAPACK) + set(NO_LAPACK_IN 0) +else() + set(NO_LAPACK_IN ${NO_LAPACK}) +endif() + +if (NOT DEFINED NO_LAPACKE) + set(NO_LAPACKE_IN 0) +else() + set(NO_LAPACKE_IN ${NO_LAPACKE}) +endif() + +if (NOT DEFINED NEED2UNDERSCORES) + set(NEED2UNDERSCORES_IN 0) +else() + set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES}) +endif() + +if (NOT DEFINED ONLY_CBLAS) + set(ONLY_CBLAS_IN 0) +else() + set(ONLY_CBLAS_IN ${ONLY_CBLAS}) +endif() + +add_custom_command( + TARGET ${OpenBLAS_LIBNAME} PRE_LINK + COMMAND perl + ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" 
"${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" + COMMENT "Create openblas.def file" + VERBATIM) + +endif() \ No newline at end of file diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index f7651db56..e189b683a 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -25,7 +25,10 @@ if (MSVC) include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) endif () + +if (NOT NO_LAPACK) enable_language(Fortran) +endif() if (NOT ONLY_CBLAS) # N.B. f_check is not cross-platform, so instead try to use CMake variables diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index 3a4d13837..c2ee62545 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -99,10 +99,10 @@ macro(SetDefaultL1) set(QGEMVTKERNEL gemv_t.S) set(XGEMVNKERNEL zgemv_n.S) set(XGEMVTKERNEL zgemv_t.S) - set(SCABS_KERNEL cabs.S) - set(DCABS_KERNEL cabs.S) - set(QCABS_KERNEL cabs.S) - set(LSAME_KERNEL lsame.S) + set(SCABS_KERNEL ../generic/cabs.c) + set(DCABS_KERNEL ../generic/cabs.c) + set(QCABS_KERNEL ../generic/cabs.c) + set(LSAME_KERNEL ../generic/lsame.c) set(SAXPBYKERNEL ../arm/axpby.c) set(DAXPBYKERNEL ../arm/axpby.c) set(CAXPBYKERNEL ../arm/zaxpby.c) @@ -156,3 +156,10 @@ macro(SetDefaultL2) set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c) endmacro () + +macro(SetDefaultL3) + set(SGEADD_KERNEL ../generic/geadd.c) + set(DGEADD_KERNEL ../generic/geadd.c) + set(CGEADD_KERNEL ../generic/zgeadd.c) + set(ZGEADD_KERNEL ../generic/zgeadd.c) +endmacro () \ No newline at end of file diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 901c237c4..c3fa48655 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -66,6 +66,11 @@ if (NOT MSVC) list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) endif () +if (MSVC) +#Use generic for MSVC now +set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) +endif() + set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") 
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") file(MAKE_DIRECTORY ${GETARCH_DIR}) @@ -73,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR} SOURCES ${GETARCH_SRC} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GETARCH_LOG - COPY_FILE ${GETARCH_BIN} + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} ) message(STATUS "Running getarch") @@ -95,7 +100,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR} SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GETARCH2_LOG - COPY_FILE ${GETARCH2_BIN} + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} ) # use the cmake binary w/ the -E param to run a shell command in a cross-platform way diff --git a/cmake/system.cmake b/cmake/system.cmake index 36f9b7cbd..8ec738a10 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -420,6 +420,21 @@ if (ONLY_CBLAS) set(LIB_COMPONENTS CBLAS) endif () + +# For GEMM3M +set(USE_GEMM3M 0) + +if (DEFINED ARCH) + if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") + set(USE_GEMM3M 1) + endif () + + if (${CORE} STREQUAL "generic") + set(USE_GEMM3M 0) + endif () +endif () + + #export OSNAME #export ARCH #export CORE diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 498c3840a..6e2a98069 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -102,6 +102,7 @@ endfunction () # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) # 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) +# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c) # STRING - compiles only the given type (e.g. 
DOUBLE) function(GenerateNamedObjects sources_in) @@ -151,6 +152,9 @@ function(GenerateNamedObjects sources_in) set(complex_only true) elseif (${ARGV7} EQUAL 3) set(mangle_complex_sources true) + elseif (${ARGV7} EQUAL 4) + set(mangle_complex_sources true) + set(complex_only true) elseif (NOT ${ARGV7} EQUAL 0) set(float_list ${ARGV7}) endif () diff --git a/common.h b/common.h index 1894a5c86..1fb2c7eaf 100644 --- a/common.h +++ b/common.h @@ -296,13 +296,6 @@ typedef int blasint; #define COMPSIZE 2 #endif -#if defined(C_PGI) || defined(C_SUN) -#define CREAL(X) (*((FLOAT *)&X + 0)) -#define CIMAG(X) (*((FLOAT *)&X + 1)) -#else -#define CREAL __real__ -#define CIMAG __imag__ -#endif #define Address_H(x) (((x)+(1<<15))>>16) #define Address_L(x) ((x)-((Address_H(x))<<16)) @@ -464,17 +457,49 @@ typedef char* env_var_t; extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) + (__GNUC__ >= 3 && !defined(__cplusplus)) || \ + _MSC_VER >= 1800) // Visual Studio 2013 supports complex #define OPENBLAS_COMPLEX_C99 typedef float _Complex openblas_complex_float; typedef double _Complex openblas_complex_double; typedef xdouble _Complex openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) #else #define OPENBLAS_COMPLEX_STRUCT typedef struct { float real, imag; } openblas_complex_float; typedef struct { double real, imag; } openblas_complex_double; typedef struct { xdouble real, imag; } openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) {(real), (imag)} + #define openblas_make_complex_double(real, imag) {(real), (imag)} + #define 
openblas_make_complex_xdouble(real, imag) {(real), (imag)} #endif + +#ifdef XDOUBLE +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i) +#elif defined(DOUBLE) +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i) +#else +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i) +#endif + +#if defined(C_PGI) || defined(C_SUN) +#define CREAL(X) (*((FLOAT *)&X + 0)) +#define CIMAG(X) (*((FLOAT *)&X + 1)) +#else +#ifdef OPENBLAS_COMPLEX_STRUCT +#define CREAL(Z) ((Z).real) +#define CIMAG(Z) ((Z).imag) +#else +#define CREAL __real__ +#define CIMAG __imag__ +#endif +#endif + #endif // ASSEMBLER #ifndef IFLUSH @@ -491,6 +516,10 @@ typedef char* env_var_t; #endif #endif +#if defined(C_MSVC) +#define inline __inline +#endif + #ifndef ASSEMBLER #ifndef MIN diff --git a/common_x86_64.h b/common_x86_64.h index efb902416..8bb87c7c0 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -41,6 +41,10 @@ #ifndef ASSEMBLER +#ifdef C_MSVC +#include <intrin.h> +#endif + #ifdef C_SUN #define __asm__ __asm #define __volatile__ @@ -61,30 +65,39 @@ static void __inline blas_lock(volatile BLASULONG *address){ - int ret; + BLASULONG ret; do { while (*address) {YIELDING;}; +#ifndef C_MSVC __asm__ __volatile__( "xchgl %0, %1\n" : "=r"(ret), "=m"(*address) : "0"(1), "m"(*address) : "memory"); - +#else + ret=InterlockedExchange64((volatile LONG64 *)(address), 1); +#endif } while (ret); + } static __inline BLASULONG rpcc(void){ +#ifdef C_MSVC + return __rdtsc(); +#else BLASULONG a, d; __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); return ((BLASULONG)a + ((BLASULONG)d << 32)); +#endif } #define RPCC64BIT +#ifndef C_MSVC static __inline BLASULONG getstackaddr(void){ BLASULONG addr; @@ -93,22 +106,32 @@ static __inline BLASULONG getstackaddr(void){ return addr; } +#endif static 
__inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ +#ifdef C_MSVC + int cpuinfo[4]; + __cpuid(cpuinfo, op); + *eax=cpuinfo[0]; + *ebx=cpuinfo[1]; + *ecx=cpuinfo[2]; + *edx=cpuinfo[3]; +#else __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op)); +#endif } /* #define WHEREAMI */ -static inline int WhereAmI(void){ +static __inline int WhereAmI(void){ int eax, ebx, ecx, edx; int apicid; @@ -150,10 +173,14 @@ static inline int WhereAmI(void){ #define GET_IMAGE_CANCEL #ifdef SMP -#ifdef USE64BITINT +#if defined(USE64BITINT) static __inline blasint blas_quickdivide(blasint x, blasint y){ return x / y; } +#elif defined (C_MSVC) +static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){ + return x / y; +} #else extern unsigned int blas_quick_divide_table[]; diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index e4440be6d..5db4fb5ee 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -46,12 +46,28 @@ set(NU_SMP_SOURCES tbmv_thread.c ) +set(ULVM_COMPLEX_SOURCES + hbmv_k.c + hpmv_k.c + hpr_k.c + hpr2_k.c + her_k.c + her2_k.c +) + # objects that need LOWER set GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) # gbmv uses a lowercase n and t GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) +# c/zgbmv +GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2) # special defines for complex foreach (float_type ${FLOAT_TYPES}) @@ -82,6 +98,14 @@ foreach (float_type ${FLOAT_TYPES}) 
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) endforeach () + foreach (ulvm_source ${ULVM_COMPLEX_SOURCES}) + string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source}) + GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type}) + endforeach() + if (SMP) GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) @@ -103,6 +127,41 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type}) + 
GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type}) + foreach (nu_smp_src ${NU_SMP_SOURCES}) string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) 
diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index 9efe17092..ef9d58d76 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index 5b7fc7332..a0377d638 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif a = (FLOAT *)args -> a; diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c index 93a2f44d4..0f47344df 100644 --- a/driver/level2/spmv_thread.c +++ b/driver/level2/spmv_thread.c @@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif a = (FLOAT *)args -> a; diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index 3c1249448..bbb1c50eb 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c index 3b91cee45..47dc1daf9 100644 --- a/driver/level2/tpmv_thread.c +++ b/driver/level2/tpmv_thread.c @@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/trmv_thread.c 
b/driver/level2/trmv_thread.c index 29e9799f6..a9dc2dc62 100644 --- a/driver/level2/trmv_thread.c +++ b/driver/level2/trmv_thread.c @@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/zgbmv_k.c b/driver/level2/zgbmv_k.c index 68d6045bd..d89932e33 100644 --- a/driver/level2/zgbmv_k.c +++ b/driver/level2/zgbmv_k.c @@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA FLOAT *bufferY = gemvbuffer; FLOAT *bufferX = gemvbuffer; #ifdef TRANS - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif if (incy != 1) { diff --git a/driver/level2/zhbmv_k.c b/driver/level2/zhbmv_k.c index 70e92e050..33f70d2c5 100644 --- a/driver/level2/zhbmv_k.c +++ b/driver/level2/zhbmv_k.c @@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferX = sbmvbuffer; FLOAT temp[2]; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); @@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); + result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * 
CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); + result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zhpmv_k.c b/driver/level2/zhpmv_k.c index 96bceaaf2..9e7ed7b0e 100644 --- a/driver/level2/zhpmv_k.c +++ b/driver/level2/zhpmv_k.c @@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferX = gemvbuffer; FLOAT temp[2]; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); @@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #ifndef HEMVREV #ifndef LOWER if (i > 0) { - FLOAT _Complex result = DOTC_K(i, a, 1, X, 1); + result = DOTC_K(i, a, 1, X, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else if (m - i > 1) { - FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); + result = DOTC_K(m - i - 1, a + (i + 1) * 
2, 1, X + (i + 1) * 2, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else #ifndef LOWER if (i > 0) { - FLOAT _Complex result = DOTU_K(i, a, 1, X, 1); + result = DOTU_K(i, a, 1, X, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else if (m - i > 1) { - FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); + result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zsbmv_k.c b/driver/level2/zsbmv_k.c index 30e2f91c3..3ae74ce80 100644 --- a/driver/level2/zsbmv_k.c +++ b/driver/level2/zsbmv_k.c @@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferY = sbmvbuffer; FLOAT *bufferX = sbmvbuffer; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); @@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, a, 1, Y + i * COMPSIZE, 1, NULL, 0); if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 
1, X + (i + 1) * COMPSIZE, 1); + result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zspmv_k.c b/driver/level2/zspmv_k.c index 76657eab9..432205e83 100644 --- a/driver/level2/zspmv_k.c +++ b/driver/level2/zspmv_k.c @@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *gemvbuffer = (FLOAT *)buffer; FLOAT *bufferY = gemvbuffer; FLOAT *bufferX = gemvbuffer; - FLOAT _Complex result; + + OPENBLAS_COMPLEX_FLOAT result; if (incy != 1) { Y = bufferY; diff --git a/driver/level2/ztbmv_L.c b/driver/level2/ztbmv_L.c index 74ff0bce1..1ac1cdef1 100644 --- a/driver/level2/ztbmv_L.c +++ b/driver/level2/ztbmv_L.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztbmv_U.c b/driver/level2/ztbmv_U.c index 933275de3..9aa203396 100644 --- a/driver/level2/ztbmv_U.c +++ b/driver/level2/ztbmv_U.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztbsv_L.c b/driver/level2/ztbsv_L.c index 0726bbd16..9aa701841 100644 --- a/driver/level2/ztbsv_L.c +++ b/driver/level2/ztbsv_L.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztbsv_U.c 
b/driver/level2/ztbsv_U.c index d022650bc..3722b1f71 100644 --- a/driver/level2/ztbsv_U.c +++ b/driver/level2/ztbsv_U.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztpmv_L.c b/driver/level2/ztpmv_L.c index 12c254c12..47e6df56c 100644 --- a/driver/level2/ztpmv_L.c +++ b/driver/level2/ztpmv_L.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztpmv_U.c b/driver/level2/ztpmv_U.c index 59708b8b8..da911fb4e 100644 --- a/driver/level2/ztpmv_U.c +++ b/driver/level2/ztpmv_U.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztpsv_L.c b/driver/level2/ztpsv_L.c index 3b8e562ce..a497e42a4 100644 --- a/driver/level2/ztpsv_L.c +++ b/driver/level2/ztpsv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztpsv_U.c b/driver/level2/ztpsv_U.c index 601ac2f9d..28b824e3a 100644 --- a/driver/level2/ztpsv_U.c +++ b/driver/level2/ztpsv_U.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, 
br, bi, ratio, den; diff --git a/driver/level2/ztrmv_L.c b/driver/level2/ztrmv_L.c index 63522cf81..92c86aec2 100644 --- a/driver/level2/ztrmv_L.c +++ b/driver/level2/ztrmv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztrmv_U.c b/driver/level2/ztrmv_U.c index 8a4494fd7..f9671c9d6 100644 --- a/driver/level2/ztrmv_U.c +++ b/driver/level2/ztrmv_U.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztrsv_L.c b/driver/level2/ztrsv_L.c index 90f1c2c7d..dd3b2786e 100644 --- a/driver/level2/ztrsv_L.c +++ b/driver/level2/ztrsv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztrsv_U.c b/driver/level2/ztrsv_U.c index bec8114f3..8803182a8 100644 --- a/driver/level2/ztrsv_U.c +++ b/driver/level2/ztrsv_U.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 376a0beeb..6d623b0c2 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -1,13 +1,5 @@ include_directories(${CMAKE_SOURCE_DIR}) -set(USE_GEMM3M 0) - -if (DEFINED ARCH) 
- if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") - set(USE_GEMM3M 1) - endif () -endif () - # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa # loop through gemm.c defines @@ -54,12 +46,41 @@ foreach (float_type ${FLOAT_TYPES}) GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) + + #hemm + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type}) + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type}) + + #her2k + GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) + + if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + #hemm + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) + #her2k + GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) + 
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) + endif() + # special gemm defines for complex foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) string(TOLOWER ${gemm_define} gemm_define_LC) GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) + if(USE_GEMM3M) + GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type}) + endif() if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) + if(USE_GEMM3M) + GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type}) + endif() endif () endforeach () endif () diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 938f1daaf..b2af55e36 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -33,6 +33,8 @@ set(COMMON_SOURCES xerbla.c openblas_set_num_threads.c openblas_error_handle.c + openblas_get_num_procs.c + openblas_get_num_threads.c ) # these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index ae949235b..91565d2f2 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,13 +1,16 @@ include_directories(${CMAKE_SOURCE_DIR}) + set(BLAS1_SOURCES copy.c - asum.c nrm2.c + nrm2.c ) set(BLAS1_REAL_ONLY_SOURCES rotm.c rotmg.c # N.B. 
these do not have complex counterparts + rot.c + asum.c ) # these will have 'z' prepended for the complex version @@ -15,7 +18,7 @@ set(BLAS1_MANGLED_SOURCES axpy.c swap.c scal.c dot.c - rot.c rotg.c + rotg.c axpby.c ) @@ -31,6 +34,13 @@ set(BLAS2_SOURCES tpsv.c tpmv.c ) +set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES + hemv.c hbmv.c + her.c her2.c + hpmv.c hpr.c + hpr2.c +) + # these do not have separate 'z' sources set(BLAS3_SOURCES gemm.c symm.c @@ -39,6 +49,7 @@ set(BLAS3_SOURCES set(BLAS3_MANGLED_SOURCES omatcopy.c imatcopy.c + geadd.c ) # generate the BLAS objs once with and once without cblas @@ -65,9 +76,14 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4) GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + #sdsdot, dsdot + GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") + GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") + # trmm is trsm with a compiler flag set GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) @@ -86,17 +102,36 @@ endforeach () # complex-specific sources foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type}) + + 
GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type}) + GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type}) + GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type}) + + if (USE_GEMM3M) + GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type}) + endif() endif () if (${float_type} STREQUAL "COMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") + GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX") + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX") + GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX") endif () if (${float_type} STREQUAL "ZCOMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX") endif () endforeach () diff --git a/interface/rotg.c b/interface/rotg.c index 49088ab02..a0e6efdab 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ #endif - -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da = *DA; long double db = *DB; diff --git a/interface/zaxpby.c b/interface/zaxpby.c index 9e8324432..1abb24de9 100644 --- a/interface/zaxpby.c +++ b/interface/zaxpby.c @@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, 
FLOAT *x, blasint incx, FLOAT *BETA, FLOAT * #endif - if (n <= 0) return; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + if (n <= 0) return; + FUNCTION_PROFILE_START(); if (incx < 0) x -= (n - 1) * incx * 2; diff --git a/interface/zdot.c b/interface/zdot.c index 1380ce292..34dfb731a 100644 --- a/interface/zdot.c +++ b/interface/zdot.c @@ -57,21 +57,25 @@ #ifdef RETURN_BY_STRUCT MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #elif defined RETURN_BY_STACK -void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #else -FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #endif BLASLONG n = *N; BLASLONG incx = *INCX; BLASLONG incy = *INCY; #ifndef RETURN_BY_STACK - FLOAT _Complex ret; + OPENBLAS_COMPLEX_FLOAT ret; #endif #ifdef RETURN_BY_STRUCT MYTYPE myret; #endif +#ifndef RETURN_BY_STRUCT + OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); +#endif + PRINT_DEBUG_NAME; if (n <= 0) { @@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, myret.i = 0.; return myret; #elif defined RETURN_BY_STACK - *result = ZERO; + *result = zero; return; #else - return ZERO; + return zero; #endif } @@ -144,21 +148,21 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, #else #ifdef FORCE_USE_STACK -void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ +void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ #else -FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ +OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ 
- FLOAT _Complex ret; + OPENBLAS_COMPLEX_FLOAT ret; #endif PRINT_DEBUG_CNAME; if (n <= 0) { #ifdef FORCE_USE_STACK - *result = ZERO; + *result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); return; #else - return ZERO; + return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); #endif } diff --git a/interface/zgemv.c b/interface/zgemv.c index 704034aaf..792f799e5 100644 --- a/interface/zgemv.c +++ b/interface/zgemv.c @@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *buffer; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, @@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, blasint lenx, leny; blasint i; - PRINT_DEBUG_NAME; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + PRINT_DEBUG_NAME; + TOUPPER(trans); info = 0; @@ -153,14 +156,14 @@ void CNAME(enum CBLAS_ORDER order, GEMV_O, GEMV_U, GEMV_S, GEMV_D, }; - PRINT_DEBUG_CNAME; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + PRINT_DEBUG_CNAME; + trans = -1; info = 0; @@ -234,10 +237,10 @@ void CNAME(enum CBLAS_ORDER order, #ifdef SMP - int nthreads_max = num_cpu_avail(2); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(2); + nthreads_avail = nthreads_max; - double MNK = (double) m * (double) n; + MNK = (double) m * (double) n; if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) nthreads_max = 1; diff --git a/interface/zrotg.c b/interface/zrotg.c index e9e8a11df..187343d41 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -6,13 +6,7 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ - PRINT_DEBUG_NAME; - - IDEBUG_START; - - FUNCTION_PROFILE_START(); - -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) +#if defined(__i386__) || defined(__x86_64__) || 
defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da_r = *(DA + 0); long double da_i = *(DA + 1); @@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ long double ada = fabs(da_r) + fabs(da_i); + PRINT_DEBUG_NAME; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + if (ada == ZERO) { *C = ZERO; *(S + 0) = ONE; @@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ FLOAT ada = fabs(da_r) + fabs(da_i); FLOAT adb; + PRINT_DEBUG_NAME; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + if (ada == ZERO) { *C = ZERO; *(S + 0) = ONE; diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index cd71101a5..d2cc77b11 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -17,6 +17,7 @@ endif () SetDefaultL1() SetDefaultL2() +SetDefaultL3() ParseMakefileVars("${KERNELDIR}/KERNEL") ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") @@ -65,8 +66,20 @@ foreach (float_type ${FLOAT_TYPES}) else () GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) endif () + + if (${float_type} STREQUAL "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type}) + endif() + if (${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type}) + endif() + endforeach () +#dsdot,sdsdot +GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") +GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") + # Makefile.L2 GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) @@ -86,6 +99,12 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) 
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type}) + else () GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) @@ -93,14 +112,9 @@ foreach (float_type ${FLOAT_TYPES}) endforeach () # Makefile.L3 -set(USE_GEMM3M false) set(USE_TRMM false) -if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") - set(USE_GEMM3M true) -endif () - -if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC") +if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic") set(USE_TRMM true) endif () @@ -155,6 +169,13 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) + + #hemm + GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" 
"hemm_iutcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) + else () GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) endif () @@ -241,11 +262,40 @@ foreach (float_type ${FLOAT_TYPES}) endif () endif () - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type}) + + if (NOT DEFINED ${float_char}OMATCOPY_CNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c) + 
endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_CTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c) + endif () + endif () + + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) + endif() + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) endforeach () # Makefile.LA diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 4ef351de3..60b8fb57f 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -3459,7 +3459,7 @@ ifndef DGEADD_K DGEADD_K = ../generic/geadd.c endif -$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) +$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K) $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ ifndef CGEADD_K diff --git a/kernel/arm/zaxpby.c b/kernel/arm/zaxpby.c index 2e0c2940d..d9948349d 100644 --- a/kernel/arm/zaxpby.c +++ b/kernel/arm/zaxpby.c @@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL BLASLONG ix,iy; FLOAT temp; + BLASLONG inc_x2; + BLASLONG inc_y2; + if ( n < 0 ) return(0); ix = 0; iy = 0; - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; if ( beta_r == 
0.0 && beta_i == 0.0) { diff --git a/kernel/arm/zaxpy.c b/kernel/arm/zaxpy.c index 929ee8b54..1dcaeac27 100644 --- a/kernel/arm/zaxpy.c +++ b/kernel/arm/zaxpy.c @@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, { BLASLONG i=0; BLASLONG ix,iy; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0); @@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, ix = 0; iy = 0; - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/arm/zcopy.c b/kernel/arm/zcopy.c index f720d6ee5..07fe584c5 100644 --- a/kernel/arm/zcopy.c +++ b/kernel/arm/zcopy.c @@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) { BLASLONG i=0; BLASLONG ix=0,iy=0; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c index 198104022..57f47e58e 100644 --- a/kernel/arm/zdot.c +++ b/kernel/arm/zdot.c @@ -40,24 +40,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) #else -openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) #endif { BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT dot[2]; - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; + BLASLONG inc_x2; + BLASLONG inc_y2; dot[0]=0.0; dot[1]=0.0; - __real__ result = 0.0 ; - __imag__ result = 0.0 ; + CREAL(result) = 0.0 ; + CIMAG(result) = 0.0 ; if ( n < 1 ) return(result); - BLASLONG inc_x2 = 2 * inc_x ; - BLASLONG inc_y2 = 2 * inc_y ; + inc_x2 = 2 * inc_x ; + inc_y2 = 2 * inc_y ; while(i < n) { @@ -73,8 +75,8 @@ openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BL i++ ; } - __real__ result = dot[0]; - __imag__ result = dot[1]; + CREAL(result) = dot[0]; + CIMAG(result) = dot[1]; return(result); } diff --git a/kernel/arm/zrot.c b/kernel/arm/zrot.c index 356a4df72..98be68db8 100644 --- a/kernel/arm/zrot.c +++ b/kernel/arm/zrot.c @@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n <= 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x ; - BLASLONG inc_y2 = 2 * inc_y ; + inc_x2 = 2 * inc_x ; + inc_y2 = 2 * inc_y ; while(i < n) { diff --git a/kernel/arm/zswap.c b/kernel/arm/zswap.c index fcfb38506..ae4760ae0 100644 --- a/kernel/arm/zswap.c +++ b/kernel/arm/zswap.c @@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/x86_64/KERNEL.generic b/kernel/x86_64/KERNEL.generic index 672edb069..a23e59f3f 100644 
--- a/kernel/x86_64/KERNEL.generic +++ b/kernel/x86_64/KERNEL.generic @@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c ZHEMV_U_KERNEL = ../generic/zhemv_k.c ZHEMV_L_KERNEL = ../generic/zhemv_k.c +LSAME_KERNEL = ../generic/lsame.c +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +#Dump kernel CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c diff --git a/openblas_config_template.h b/openblas_config_template.h index 3b3435b0e..942a8f547 100644 --- a/openblas_config_template.h +++ b/openblas_config_template.h @@ -59,7 +59,8 @@ typedef int blasint; extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) + (__GNUC__ >= 3 && !defined(__cplusplus)) || \ + _MSC_VER >= 1800) // Visual Studio 2013 supports complex #define OPENBLAS_COMPLEX_C99 #ifndef __cplusplus #include