Disable CBLAS and LAPACK. (tags/v0.2.15^2)
| @@ -15,11 +15,13 @@ enable_language(C) | |||||
| set(OpenBLAS_LIBNAME openblas) | set(OpenBLAS_LIBNAME openblas) | ||||
| ####### | ####### | ||||
# User-facing build switches. LAPACK/LAPACKE and CBLAS are left out by default,
# and the debug build is off by default.
option(BUILD_DEBUG "Build Debug Version" OFF)
option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON)
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
#######

# Building without LAPACK switches off both LAPACK and LAPACKE.
if(BUILD_WITHOUT_LAPACK)
  set(NO_LAPACKE 1)
  set(NO_LAPACK 1)
endif()
| if(BUILD_DEBUG) | if(BUILD_DEBUG) | ||||
| @@ -27,6 +29,11 @@ set(CMAKE_BUILD_TYPE Debug) | |||||
| else() | else() | ||||
| set(CMAKE_BUILD_TYPE Release) | set(CMAKE_BUILD_TYPE Release) | ||||
| endif() | endif() | ||||
# Map the BUILD_WITHOUT_CBLAS option onto the NO_CBLAS variable.
if(BUILD_WITHOUT_CBLAS)
  set(NO_CBLAS 1)
endif()
| ####### | ####### | ||||
| @@ -51,7 +58,6 @@ endif () | |||||
# Start from the BLAS directories and add lapack only when LAPACK is not disabled.
set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
  list(APPEND SUBDIRS lapack)
endif ()
| @@ -111,15 +117,21 @@ endforeach () | |||||
# lapack-netlib's own CMake files cannot be used directly: they are set up to search for BLAS
# and to build and install a binary, whereas only a couple of lib files from lapack and lapacke
# are wanted here.
# add_subdirectory is not used because lapack-netlib already has its own CMakeLists.txt;
# a cmake script listing the wanted sources is included instead.
if (NOT (NOFORTRAN OR NO_LAPACK))
  include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
  if (NOT NO_LAPACKE)
    include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
  endif ()
endif ()
# Only generate .def for dll on MSVC.
# OpenBLAS_DEF_FILE stays unset for every other toolchain, so it expands to nothing
# in the source list below instead of naming a file that is never created.
if(MSVC)
  set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()

# add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})

include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
| #only build shared library for MSVC | #only build shared library for MSVC | ||||
| if(NOT MSVC) | if(NOT MSVC) | ||||
| @@ -0,0 +1,60 @@ | |||||
# Only generate .def for dll on MSVC.
# Runs exports/gensymbol through perl before linking and redirects its output
# into the module-definition file named by OpenBLAS_DEF_FILE.
if(MSVC)

  # The file is written by the PRE_LINK step below, so it is marked as generated.
  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

  # Architecture argument: x86_64 unless ARCH is defined; a generic core overrides both.
  if (NOT DEFINED ARCH)
    set(ARCH_IN "x86_64")
  else()
    set(ARCH_IN ${ARCH})
  endif()

  # Quoted so that an unset or empty CORE compares as "" instead of leaving
  # if() without a left-hand operand.
  if ("${CORE}" STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif ()

  # Remaining gensymbol arguments: the variable's value when it is defined, otherwise 0.
  foreach (gensymbol_opt EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
    if (DEFINED ${gensymbol_opt})
      set(${gensymbol_opt}_IN ${${gensymbol_opt}})
    else()
      set(${gensymbol_opt}_IN 0)
    endif()
  endforeach ()

  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${OpenBLAS_DEF_FILE}"
    COMMENT "Create openblas.def file"
    VERBATIM)

endif()
| @@ -25,7 +25,10 @@ if (MSVC) | |||||
| include(CMakeForceCompiler) | include(CMakeForceCompiler) | ||||
| CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) | CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) | ||||
| endif () | endif () | ||||
# Enable the Fortran language only when LAPACK is not disabled.
if (NOT NO_LAPACK)
  enable_language(Fortran)
endif()
| if (NOT ONLY_CBLAS) | if (NOT ONLY_CBLAS) | ||||
| # N.B. f_check is not cross-platform, so instead try to use CMake variables | # N.B. f_check is not cross-platform, so instead try to use CMake variables | ||||
| @@ -99,10 +99,10 @@ macro(SetDefaultL1) | |||||
| set(QGEMVTKERNEL gemv_t.S) | set(QGEMVTKERNEL gemv_t.S) | ||||
| set(XGEMVNKERNEL zgemv_n.S) | set(XGEMVNKERNEL zgemv_n.S) | ||||
| set(XGEMVTKERNEL zgemv_t.S) | set(XGEMVTKERNEL zgemv_t.S) | ||||
# cabs and lsame defaults use the C sources from ../generic; all three cabs
# precisions point at the same file.
set(SCABS_KERNEL ../generic/cabs.c)
set(DCABS_KERNEL ../generic/cabs.c)
set(QCABS_KERNEL ../generic/cabs.c)
set(LSAME_KERNEL ../generic/lsame.c)
| set(SAXPBYKERNEL ../arm/axpby.c) | set(SAXPBYKERNEL ../arm/axpby.c) | ||||
| set(DAXPBYKERNEL ../arm/axpby.c) | set(DAXPBYKERNEL ../arm/axpby.c) | ||||
| set(CAXPBYKERNEL ../arm/zaxpby.c) | set(CAXPBYKERNEL ../arm/zaxpby.c) | ||||
| @@ -156,3 +156,10 @@ macro(SetDefaultL2) | |||||
| set(XHEMV_V_KERNEL ../generic/zhemv_k.c) | set(XHEMV_V_KERNEL ../generic/zhemv_k.c) | ||||
| set(XHEMV_M_KERNEL ../generic/zhemv_k.c) | set(XHEMV_M_KERNEL ../generic/zhemv_k.c) | ||||
| endmacro () | endmacro () | ||||
# Default sources for the GEADD kernels: the S and D variants use geadd.c,
# the C and Z variants use zgeadd.c, all taken from ../generic.
macro(SetDefaultL3)
  set(ZGEADD_KERNEL ../generic/zgeadd.c)
  set(CGEADD_KERNEL ../generic/zgeadd.c)
  set(DGEADD_KERNEL ../generic/geadd.c)
  set(SGEADD_KERNEL ../generic/geadd.c)
endmacro ()
| @@ -66,6 +66,11 @@ if (NOT MSVC) | |||||
| list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) | list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) | ||||
| endif () | endif () | ||||
# Use generic for MSVC now: getarch is compiled with FORCE_GENERIC defined.
if (MSVC)
  list(APPEND GETARCH_FLAGS -DFORCE_GENERIC)
endif()

# Build directory and file name of the getarch executable.
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
file(MAKE_DIRECTORY ${GETARCH_DIR})
| @@ -73,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR} | |||||
| SOURCES ${GETARCH_SRC} | SOURCES ${GETARCH_SRC} | ||||
| COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} | COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} | ||||
| OUTPUT_VARIABLE GETARCH_LOG | OUTPUT_VARIABLE GETARCH_LOG | ||||
| COPY_FILE ${GETARCH_BIN} | |||||
| COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} | |||||
| ) | ) | ||||
| message(STATUS "Running getarch") | message(STATUS "Running getarch") | ||||
| @@ -95,7 +100,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR} | |||||
| SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c | SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c | ||||
| COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} | COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} | ||||
| OUTPUT_VARIABLE GETARCH2_LOG | OUTPUT_VARIABLE GETARCH2_LOG | ||||
| COPY_FILE ${GETARCH2_BIN} | |||||
| COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} | |||||
| ) | ) | ||||
| # use the cmake binary w/ the -E param to run a shell command in a cross-platform way | # use the cmake binary w/ the -E param to run a shell command in a cross-platform way | ||||
| @@ -420,6 +420,21 @@ if (ONLY_CBLAS) | |||||
| set(LIB_COMPONENTS CBLAS) | set(LIB_COMPONENTS CBLAS) | ||||
| endif () | endif () | ||||
# For GEMM3M
# Enabled when ARCH is x86, x86_64, ia64 or MIPS, and switched back off for a generic core.
# The operands are quoted so the comparisons stay well-formed when ARCH or CORE is
# empty or unset.
set(USE_GEMM3M 0)
if (DEFINED ARCH)
  if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif ()

  if ("${CORE}" STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif ()
endif ()
| #export OSNAME | #export OSNAME | ||||
| #export ARCH | #export ARCH | ||||
| #export CORE | #export CORE | ||||
| @@ -102,6 +102,7 @@ endfunction () | |||||
| # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) | # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) | ||||
| # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) | # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) | ||||
| # 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) | # 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) | ||||
| # 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c) | |||||
| # STRING - compiles only the given type (e.g. DOUBLE) | # STRING - compiles only the given type (e.g. DOUBLE) | ||||
| function(GenerateNamedObjects sources_in) | function(GenerateNamedObjects sources_in) | ||||
| @@ -151,6 +152,9 @@ function(GenerateNamedObjects sources_in) | |||||
| set(complex_only true) | set(complex_only true) | ||||
| elseif (${ARGV7} EQUAL 3) | elseif (${ARGV7} EQUAL 3) | ||||
| set(mangle_complex_sources true) | set(mangle_complex_sources true) | ||||
| elseif (${ARGV7} EQUAL 4) | |||||
| set(mangle_complex_sources true) | |||||
| set(complex_only true) | |||||
| elseif (NOT ${ARGV7} EQUAL 0) | elseif (NOT ${ARGV7} EQUAL 0) | ||||
| set(float_list ${ARGV7}) | set(float_list ${ARGV7}) | ||||
| endif () | endif () | ||||
| @@ -296,13 +296,6 @@ typedef int blasint; | |||||
| #define COMPSIZE 2 | #define COMPSIZE 2 | ||||
| #endif | #endif | ||||
| #if defined(C_PGI) || defined(C_SUN) | |||||
| #define CREAL(X) (*((FLOAT *)&X + 0)) | |||||
| #define CIMAG(X) (*((FLOAT *)&X + 1)) | |||||
| #else | |||||
| #define CREAL __real__ | |||||
| #define CIMAG __imag__ | |||||
| #endif | |||||
| #define Address_H(x) (((x)+(1<<15))>>16) | #define Address_H(x) (((x)+(1<<15))>>16) | ||||
| #define Address_L(x) ((x)-((Address_H(x))<<16)) | #define Address_L(x) ((x)-((Address_H(x))<<16)) | ||||
| @@ -464,17 +457,49 @@ typedef char* env_var_t; | |||||
| extension since version 3.0. If neither are available, use a compatible | extension since version 3.0. If neither are available, use a compatible | ||||
| structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | ||||
| #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | ||||
| (__GNUC__ >= 3 && !defined(__cplusplus))) | |||||
| (__GNUC__ >= 3 && !defined(__cplusplus)) || \ | |||||
| _MSC_VER >= 1800) // Visual Studio 2013 supports complex | |||||
| #define OPENBLAS_COMPLEX_C99 | #define OPENBLAS_COMPLEX_C99 | ||||
| typedef float _Complex openblas_complex_float; | typedef float _Complex openblas_complex_float; | ||||
| typedef double _Complex openblas_complex_double; | typedef double _Complex openblas_complex_double; | ||||
| typedef xdouble _Complex openblas_complex_xdouble; | typedef xdouble _Complex openblas_complex_xdouble; | ||||
| #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
| #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
| #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
| #else | #else | ||||
| #define OPENBLAS_COMPLEX_STRUCT | #define OPENBLAS_COMPLEX_STRUCT | ||||
| typedef struct { float real, imag; } openblas_complex_float; | typedef struct { float real, imag; } openblas_complex_float; | ||||
| typedef struct { double real, imag; } openblas_complex_double; | typedef struct { double real, imag; } openblas_complex_double; | ||||
| typedef struct { xdouble real, imag; } openblas_complex_xdouble; | typedef struct { xdouble real, imag; } openblas_complex_xdouble; | ||||
| #define openblas_make_complex_float(real, imag) {(real), (imag)} | |||||
| #define openblas_make_complex_double(real, imag) {(real), (imag)} | |||||
| #define openblas_make_complex_xdouble(real, imag) {(real), (imag)} | |||||
| #endif | #endif | ||||
/* Complex type and constructor for the precision selected by XDOUBLE / DOUBLE.
   XDOUBLE is checked first; with neither defined the float variants are used. */
#if defined(XDOUBLE)
  #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
  #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
#elif defined(DOUBLE)
  #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
  #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
#else
  #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
  #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
#endif
/* Accessors for the real and imaginary parts of a complex value. */
#if defined(C_PGI) || defined(C_SUN)
/* With C_PGI or C_SUN defined, read the value as two consecutive FLOATs.
   The argument is parenthesized so that & applies to the whole expression. */
#define CREAL(X)	(*((FLOAT *)&(X) + 0))
#define CIMAG(X)	(*((FLOAT *)&(X) + 1))
#elif defined(OPENBLAS_COMPLEX_STRUCT)
/* Struct fallback: the parts are plain members. */
#define CREAL(Z)	((Z).real)
#define CIMAG(Z)	((Z).imag)
#else
#define CREAL	__real__
#define CIMAG	__imag__
#endif
| #endif // ASSEMBLER | #endif // ASSEMBLER | ||||
| #ifndef IFLUSH | #ifndef IFLUSH | ||||
| @@ -491,6 +516,10 @@ typedef char* env_var_t; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
/* With C_MSVC defined, map the inline keyword onto __inline. */
#ifdef C_MSVC
#define inline __inline
#endif
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #ifndef MIN | #ifndef MIN | ||||
| @@ -41,6 +41,10 @@ | |||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #ifdef C_MSVC | |||||
| #include <intrin.h> | |||||
| #endif | |||||
| #ifdef C_SUN | #ifdef C_SUN | ||||
| #define __asm__ __asm | #define __asm__ __asm | ||||
| #define __volatile__ | #define __volatile__ | ||||
| @@ -61,30 +65,39 @@ | |||||
| static void __inline blas_lock(volatile BLASULONG *address){ | static void __inline blas_lock(volatile BLASULONG *address){ | ||||
| int ret; | |||||
| BLASULONG ret; | |||||
| do { | do { | ||||
| while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
| #ifndef C_MSVC | |||||
| __asm__ __volatile__( | __asm__ __volatile__( | ||||
| "xchgl %0, %1\n" | "xchgl %0, %1\n" | ||||
| : "=r"(ret), "=m"(*address) | : "=r"(ret), "=m"(*address) | ||||
| : "0"(1), "m"(*address) | : "0"(1), "m"(*address) | ||||
| : "memory"); | : "memory"); | ||||
| #else | |||||
| ret=InterlockedExchange64((volatile LONG64 *)(address), 1); | |||||
| #endif | |||||
| } while (ret); | } while (ret); | ||||
| } | } | ||||
| static __inline BLASULONG rpcc(void){ | static __inline BLASULONG rpcc(void){ | ||||
| #ifdef C_MSVC | |||||
| return __rdtsc(); | |||||
| #else | |||||
| BLASULONG a, d; | BLASULONG a, d; | ||||
| __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | ||||
| return ((BLASULONG)a + ((BLASULONG)d << 32)); | return ((BLASULONG)a + ((BLASULONG)d << 32)); | ||||
| #endif | |||||
| } | } | ||||
| #define RPCC64BIT | #define RPCC64BIT | ||||
| #ifndef C_MSVC | |||||
| static __inline BLASULONG getstackaddr(void){ | static __inline BLASULONG getstackaddr(void){ | ||||
| BLASULONG addr; | BLASULONG addr; | ||||
| @@ -93,22 +106,32 @@ static __inline BLASULONG getstackaddr(void){ | |||||
| return addr; | return addr; | ||||
| } | } | ||||
| #endif | |||||
/* Run CPUID with `op` as input and store the four result registers through
   the eax, ebx, ecx and edx pointers. */
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#ifndef C_MSVC
  __asm__ __volatile__("cpuid"
		       : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		       : "0" (op));
#else
  /* the intrinsic fills the array; the entries are copied out in eax, ebx, ecx, edx order */
  int regs[4];

  __cpuid(regs, op);
  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
#endif
}
| /* | /* | ||||
| #define WHEREAMI | #define WHEREAMI | ||||
| */ | */ | ||||
| static inline int WhereAmI(void){ | |||||
| static __inline int WhereAmI(void){ | |||||
| int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
| int apicid; | int apicid; | ||||
| @@ -150,10 +173,14 @@ static inline int WhereAmI(void){ | |||||
| #define GET_IMAGE_CANCEL | #define GET_IMAGE_CANCEL | ||||
| #ifdef SMP | #ifdef SMP | ||||
| #ifdef USE64BITINT | |||||
| #if defined(USE64BITINT) | |||||
| static __inline blasint blas_quickdivide(blasint x, blasint y){ | static __inline blasint blas_quickdivide(blasint x, blasint y){ | ||||
| return x / y; | return x / y; | ||||
| } | } | ||||
| #elif defined (C_MSVC) | |||||
| static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){ | |||||
| return x / y; | |||||
| } | |||||
| #else | #else | ||||
| extern unsigned int blas_quick_divide_table[]; | extern unsigned int blas_quick_divide_table[]; | ||||
| @@ -46,12 +46,28 @@ set(NU_SMP_SOURCES | |||||
| tbmv_thread.c | tbmv_thread.c | ||||
| ) | ) | ||||
# Kernel sources for the hermitian band/packed/rank-update routines.
set(ULVM_COMPLEX_SOURCES hbmv_k.c hpmv_k.c hpr_k.c hpr2_k.c her_k.c her2_k.c)
| # objects that need LOWER set | # objects that need LOWER set | ||||
| GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) | GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) | ||||
# gbmv uses a lowercase n and t
GenerateNamedObjects("gbmv_k.c" ""      "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)

# c/zgbmv: complex-only objects (mode 2), one per CONJ / XCONJ / TRANS combination
GenerateNamedObjects("zgbmv_k.c" "CONJ"             "gbmv_r" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ"       "gbmv_c" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "XCONJ"            "gbmv_o" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ"      "gbmv_u" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ"       "gbmv_s" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
| # special defines for complex | # special defines for complex | ||||
| foreach (float_type ${FLOAT_TYPES}) | foreach (float_type ${FLOAT_TYPES}) | ||||
| @@ -82,6 +98,14 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) | GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) | ||||
| endforeach () | endforeach () | ||||
# For every source in ULVM_COMPLEX_SOURCES, build four objects from its z-prefixed file:
#   _U (no extra define), _L (LOWER), _V (HEMVREV), _M (LOWER and HEMVREV).
foreach (hermitian_src ${ULVM_COMPLEX_SOURCES})
  # object stem = leading run of lowercase letters/digits of the file name
  string(REGEX MATCH "[a-z0-9]+" op_name ${hermitian_src})

  GenerateNamedObjects("z${hermitian_src}" ""              "${op_name}_U" false "" "" false ${float_type})
  GenerateNamedObjects("z${hermitian_src}" "LOWER"         "${op_name}_L" false "" "" false ${float_type})
  GenerateNamedObjects("z${hermitian_src}" "HEMVREV"       "${op_name}_V" false "" "" false ${float_type})
  GenerateNamedObjects("z${hermitian_src}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
endforeach()
| if (SMP) | if (SMP) | ||||
| GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) | GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) | ||||
| @@ -103,6 +127,41 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) | GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) | GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) | ||||
# Threaded hermitian objects, built from the symmetric *_thread.c sources with extra defines.
# Each routine gets four objects: _U, _L (adds LOWER), _V (reverse define), _M (LOWER + reverse).

# hbmv <- sbmv_thread.c
GenerateNamedObjects("sbmv_thread.c" "HEMV"          "hbmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER"    "hbmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMVREV"       "hbmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})

# hpmv <- spmv_thread.c
GenerateNamedObjects("spmv_thread.c" "HEMV"          "hpmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER"    "hpmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMVREV"       "hpmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})

# hpr <- spr_thread.c
GenerateNamedObjects("spr_thread.c" "HEMV"          "hpr_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMV;LOWER"    "hpr_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMVREV"       "hpr_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})

# hpr2 <- spr2_thread.c
GenerateNamedObjects("spr2_thread.c" "HEMV"          "hpr2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER"    "hpr2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMVREV"       "hpr2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})

# hemv <- symv_thread.c
GenerateNamedObjects("symv_thread.c" "HEMV"          "hemv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMV;LOWER"    "hemv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMVREV"       "hemv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})

# her <- syr_thread.c
# NOTE(review): _U/_L use HER but _V/_M use HEMVREV - confirm which reverse define syr_thread.c tests.
GenerateNamedObjects("syr_thread.c" "HER"           "her_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HER;LOWER"     "her_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HEMVREV"       "her_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type})

# her2 <- syr2_thread.c
# NOTE(review): _U/_L use HER2 but _V/_M use HEMVREV - confirm against syr2_thread.c.
GenerateNamedObjects("syr2_thread.c" "HER2"          "her2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HER2;LOWER"    "her2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HEMVREV"       "her2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type})
| foreach (nu_smp_src ${NU_SMP_SOURCES}) | foreach (nu_smp_src ${NU_SMP_SOURCES}) | ||||
| string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) | string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) | ||||
| GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) | GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) | ||||
| @@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| a = (FLOAT *)args -> a; | a = (FLOAT *)args -> a; | ||||
| @@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| a = (FLOAT *)args -> a; | a = (FLOAT *)args -> a; | ||||
| @@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| #else | #else | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA | |||||
| FLOAT *bufferY = gemvbuffer; | FLOAT *bufferY = gemvbuffer; | ||||
| FLOAT *bufferX = gemvbuffer; | FLOAT *bufferX = gemvbuffer; | ||||
| #ifdef TRANS | #ifdef TRANS | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| if (incy != 1) { | if (incy != 1) { | ||||
| @@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *bufferX = sbmvbuffer; | FLOAT *bufferX = sbmvbuffer; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | ||||
| @@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *bufferX = gemvbuffer; | FLOAT *bufferX = gemvbuffer; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | ||||
| @@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #ifndef HEMVREV | #ifndef HEMVREV | ||||
| #ifndef LOWER | #ifndef LOWER | ||||
| if (i > 0) { | if (i > 0) { | ||||
| FLOAT _Complex result = DOTC_K(i, a, 1, X, 1); | |||||
| result = DOTC_K(i, a, 1, X, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #else | #else | ||||
| if (m - i > 1) { | if (m - i > 1) { | ||||
| FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #else | #else | ||||
| #ifndef LOWER | #ifndef LOWER | ||||
| if (i > 0) { | if (i > 0) { | ||||
| FLOAT _Complex result = DOTU_K(i, a, 1, X, 1); | |||||
| result = DOTU_K(i, a, 1, X, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| #else | #else | ||||
| if (m - i > 1) { | if (m - i > 1) { | ||||
| FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *bufferY = sbmvbuffer; | FLOAT *bufferY = sbmvbuffer; | ||||
| FLOAT *bufferX = sbmvbuffer; | FLOAT *bufferX = sbmvbuffer; | ||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | ||||
| @@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); | a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
| a, 1, Y + i * COMPSIZE, 1, NULL, 0); | a, 1, Y + i * COMPSIZE, 1, NULL, 0); | ||||
| if (length > 0) { | if (length > 0) { | ||||
| FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); | |||||
| Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); | ||||
| Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); | ||||
| @@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, | |||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | FLOAT *gemvbuffer = (FLOAT *)buffer; | ||||
| FLOAT *bufferY = gemvbuffer; | FLOAT *bufferY = gemvbuffer; | ||||
| FLOAT *bufferX = gemvbuffer; | FLOAT *bufferX = gemvbuffer; | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| if (incy != 1) { | if (incy != 1) { | ||||
| Y = bufferY; | Y = bufferY; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| BLASLONG i; | BLASLONG i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex temp; | |||||
| OPENBLAS_COMPLEX_FLOAT temp; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf | |||||
| BLASLONG i, is, min_i; | BLASLONG i, is, min_i; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| #endif | #endif | ||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| @@ -1,13 +1,5 @@ | |||||
| include_directories(${CMAKE_SOURCE_DIR}) | include_directories(${CMAKE_SOURCE_DIR}) | ||||
| set(USE_GEMM3M 0) | |||||
| if (DEFINED ARCH) | |||||
| if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") | |||||
| set(USE_GEMM3M 1) | |||||
| endif () | |||||
| endif () | |||||
| # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa | # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa | ||||
| # loop through gemm.c defines | # loop through gemm.c defines | ||||
| @@ -54,12 +46,41 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) | GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) | ||||
| GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) | GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) | ||||
| GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) | GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) | ||||
| #hemm | |||||
| GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type}) | |||||
| GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type}) | |||||
| #her2k | |||||
| GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) | |||||
| if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
| #hemm | |||||
| GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) | |||||
| GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) | |||||
| #her2k | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) | |||||
| endif() | |||||
| # special gemm defines for complex | # special gemm defines for complex | ||||
| foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) | foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) | ||||
| string(TOLOWER ${gemm_define} gemm_define_LC) | string(TOLOWER ${gemm_define} gemm_define_LC) | ||||
| GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) | GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) | ||||
| if(USE_GEMM3M) | |||||
| GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type}) | |||||
| endif() | |||||
| if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | ||||
| GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) | GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) | ||||
| if(USE_GEMM3M) | |||||
| GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type}) | |||||
| endif() | |||||
| endif () | endif () | ||||
| endforeach () | endforeach () | ||||
| endif () | endif () | ||||
| @@ -33,6 +33,8 @@ set(COMMON_SOURCES | |||||
| xerbla.c | xerbla.c | ||||
| openblas_set_num_threads.c | openblas_set_num_threads.c | ||||
| openblas_error_handle.c | openblas_error_handle.c | ||||
| openblas_get_num_procs.c | |||||
| openblas_get_num_threads.c | |||||
| ) | ) | ||||
| # these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling | # these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling | ||||
| @@ -1,13 +1,16 @@ | |||||
| include_directories(${CMAKE_SOURCE_DIR}) | include_directories(${CMAKE_SOURCE_DIR}) | ||||
| set(BLAS1_SOURCES | set(BLAS1_SOURCES | ||||
| copy.c | copy.c | ||||
| asum.c nrm2.c | |||||
| nrm2.c | |||||
| ) | ) | ||||
| set(BLAS1_REAL_ONLY_SOURCES | set(BLAS1_REAL_ONLY_SOURCES | ||||
| rotm.c rotmg.c # N.B. these do not have complex counterparts | rotm.c rotmg.c # N.B. these do not have complex counterparts | ||||
| rot.c | |||||
| asum.c | |||||
| ) | ) | ||||
| # these will have 'z' prepended for the complex version | # these will have 'z' prepended for the complex version | ||||
| @@ -15,7 +18,7 @@ set(BLAS1_MANGLED_SOURCES | |||||
| axpy.c swap.c | axpy.c swap.c | ||||
| scal.c | scal.c | ||||
| dot.c | dot.c | ||||
| rot.c rotg.c | |||||
| rotg.c | |||||
| axpby.c | axpby.c | ||||
| ) | ) | ||||
| @@ -31,6 +34,13 @@ set(BLAS2_SOURCES | |||||
| tpsv.c tpmv.c | tpsv.c tpmv.c | ||||
| ) | ) | ||||
| set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES | |||||
| hemv.c hbmv.c | |||||
| her.c her2.c | |||||
| hpmv.c hpr.c | |||||
| hpr2.c | |||||
| ) | |||||
| # these do not have separate 'z' sources | # these do not have separate 'z' sources | ||||
| set(BLAS3_SOURCES | set(BLAS3_SOURCES | ||||
| gemm.c symm.c | gemm.c symm.c | ||||
| @@ -39,6 +49,7 @@ set(BLAS3_SOURCES | |||||
| set(BLAS3_MANGLED_SOURCES | set(BLAS3_MANGLED_SOURCES | ||||
| omatcopy.c imatcopy.c | omatcopy.c imatcopy.c | ||||
| geadd.c | |||||
| ) | ) | ||||
| # generate the BLAS objs once with and once without cblas | # generate the BLAS objs once with and once without cblas | ||||
| @@ -65,9 +76,14 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) | |||||
| GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) | GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) | ||||
| GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | ||||
| GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | ||||
| GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4) | |||||
| GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) | GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) | ||||
| GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | ||||
| #sdsdot, dsdot | |||||
| GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||||
| GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||||
| # trmm is trsm with a compiler flag set | # trmm is trsm with a compiler flag set | ||||
| GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) | GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) | ||||
| @@ -86,17 +102,36 @@ endforeach () | |||||
| # complex-specific sources | # complex-specific sources | ||||
| foreach (float_type ${FLOAT_TYPES}) | foreach (float_type ${FLOAT_TYPES}) | ||||
| if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | ||||
| GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) | GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) | GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type}) | |||||
| if (USE_GEMM3M) | |||||
| GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type}) | |||||
| endif() | |||||
| endif () | endif () | ||||
| if (${float_type} STREQUAL "COMPLEX") | if (${float_type} STREQUAL "COMPLEX") | ||||
| GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") | GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") | ||||
| GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") | GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") | ||||
| GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX") | |||||
| GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX") | |||||
| GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX") | |||||
| GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX") | |||||
| endif () | endif () | ||||
| if (${float_type} STREQUAL "ZCOMPLEX") | if (${float_type} STREQUAL "ZCOMPLEX") | ||||
| GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") | GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") | ||||
| GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") | GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") | ||||
| GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX") | |||||
| GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX") | |||||
| GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX") | |||||
| GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX") | |||||
| endif () | endif () | ||||
| endforeach () | endforeach () | ||||
| @@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||||
| #endif | #endif | ||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) | |||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) | |||||
| long double da = *DA; | long double da = *DA; | ||||
| long double db = *DB; | long double db = *DB; | ||||
| @@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT * | |||||
| #endif | #endif | ||||
| if (n <= 0) return; | |||||
| FLOAT alpha_r = *(ALPHA + 0); | FLOAT alpha_r = *(ALPHA + 0); | ||||
| FLOAT alpha_i = *(ALPHA + 1); | FLOAT alpha_i = *(ALPHA + 1); | ||||
| FLOAT beta_r = *(BETA + 0); | FLOAT beta_r = *(BETA + 0); | ||||
| FLOAT beta_i = *(BETA + 1); | FLOAT beta_i = *(BETA + 1); | ||||
| if (n <= 0) return; | |||||
| FUNCTION_PROFILE_START(); | FUNCTION_PROFILE_START(); | ||||
| if (incx < 0) x -= (n - 1) * incx * 2; | if (incx < 0) x -= (n - 1) * incx * 2; | ||||
| @@ -57,21 +57,25 @@ | |||||
| #ifdef RETURN_BY_STRUCT | #ifdef RETURN_BY_STRUCT | ||||
| MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | ||||
| #elif defined RETURN_BY_STACK | #elif defined RETURN_BY_STACK | ||||
| void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| #else | #else | ||||
| FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { | |||||
| #endif | #endif | ||||
| BLASLONG n = *N; | BLASLONG n = *N; | ||||
| BLASLONG incx = *INCX; | BLASLONG incx = *INCX; | ||||
| BLASLONG incy = *INCY; | BLASLONG incy = *INCY; | ||||
| #ifndef RETURN_BY_STACK | #ifndef RETURN_BY_STACK | ||||
| FLOAT _Complex ret; | |||||
| OPENBLAS_COMPLEX_FLOAT ret; | |||||
| #endif | #endif | ||||
| #ifdef RETURN_BY_STRUCT | #ifdef RETURN_BY_STRUCT | ||||
| MYTYPE myret; | MYTYPE myret; | ||||
| #endif | #endif | ||||
| #ifndef RETURN_BY_STRUCT | |||||
| OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); | |||||
| #endif | |||||
| PRINT_DEBUG_NAME; | PRINT_DEBUG_NAME; | ||||
| if (n <= 0) { | if (n <= 0) { | ||||
| @@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, | |||||
| myret.i = 0.; | myret.i = 0.; | ||||
| return myret; | return myret; | ||||
| #elif defined RETURN_BY_STACK | #elif defined RETURN_BY_STACK | ||||
| *result = ZERO; | |||||
| *result = zero; | |||||
| return; | return; | ||||
| #else | #else | ||||
| return ZERO; | |||||
| return zero; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -144,21 +148,21 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, | |||||
| #else | #else | ||||
| #ifdef FORCE_USE_STACK | #ifdef FORCE_USE_STACK | ||||
| void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ | |||||
| void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ | |||||
| #else | #else | ||||
| FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||||
| OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||||
| FLOAT _Complex ret; | |||||
| OPENBLAS_COMPLEX_FLOAT ret; | |||||
| #endif | #endif | ||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| if (n <= 0) { | if (n <= 0) { | ||||
| #ifdef FORCE_USE_STACK | #ifdef FORCE_USE_STACK | ||||
| *result = ZERO; | |||||
| *result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); | |||||
| return; | return; | ||||
| #else | #else | ||||
| return ZERO; | |||||
| return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | ||||
| @@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| blasint lenx, leny; | blasint lenx, leny; | ||||
| blasint i; | blasint i; | ||||
| PRINT_DEBUG_NAME; | |||||
| FLOAT alpha_r = *(ALPHA + 0); | FLOAT alpha_r = *(ALPHA + 0); | ||||
| FLOAT alpha_i = *(ALPHA + 1); | FLOAT alpha_i = *(ALPHA + 1); | ||||
| FLOAT beta_r = *(BETA + 0); | FLOAT beta_r = *(BETA + 0); | ||||
| FLOAT beta_i = *(BETA + 1); | FLOAT beta_i = *(BETA + 1); | ||||
| PRINT_DEBUG_NAME; | |||||
| TOUPPER(trans); | TOUPPER(trans); | ||||
| info = 0; | info = 0; | ||||
| @@ -153,14 +156,14 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| GEMV_O, GEMV_U, GEMV_S, GEMV_D, | GEMV_O, GEMV_U, GEMV_S, GEMV_D, | ||||
| }; | }; | ||||
| PRINT_DEBUG_CNAME; | |||||
| FLOAT alpha_r = *(ALPHA + 0); | FLOAT alpha_r = *(ALPHA + 0); | ||||
| FLOAT alpha_i = *(ALPHA + 1); | FLOAT alpha_i = *(ALPHA + 1); | ||||
| FLOAT beta_r = *(BETA + 0); | FLOAT beta_r = *(BETA + 0); | ||||
| FLOAT beta_i = *(BETA + 1); | FLOAT beta_i = *(BETA + 1); | ||||
| PRINT_DEBUG_CNAME; | |||||
| trans = -1; | trans = -1; | ||||
| info = 0; | info = 0; | ||||
| @@ -234,10 +237,10 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads_max = num_cpu_avail(2); | |||||
| int nthreads_avail = nthreads_max; | |||||
| nthreads_max = num_cpu_avail(2); | |||||
| nthreads_avail = nthreads_max; | |||||
| double MNK = (double) m * (double) n; | |||||
| MNK = (double) m * (double) n; | |||||
| if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) | if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) | ||||
| nthreads_max = 1; | nthreads_max = 1; | ||||
| @@ -6,13 +6,7 @@ | |||||
| void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | ||||
| PRINT_DEBUG_NAME; | |||||
| IDEBUG_START; | |||||
| FUNCTION_PROFILE_START(); | |||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) | |||||
| #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) | |||||
| long double da_r = *(DA + 0); | long double da_r = *(DA + 0); | ||||
| long double da_i = *(DA + 1); | long double da_i = *(DA + 1); | ||||
| @@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||||
| long double ada = fabs(da_r) + fabs(da_i); | long double ada = fabs(da_r) + fabs(da_i); | ||||
| PRINT_DEBUG_NAME; | |||||
| IDEBUG_START; | |||||
| FUNCTION_PROFILE_START(); | |||||
| if (ada == ZERO) { | if (ada == ZERO) { | ||||
| *C = ZERO; | *C = ZERO; | ||||
| *(S + 0) = ONE; | *(S + 0) = ONE; | ||||
| @@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||||
| FLOAT ada = fabs(da_r) + fabs(da_i); | FLOAT ada = fabs(da_r) + fabs(da_i); | ||||
| FLOAT adb; | FLOAT adb; | ||||
| PRINT_DEBUG_NAME; | |||||
| IDEBUG_START; | |||||
| FUNCTION_PROFILE_START(); | |||||
| if (ada == ZERO) { | if (ada == ZERO) { | ||||
| *C = ZERO; | *C = ZERO; | ||||
| *(S + 0) = ONE; | *(S + 0) = ONE; | ||||
| @@ -17,6 +17,7 @@ endif () | |||||
| SetDefaultL1() | SetDefaultL1() | ||||
| SetDefaultL2() | SetDefaultL2() | ||||
| SetDefaultL3() | |||||
| ParseMakefileVars("${KERNELDIR}/KERNEL") | ParseMakefileVars("${KERNELDIR}/KERNEL") | ||||
| ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") | ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") | ||||
| @@ -65,8 +66,20 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| else () | else () | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) | ||||
| endif () | endif () | ||||
| if (${float_type} STREQUAL "COMPLEX") | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type}) | |||||
| endif() | |||||
| if (${float_type} STREQUAL "ZCOMPLEX") | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type}) | |||||
| endif() | |||||
| endforeach () | endforeach () | ||||
| #dsdot,sdsdot | |||||
| GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") | |||||
| GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") | |||||
| # Makefile.L2 | # Makefile.L2 | ||||
| GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) | GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) | ||||
| GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) | GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) | ||||
| @@ -86,6 +99,12 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type}) | |||||
| else () | else () | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) | ||||
| @@ -93,14 +112,9 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| endforeach () | endforeach () | ||||
| # Makefile.L3 | # Makefile.L3 | ||||
| set(USE_GEMM3M false) | |||||
| set(USE_TRMM false) | set(USE_TRMM false) | ||||
| if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") | |||||
| set(USE_GEMM3M true) | |||||
| endif () | |||||
| if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC") | |||||
| if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic") | |||||
| set(USE_TRMM true) | set(USE_TRMM true) | ||||
| endif () | endif () | ||||
| @@ -155,6 +169,13 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) | ||||
| #hemm | |||||
| GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) | |||||
| else () | else () | ||||
| GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) | GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) | ||||
| endif () | endif () | ||||
| @@ -241,11 +262,40 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type}) | |||||
| if (NOT DEFINED ${float_char}OMATCOPY_CNC) | |||||
| if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") | |||||
| set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c) | |||||
| endif () | |||||
| endif () | |||||
| if (NOT DEFINED ${float_char}OMATCOPY_RNC) | |||||
| if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") | |||||
| set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c) | |||||
| endif () | |||||
| endif () | |||||
| if (NOT DEFINED ${float_char}OMATCOPY_CTC) | |||||
| if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") | |||||
| set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c) | |||||
| endif () | |||||
| endif () | |||||
| if (NOT DEFINED ${float_char}OMATCOPY_RTC) | |||||
| if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") | |||||
| set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c) | |||||
| endif () | |||||
| endif () | |||||
| if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) | |||||
| endif() | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) | |||||
| endforeach () | endforeach () | ||||
| # Makefile.LA | # Makefile.LA | ||||
| @@ -3459,7 +3459,7 @@ ifndef DGEADD_K | |||||
| DGEADD_K = ../generic/geadd.c | DGEADD_K = ../generic/geadd.c | ||||
| endif | endif | ||||
| $(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) | |||||
| $(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K) | |||||
| $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ | $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ | ||||
| ifndef CGEADD_K | ifndef CGEADD_K | ||||
| @@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL | |||||
| BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
| FLOAT temp; | FLOAT temp; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| ix = 0; | ix = 0; | ||||
| iy = 0; | iy = 0; | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| if ( beta_r == 0.0 && beta_i == 0.0) | if ( beta_r == 0.0 && beta_i == 0.0) | ||||
| { | { | ||||
| @@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||||
| { | { | ||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| if ( da_r == 0.0 && da_i == 0.0 ) return(0); | if ( da_r == 0.0 && da_i == 0.0 ) return(0); | ||||
| @@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||||
| ix = 0; | ix = 0; | ||||
| iy = 0; | iy = 0; | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| { | { | ||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -40,24 +40,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <complex.h> | #include <complex.h> | ||||
| FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | ||||
| #else | #else | ||||
| openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| #endif | #endif | ||||
| { | { | ||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| FLOAT dot[2]; | FLOAT dot[2]; | ||||
| FLOAT _Complex result; | |||||
| OPENBLAS_COMPLEX_FLOAT result; | |||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| dot[0]=0.0; | dot[0]=0.0; | ||||
| dot[1]=0.0; | dot[1]=0.0; | ||||
| __real__ result = 0.0 ; | |||||
| __imag__ result = 0.0 ; | |||||
| CREAL(result) = 0.0 ; | |||||
| CIMAG(result) = 0.0 ; | |||||
| if ( n < 1 ) return(result); | if ( n < 1 ) return(result); | ||||
| BLASLONG inc_x2 = 2 * inc_x ; | |||||
| BLASLONG inc_y2 = 2 * inc_y ; | |||||
| inc_x2 = 2 * inc_x ; | |||||
| inc_y2 = 2 * inc_y ; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -73,8 +75,8 @@ openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BL | |||||
| i++ ; | i++ ; | ||||
| } | } | ||||
| __real__ result = dot[0]; | |||||
| __imag__ result = dot[1]; | |||||
| CREAL(result) = dot[0]; | |||||
| CIMAG(result) = dot[1]; | |||||
| return(result); | return(result); | ||||
| } | } | ||||
| @@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT | |||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n <= 0 ) return(0); | if ( n <= 0 ) return(0); | ||||
| BLASLONG inc_x2 = 2 * inc_x ; | |||||
| BLASLONG inc_y2 = 2 * inc_y ; | |||||
| inc_x2 = 2 * inc_x ; | |||||
| inc_y2 = 2 * inc_y ; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm | |||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| FLOAT temp[2]; | FLOAT temp[2]; | ||||
| BLASLONG inc_x2; | |||||
| BLASLONG inc_y2; | |||||
| if ( n < 0 ) return(0); | if ( n < 0 ) return(0); | ||||
| BLASLONG inc_x2 = 2 * inc_x; | |||||
| BLASLONG inc_y2 = 2 * inc_y; | |||||
| inc_x2 = 2 * inc_x; | |||||
| inc_y2 = 2 * inc_y; | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c | |||||
| ZHEMV_U_KERNEL = ../generic/zhemv_k.c | ZHEMV_U_KERNEL = ../generic/zhemv_k.c | ||||
| ZHEMV_L_KERNEL = ../generic/zhemv_k.c | ZHEMV_L_KERNEL = ../generic/zhemv_k.c | ||||
| LSAME_KERNEL = ../generic/lsame.c | |||||
| SCABS_KERNEL = ../generic/cabs.c | |||||
| DCABS_KERNEL = ../generic/cabs.c | |||||
| QCABS_KERNEL = ../generic/cabs.c | |||||
| #Dump kernel | |||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| @@ -59,7 +59,8 @@ typedef int blasint; | |||||
| extension since version 3.0. If neither are available, use a compatible | extension since version 3.0. If neither are available, use a compatible | ||||
| structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | ||||
| #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | ||||
| (__GNUC__ >= 3 && !defined(__cplusplus))) | |||||
| (__GNUC__ >= 3 && !defined(__cplusplus)) || \ | |||||
| _MSC_VER >= 1800) // Visual Studio 2013 supports complex | |||||
| #define OPENBLAS_COMPLEX_C99 | #define OPENBLAS_COMPLEX_C99 | ||||
| #ifndef __cplusplus | #ifndef __cplusplus | ||||
| #include <complex.h> | #include <complex.h> | ||||