Browse Source

Use cmake to build OpenBLAS GENERIC Target on MSVC x86 64-bit.

Disable CBLAS and LAPACK.
tags/v0.2.15^2
Zhang Xianyi 10 years ago
parent
commit
f874465bb8
51 changed files with 488 additions and 120 deletions
  1. +16
    -4
      CMakeLists.txt
  2. +60
    -0
      cmake/export.cmake
  3. +3
    -0
      cmake/f_check.cmake
  4. +11
    -4
      cmake/kernel.cmake
  5. +7
    -2
      cmake/prebuild.cmake
  6. +15
    -0
      cmake/system.cmake
  7. +4
    -0
      cmake/utils.cmake
  8. +37
    -8
      common.h
  9. +31
    -4
      common_x86_64.h
  10. +59
    -0
      driver/level2/CMakeLists.txt
  11. +1
    -1
      driver/level2/gbmv_thread.c
  12. +1
    -1
      driver/level2/sbmv_thread.c
  13. +1
    -1
      driver/level2/spmv_thread.c
  14. +1
    -1
      driver/level2/tbmv_thread.c
  15. +1
    -1
      driver/level2/tpmv_thread.c
  16. +1
    -1
      driver/level2/trmv_thread.c
  17. +1
    -1
      driver/level2/zgbmv_k.c
  18. +6
    -4
      driver/level2/zhbmv_k.c
  19. +6
    -4
      driver/level2/zhpmv_k.c
  20. +4
    -2
      driver/level2/zsbmv_k.c
  21. +2
    -1
      driver/level2/zspmv_k.c
  22. +1
    -1
      driver/level2/ztbmv_L.c
  23. +1
    -1
      driver/level2/ztbmv_U.c
  24. +1
    -1
      driver/level2/ztbsv_L.c
  25. +1
    -1
      driver/level2/ztbsv_U.c
  26. +1
    -1
      driver/level2/ztpmv_L.c
  27. +1
    -1
      driver/level2/ztpmv_U.c
  28. +1
    -1
      driver/level2/ztpsv_L.c
  29. +1
    -1
      driver/level2/ztpsv_U.c
  30. +1
    -1
      driver/level2/ztrmv_L.c
  31. +1
    -1
      driver/level2/ztrmv_U.c
  32. +1
    -1
      driver/level2/ztrsv_L.c
  33. +1
    -1
      driver/level2/ztrsv_U.c
  34. +29
    -8
      driver/level3/CMakeLists.txt
  35. +2
    -0
      driver/others/CMakeLists.txt
  36. +37
    -2
      interface/CMakeLists.txt
  37. +1
    -2
      interface/rotg.c
  38. +2
    -2
      interface/zaxpby.c
  39. +14
    -10
      interface/zdot.c
  40. +10
    -7
      interface/zgemv.c
  41. +13
    -7
      interface/zrotg.c
  42. +60
    -10
      kernel/CMakeLists.txt
  43. +1
    -1
      kernel/Makefile.L3
  44. +5
    -2
      kernel/arm/zaxpby.c
  45. +4
    -2
      kernel/arm/zaxpy.c
  46. +4
    -2
      kernel/arm/zcopy.c
  47. +10
    -8
      kernel/arm/zdot.c
  48. +4
    -2
      kernel/arm/zrot.c
  49. +4
    -2
      kernel/arm/zswap.c
  50. +6
    -0
      kernel/x86_64/KERNEL.generic
  51. +2
    -1
      openblas_config_template.h

+ 16
- 4
CMakeLists.txt View File

@@ -15,11 +15,13 @@ enable_language(C)
set(OpenBLAS_LIBNAME openblas) set(OpenBLAS_LIBNAME openblas)


####### #######
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON)
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON)
option(BUILD_DEBUG "Build Debug Version" OFF) option(BUILD_DEBUG "Build Debug Version" OFF)
####### #######
if(BUILD_WITHOUT_LAPACK) if(BUILD_WITHOUT_LAPACK)
set(NO_LAPACK 1) set(NO_LAPACK 1)
set(NO_LAPACKE 1)
endif() endif()


if(BUILD_DEBUG) if(BUILD_DEBUG)
@@ -27,6 +29,11 @@ set(CMAKE_BUILD_TYPE Debug)
else() else()
set(CMAKE_BUILD_TYPE Release) set(CMAKE_BUILD_TYPE Release)
endif() endif()

if(BUILD_WITHOUT_CBLAS)
set(NO_CBLAS 1)
endif()

####### #######




@@ -51,7 +58,6 @@ endif ()


set(SUBDIRS ${BLASDIRS}) set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK) if (NOT NO_LAPACK)
message ("error 1")
list(APPEND SUBDIRS lapack) list(APPEND SUBDIRS lapack)
endif () endif ()


@@ -111,15 +117,21 @@ endforeach ()
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NOFORTRAN AND NOT NO_LAPACK) if (NOT NOFORTRAN AND NOT NO_LAPACK)
message ("error 2")
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE) if (NOT NO_LAPACKE)
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
endif () endif ()
endif () endif ()


#Only generate .def for dll on MSVC
if(MSVC)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()

# add objects to the openblas lib # add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
# OpenBLAS_DEF_FILE is only set inside the if(MSVC) block; on every other
# toolchain it expands to nothing, so the library is not handed a .def source
# that is never generated there.
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})

include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")


#only build shared library for MSVC #only build shared library for MSVC
if(NOT MSVC) if(NOT MSVC)


+ 60
- 0
cmake/export.cmake View File

@@ -0,0 +1,60 @@

# Generate the module-definition (.def) file for the OpenBLAS DLL.
# Only MSVC builds need it; the script does nothing on other toolchains.
# Expects OpenBLAS_DEF_FILE and OpenBLAS_LIBNAME to be set by the including
# CMakeLists.txt, and the library target to exist already.
if(MSVC)

  # The .def file is written at build time by the custom command below, so
  # CMake must not require it to exist at configure time.
  set_source_files_properties("${OpenBLAS_DEF_FILE}" PROPERTIES GENERATED 1)

  # Architecture argument for gensymbol: x86_64 unless ARCH says otherwise.
  if(DEFINED ARCH)
    set(ARCH_IN ${ARCH})
  else()
    set(ARCH_IN "x86_64")
  endif()

  # Quoted so that an unset or empty CORE compares as "" instead of producing
  # a malformed if() expression.
  if("${CORE}" STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif()

  # gensymbol is called with positional arguments, so every optional setting
  # gets an <NAME>_IN value: the setting itself when defined, otherwise 0.
  foreach(export_opt EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
    if(DEFINED ${export_opt})
      set(${export_opt}_IN ${${export_opt}})
    else()
      set(${export_opt}_IN 0)
    endif()
  endforeach()

  # Run gensymbol just before the library is linked and redirect its output
  # into the .def file that the library lists as a source.
  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy"
         "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}"
         "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}"
         > "${OpenBLAS_DEF_FILE}"
    COMMENT "Create openblas.def file"
    VERBATIM)

endif()

+ 3
- 0
cmake/f_check.cmake View File

@@ -25,7 +25,10 @@ if (MSVC)
include(CMakeForceCompiler) include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif () endif ()

if (NOT NO_LAPACK)
enable_language(Fortran) enable_language(Fortran)
endif()


if (NOT ONLY_CBLAS) if (NOT ONLY_CBLAS)
# N.B. f_check is not cross-platform, so instead try to use CMake variables # N.B. f_check is not cross-platform, so instead try to use CMake variables


+ 11
- 4
cmake/kernel.cmake View File

@@ -99,10 +99,10 @@ macro(SetDefaultL1)
set(QGEMVTKERNEL gemv_t.S) set(QGEMVTKERNEL gemv_t.S)
set(XGEMVNKERNEL zgemv_n.S) set(XGEMVNKERNEL zgemv_n.S)
set(XGEMVTKERNEL zgemv_t.S) set(XGEMVTKERNEL zgemv_t.S)
set(SCABS_KERNEL cabs.S)
set(DCABS_KERNEL cabs.S)
set(QCABS_KERNEL cabs.S)
set(LSAME_KERNEL lsame.S)
# cabs and lsame fall back to the generic C sources for every precision.
# All three CABS variants must name the same file: SCABS already used cabs.c,
# while DCABS/QCABS still carried the old assembly suffix (.S).
set(SCABS_KERNEL ../generic/cabs.c)
set(DCABS_KERNEL ../generic/cabs.c)
set(QCABS_KERNEL ../generic/cabs.c)
set(LSAME_KERNEL ../generic/lsame.c)
set(SAXPBYKERNEL ../arm/axpby.c) set(SAXPBYKERNEL ../arm/axpby.c)
set(DAXPBYKERNEL ../arm/axpby.c) set(DAXPBYKERNEL ../arm/axpby.c)
set(CAXPBYKERNEL ../arm/zaxpby.c) set(CAXPBYKERNEL ../arm/zaxpby.c)
@@ -156,3 +156,10 @@ macro(SetDefaultL2)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro () endmacro ()

# Default level-3 kernel sources.
# Sets the four GEADD kernel variables in the caller's scope: the S/D variants
# use ../generic/geadd.c and the C/Z variants use ../generic/zgeadd.c.
macro(SetDefaultL3)
  # C/Z variants
  set(ZGEADD_KERNEL ../generic/zgeadd.c)
  set(CGEADD_KERNEL ../generic/zgeadd.c)
  # S/D variants
  set(DGEADD_KERNEL ../generic/geadd.c)
  set(SGEADD_KERNEL ../generic/geadd.c)
endmacro()

+ 7
- 2
cmake/prebuild.cmake View File

@@ -66,6 +66,11 @@ if (NOT MSVC)
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif () endif ()


# MSVC builds use the generic target for now: add -DFORCE_GENERIC to the
# flags that the getarch try_compile calls receive.
if (MSVC)
  list(APPEND GETARCH_FLAGS -DFORCE_GENERIC)
endif ()

set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR}) file(MAKE_DIRECTORY ${GETARCH_DIR})
@@ -73,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC} SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${GETARCH_BIN}
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
) )


message(STATUS "Running getarch") message(STATUS "Running getarch")
@@ -95,7 +100,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${GETARCH2_BIN}
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
) )


# use the cmake binary w/ the -E param to run a shell command in a cross-platform way # use the cmake binary w/ the -E param to run a shell command in a cross-platform way


+ 15
- 0
cmake/system.cmake View File

@@ -420,6 +420,21 @@ if (ONLY_CBLAS)
set(LIB_COMPONENTS CBLAS) set(LIB_COMPONENTS CBLAS)
endif () endif ()



# For GEMM3M
# USE_GEMM3M defaults to 0. It is switched on for the architectures listed
# below, and switched back off when the selected core is "generic".
set(USE_GEMM3M 0)

if (DEFINED ARCH)
  # Expansions are quoted so that an empty ARCH or an unset CORE compares as
  # "" instead of producing a malformed if() expression.
  if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif ()

  if ("${CORE}" STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif ()
endif ()


#export OSNAME #export OSNAME
#export ARCH #export ARCH
#export CORE #export CORE


+ 4
- 0
cmake/utils.cmake View File

@@ -102,6 +102,7 @@ endfunction ()
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) # 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
# STRING - compiles only the given type (e.g. DOUBLE) # STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in) function(GenerateNamedObjects sources_in)


@@ -151,6 +152,9 @@ function(GenerateNamedObjects sources_in)
set(complex_only true) set(complex_only true)
elseif (${ARGV7} EQUAL 3) elseif (${ARGV7} EQUAL 3)
set(mangle_complex_sources true) set(mangle_complex_sources true)
elseif (${ARGV7} EQUAL 4)
set(mangle_complex_sources true)
set(complex_only true)
elseif (NOT ${ARGV7} EQUAL 0) elseif (NOT ${ARGV7} EQUAL 0)
set(float_list ${ARGV7}) set(float_list ${ARGV7})
endif () endif ()


+ 37
- 8
common.h View File

@@ -296,13 +296,6 @@ typedef int blasint;
#define COMPSIZE 2 #define COMPSIZE 2
#endif #endif


#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#define CREAL __real__
#define CIMAG __imag__
#endif


#define Address_H(x) (((x)+(1<<15))>>16) #define Address_H(x) (((x)+(1<<15))>>16)
#define Address_L(x) ((x)-((Address_H(x))<<16)) #define Address_L(x) ((x)-((Address_H(x))<<16))
@@ -464,17 +457,49 @@ typedef char* env_var_t;
extension since version 3.0. If neither are available, use a compatible extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus)))
(__GNUC__ >= 3 && !defined(__cplusplus)) || \
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99 #define OPENBLAS_COMPLEX_C99
typedef float _Complex openblas_complex_float; typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double; typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble; typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else #else
#define OPENBLAS_COMPLEX_STRUCT #define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float; typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double; typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble; typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif #endif

#ifdef XDOUBLE
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
#elif defined(DOUBLE)
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
#else
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
#endif

#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#ifdef OPENBLAS_COMPLEX_STRUCT
#define CREAL(Z) ((Z).real)
#define CIMAG(Z) ((Z).imag)
#else
#define CREAL __real__
#define CIMAG __imag__
#endif
#endif

#endif // ASSEMBLER #endif // ASSEMBLER


#ifndef IFLUSH #ifndef IFLUSH
@@ -491,6 +516,10 @@ typedef char* env_var_t;
#endif #endif
#endif #endif


#if defined(C_MSVC)
#define inline __inline
#endif

#ifndef ASSEMBLER #ifndef ASSEMBLER


#ifndef MIN #ifndef MIN


+ 31
- 4
common_x86_64.h View File

@@ -41,6 +41,10 @@


#ifndef ASSEMBLER #ifndef ASSEMBLER


#ifdef C_MSVC
#include <intrin.h>
#endif

#ifdef C_SUN #ifdef C_SUN
#define __asm__ __asm #define __asm__ __asm
#define __volatile__ #define __volatile__
@@ -61,30 +65,39 @@


static void __inline blas_lock(volatile BLASULONG *address){ static void __inline blas_lock(volatile BLASULONG *address){


int ret;
BLASULONG ret;


do { do {
while (*address) {YIELDING;}; while (*address) {YIELDING;};


#ifndef C_MSVC
__asm__ __volatile__( __asm__ __volatile__(
"xchgl %0, %1\n" "xchgl %0, %1\n"
: "=r"(ret), "=m"(*address) : "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address) : "0"(1), "m"(*address)
: "memory"); : "memory");

#else
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
#endif
} while (ret); } while (ret);

} }


static __inline BLASULONG rpcc(void){ static __inline BLASULONG rpcc(void){
#ifdef C_MSVC
return __rdtsc();
#else
BLASULONG a, d; BLASULONG a, d;


__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));


return ((BLASULONG)a + ((BLASULONG)d << 32)); return ((BLASULONG)a + ((BLASULONG)d << 32));
#endif
} }


#define RPCC64BIT #define RPCC64BIT


#ifndef C_MSVC
static __inline BLASULONG getstackaddr(void){ static __inline BLASULONG getstackaddr(void){
BLASULONG addr; BLASULONG addr;


@@ -93,22 +106,32 @@ static __inline BLASULONG getstackaddr(void){


return addr; return addr;
} }
#endif


static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){


#ifdef C_MSVC
int cpuinfo[4];
__cpuid(cpuinfo, op);
*eax=cpuinfo[0];
*ebx=cpuinfo[1];
*ecx=cpuinfo[2];
*edx=cpuinfo[3];
#else
__asm__ __volatile__("cpuid" __asm__ __volatile__("cpuid"
: "=a" (*eax), : "=a" (*eax),
"=b" (*ebx), "=b" (*ebx),
"=c" (*ecx), "=c" (*ecx),
"=d" (*edx) "=d" (*edx)
: "0" (op)); : "0" (op));
#endif
} }


/* /*
#define WHEREAMI #define WHEREAMI
*/ */


static inline int WhereAmI(void){
static __inline int WhereAmI(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
int apicid; int apicid;


@@ -150,10 +173,14 @@ static inline int WhereAmI(void){
#define GET_IMAGE_CANCEL #define GET_IMAGE_CANCEL


#ifdef SMP #ifdef SMP
#ifdef USE64BITINT
#if defined(USE64BITINT)
static __inline blasint blas_quickdivide(blasint x, blasint y){ static __inline blasint blas_quickdivide(blasint x, blasint y){
return x / y; return x / y;
} }
#elif defined (C_MSVC)
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
return x / y;
}
#else #else
extern unsigned int blas_quick_divide_table[]; extern unsigned int blas_quick_divide_table[];




+ 59
- 0
driver/level2/CMakeLists.txt View File

@@ -46,12 +46,28 @@ set(NU_SMP_SOURCES
tbmv_thread.c tbmv_thread.c
) )


# Sources that the foreach over ULVM_COMPLEX_SOURCES builds in four variants
# each (_U, _L, _V, _M). The names are listed without the leading "z"; that
# loop prepends it when naming the source file.
set(ULVM_COMPLEX_SOURCES
hbmv_k.c
hpmv_k.c
hpr_k.c
hpr2_k.c
her_k.c
her2_k.c
)

# objects that need LOWER set # objects that need LOWER set
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)


# gbmv uses a lowercase n and t # gbmv uses a lowercase n and t
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
# c/zgbmv: extra gbmv variants built from zgbmv_k.c for the complex types only
# (the trailing 2 selects COMPLEX/DOUBLE COMPLEX). Each object is the same
# source compiled with a different combination of TRANS, CONJ and XCONJ; the
# one-letter suffix (r, c, o, u, s, d) names the combination.
GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)


# special defines for complex # special defines for complex
foreach (float_type ${FLOAT_TYPES}) foreach (float_type ${FLOAT_TYPES})
@@ -82,6 +98,14 @@ foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
endforeach () endforeach ()


# Build the U/L/V/M variants of every source in ULVM_COMPLEX_SOURCES for the
# current ${float_type} (taken from the enclosing loop).
foreach (ulvm_source ${ULVM_COMPLEX_SOURCES})
# op_name is the leading run of lowercase letters/digits in the file name,
# e.g. "hbmv" from hbmv_k.c or "hpr2" from hpr2_k.c.
string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source})
# The source actually compiled is the z-prefixed file (zhbmv_k.c, ...).
# _U: no extra define, _L: LOWER, _V: HEMVREV, _M: LOWER + HEMVREV.
GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
endforeach()

if (SMP) if (SMP)


GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
@@ -103,6 +127,41 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})


# Threaded h* objects (hbmv, hpmv, hpr, hpr2, hemv, her, her2). Each group
# reuses the s*_thread.c / sy*_thread.c source and produces four objects:
# _U, _L (adds LOWER), _V and _M (adds LOWER).
# NOTE(review): for her/her2 the _U/_L variants pass HER/HER2 but the _V/_M
# variants pass HEMVREV like the other groups -- confirm that HEMVREV is the
# define syr_thread.c and syr2_thread.c actually test for.
GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type})

foreach (nu_smp_src ${NU_SMP_SOURCES}) foreach (nu_smp_src ${NU_SMP_SOURCES})
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})


+ 1
- 1
driver/level2/gbmv_thread.c View File

@@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif




+ 1
- 1
driver/level2/sbmv_thread.c View File

@@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif


a = (FLOAT *)args -> a; a = (FLOAT *)args -> a;


+ 1
- 1
driver/level2/spmv_thread.c View File

@@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif


a = (FLOAT *)args -> a; a = (FLOAT *)args -> a;


+ 1
- 1
driver/level2/tbmv_thread.c View File

@@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif




+ 1
- 1
driver/level2/tpmv_thread.c View File

@@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif




+ 1
- 1
driver/level2/trmv_thread.c View File

@@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif




+ 1
- 1
driver/level2/zgbmv_k.c View File

@@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
FLOAT *bufferY = gemvbuffer; FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
#ifdef TRANS #ifdef TRANS
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif


if (incy != 1) { if (incy != 1) {


+ 6
- 4
driver/level2/zhbmv_k.c View File

@@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = sbmvbuffer; FLOAT *bufferX = sbmvbuffer;
FLOAT temp[2]; FLOAT temp[2];


OPENBLAS_COMPLEX_FLOAT result;

if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];


if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];


if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];


if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];


if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);


+ 6
- 4
driver/level2/zhpmv_k.c View File

@@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
FLOAT temp[2]; FLOAT temp[2];


OPENBLAS_COMPLEX_FLOAT result;

if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
@@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#ifndef HEMVREV #ifndef HEMVREV
#ifndef LOWER #ifndef LOWER
if (i > 0) { if (i > 0) {
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1);
result = DOTC_K(i, a, 1, X, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else


if (m - i > 1) { if (m - i > 1) {
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else
#ifndef LOWER #ifndef LOWER
if (i > 0) { if (i > 0) {
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1);
result = DOTU_K(i, a, 1, X, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else


if (m - i > 1) { if (m - i > 1) {
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);


+ 4
- 2
driver/level2/zsbmv_k.c View File

@@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferY = sbmvbuffer; FLOAT *bufferY = sbmvbuffer;
FLOAT *bufferX = sbmvbuffer; FLOAT *bufferX = sbmvbuffer;


OPENBLAS_COMPLEX_FLOAT result;

if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);


if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a, 1, Y + i * COMPSIZE, 1, NULL, 0); a, 1, Y + i * COMPSIZE, 1, NULL, 0);


if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);


Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);


+ 2
- 1
driver/level2/zspmv_k.c View File

@@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *gemvbuffer = (FLOAT *)buffer; FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *bufferY = gemvbuffer; FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
FLOAT _Complex result;

OPENBLAS_COMPLEX_FLOAT result;


if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;


+ 1
- 1
driver/level2/ztbmv_L.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztbmv_U.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztbsv_L.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztbsv_U.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztpmv_L.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){


BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztpmv_U.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){


BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztpsv_L.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){


BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztpsv_U.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){


BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztrmv_L.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu


BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztrmv_U.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu


BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztrsv_L.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf


BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztrsv_U.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf


BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;


+ 29
- 8
driver/level3/CMakeLists.txt View File

@@ -1,13 +1,5 @@
include_directories(${CMAKE_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR})


set(USE_GEMM3M 0)

if (DEFINED ARCH)
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
set(USE_GEMM3M 1)
endif ()
endif ()

# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa


# loop through gemm.c defines # loop through gemm.c defines
@@ -54,12 +46,41 @@ foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type})

#hemm
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type})
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type})

#her2k
GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})

if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
#hemm
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
#her2k
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
endif()

# special gemm defines for complex # special gemm defines for complex
foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
string(TOLOWER ${gemm_define} gemm_define_LC) string(TOLOWER ${gemm_define} gemm_define_LC)
GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
if(USE_GEMM3M)
GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type})
endif()
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
if(USE_GEMM3M)
GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type})
endif()
endif () endif ()
endforeach () endforeach ()
endif () endif ()


+ 2
- 0
driver/others/CMakeLists.txt View File

@@ -33,6 +33,8 @@ set(COMMON_SOURCES
xerbla.c xerbla.c
openblas_set_num_threads.c openblas_set_num_threads.c
openblas_error_handle.c openblas_error_handle.c
openblas_get_num_procs.c
openblas_get_num_threads.c
) )


# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling # these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling


+ 37
- 2
interface/CMakeLists.txt View File

@@ -1,13 +1,16 @@


include_directories(${CMAKE_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR})



set(BLAS1_SOURCES set(BLAS1_SOURCES
copy.c copy.c
asum.c nrm2.c
nrm2.c
) )


set(BLAS1_REAL_ONLY_SOURCES set(BLAS1_REAL_ONLY_SOURCES
rotm.c rotmg.c # N.B. these do not have complex counterparts rotm.c rotmg.c # N.B. these do not have complex counterparts
rot.c
asum.c
) )


# these will have 'z' prepended for the complex version # these will have 'z' prepended for the complex version
@@ -15,7 +18,7 @@ set(BLAS1_MANGLED_SOURCES
axpy.c swap.c axpy.c swap.c
scal.c scal.c
dot.c dot.c
rot.c rotg.c
rotg.c
axpby.c axpby.c
) )


@@ -31,6 +34,13 @@ set(BLAS2_SOURCES
tpsv.c tpmv.c tpsv.c tpmv.c
) )


set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
hemv.c hbmv.c
her.c her2.c
hpmv.c hpr.c
hpr2.c
)

# these do not have separate 'z' sources # these do not have separate 'z' sources
set(BLAS3_SOURCES set(BLAS3_SOURCES
gemm.c symm.c gemm.c symm.c
@@ -39,6 +49,7 @@ set(BLAS3_SOURCES


set(BLAS3_MANGLED_SOURCES set(BLAS3_MANGLED_SOURCES
omatcopy.c imatcopy.c omatcopy.c imatcopy.c
geadd.c
) )


# generate the BLAS objs once with and once without cblas # generate the BLAS objs once with and once without cblas
@@ -65,9 +76,14 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS})
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4)
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})


#sdsdot, dsdot
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")

# trmm is trsm with a compiler flag set # trmm is trsm with a compiler flag set
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})


@@ -86,17 +102,36 @@ endforeach ()


# complex-specific sources # complex-specific sources
foreach (float_type ${FLOAT_TYPES}) foreach (float_type ${FLOAT_TYPES})

if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type})
GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type})
GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type})
GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type})

GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type})
GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type})
GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type})

if (USE_GEMM3M)
GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type})
endif()
endif () endif ()
if (${float_type} STREQUAL "COMPLEX") if (${float_type} STREQUAL "COMPLEX")
GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX")
GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX")
GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX")
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX")
GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX")
GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX")
endif () endif ()
if (${float_type} STREQUAL "ZCOMPLEX") if (${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX")
GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX")
GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX")
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX")
GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX")
GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX")
endif () endif ()
endforeach () endforeach ()




+ 1
- 2
interface/rotg.c View File

@@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){


#endif #endif



#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)


long double da = *DA; long double da = *DA;
long double db = *DB; long double db = *DB;


+ 2
- 2
interface/zaxpby.c View File

@@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *


#endif #endif


if (n <= 0) return;

FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);


if (n <= 0) return;

FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();


if (incx < 0) x -= (n - 1) * incx * 2; if (incx < 0) x -= (n - 1) * incx * 2;


+ 14
- 10
interface/zdot.c View File

@@ -57,21 +57,25 @@
#ifdef RETURN_BY_STRUCT #ifdef RETURN_BY_STRUCT
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#elif defined RETURN_BY_STACK #elif defined RETURN_BY_STACK
void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#else #else
FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#endif #endif


BLASLONG n = *N; BLASLONG n = *N;
BLASLONG incx = *INCX; BLASLONG incx = *INCX;
BLASLONG incy = *INCY; BLASLONG incy = *INCY;
#ifndef RETURN_BY_STACK #ifndef RETURN_BY_STACK
FLOAT _Complex ret;
OPENBLAS_COMPLEX_FLOAT ret;
#endif #endif
#ifdef RETURN_BY_STRUCT #ifdef RETURN_BY_STRUCT
MYTYPE myret; MYTYPE myret;
#endif #endif


#ifndef RETURN_BY_STRUCT
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif

PRINT_DEBUG_NAME; PRINT_DEBUG_NAME;


if (n <= 0) { if (n <= 0) {
@@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
myret.i = 0.; myret.i = 0.;
return myret; return myret;
#elif defined RETURN_BY_STACK #elif defined RETURN_BY_STACK
*result = ZERO;
*result = zero;
return; return;
#else #else
return ZERO;
return zero;
#endif #endif
} }


@@ -144,21 +148,21 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
#else #else


#ifdef FORCE_USE_STACK #ifdef FORCE_USE_STACK
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){
#else #else
FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){


FLOAT _Complex ret;
OPENBLAS_COMPLEX_FLOAT ret;
#endif #endif


PRINT_DEBUG_CNAME; PRINT_DEBUG_CNAME;


if (n <= 0) { if (n <= 0) {
#ifdef FORCE_USE_STACK #ifdef FORCE_USE_STACK
*result = ZERO;
*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
return; return;
#else #else
return ZERO;
return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif #endif
} }




+ 10
- 7
interface/zgemv.c View File

@@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
FLOAT *buffer; FLOAT *buffer;
#ifdef SMP #ifdef SMP
int nthreads; int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif #endif


int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
@@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N,
blasint lenx, leny; blasint lenx, leny;
blasint i; blasint i;


PRINT_DEBUG_NAME;

FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);


FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);


PRINT_DEBUG_NAME;

TOUPPER(trans); TOUPPER(trans);


info = 0; info = 0;
@@ -153,14 +156,14 @@ void CNAME(enum CBLAS_ORDER order,
GEMV_O, GEMV_U, GEMV_S, GEMV_D, GEMV_O, GEMV_U, GEMV_S, GEMV_D,
}; };


PRINT_DEBUG_CNAME;

FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);


FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);


PRINT_DEBUG_CNAME;

trans = -1; trans = -1;
info = 0; info = 0;


@@ -234,10 +237,10 @@ void CNAME(enum CBLAS_ORDER order,


#ifdef SMP #ifdef SMP


int nthreads_max = num_cpu_avail(2);
int nthreads_avail = nthreads_max;
nthreads_max = num_cpu_avail(2);
nthreads_avail = nthreads_max;


double MNK = (double) m * (double) n;
MNK = (double) m * (double) n;
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
nthreads_max = 1; nthreads_max = 1;




+ 13
- 7
interface/zrotg.c View File

@@ -6,13 +6,7 @@


void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){


PRINT_DEBUG_NAME;

IDEBUG_START;

FUNCTION_PROFILE_START();

#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)


long double da_r = *(DA + 0); long double da_r = *(DA + 0);
long double da_i = *(DA + 1); long double da_i = *(DA + 1);
@@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){


long double ada = fabs(da_r) + fabs(da_i); long double ada = fabs(da_r) + fabs(da_i);


PRINT_DEBUG_NAME;

IDEBUG_START;

FUNCTION_PROFILE_START();

if (ada == ZERO) { if (ada == ZERO) {
*C = ZERO; *C = ZERO;
*(S + 0) = ONE; *(S + 0) = ONE;
@@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
FLOAT ada = fabs(da_r) + fabs(da_i); FLOAT ada = fabs(da_r) + fabs(da_i);
FLOAT adb; FLOAT adb;


PRINT_DEBUG_NAME;

IDEBUG_START;

FUNCTION_PROFILE_START();

if (ada == ZERO) { if (ada == ZERO) {
*C = ZERO; *C = ZERO;
*(S + 0) = ONE; *(S + 0) = ONE;


+ 60
- 10
kernel/CMakeLists.txt View File

@@ -17,6 +17,7 @@ endif ()


SetDefaultL1() SetDefaultL1()
SetDefaultL2() SetDefaultL2()
SetDefaultL3()
ParseMakefileVars("${KERNELDIR}/KERNEL") ParseMakefileVars("${KERNELDIR}/KERNEL")
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")


@@ -65,8 +66,20 @@ foreach (float_type ${FLOAT_TYPES})
else () else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
endif () endif ()

if (${float_type} STREQUAL "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type})
endif()
if (${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type})
endif()

endforeach () endforeach ()


#dsdot,sdsdot
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")

# Makefile.L2 # Makefile.L2
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
@@ -86,6 +99,12 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})

GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type})

else () else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
@@ -93,14 +112,9 @@ foreach (float_type ${FLOAT_TYPES})
endforeach () endforeach ()


# Makefile.L3 # Makefile.L3
set(USE_GEMM3M false)
set(USE_TRMM false) set(USE_TRMM false)


if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
set(USE_GEMM3M true)
endif ()

if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC")
if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic")
set(USE_TRMM true) set(USE_TRMM true)
endif () endif ()


@@ -155,6 +169,13 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})



#hemm
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type})

else () else ()
GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
endif () endif ()
@@ -241,11 +262,40 @@ foreach (float_type ${FLOAT_TYPES})
endif () endif ()
endif () endif ()


GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type})

if (NOT DEFINED ${float_char}OMATCOPY_CNC)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c)
endif ()
endif ()
if (NOT DEFINED ${float_char}OMATCOPY_RNC)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c)
endif ()
endif ()
if (NOT DEFINED ${float_char}OMATCOPY_CTC)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c)
endif ()
endif ()
if (NOT DEFINED ${float_char}OMATCOPY_RTC)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c)
endif ()
endif ()

if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type})
endif()


GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach () endforeach ()


# Makefile.LA # Makefile.LA


+ 1
- 1
kernel/Makefile.L3 View File

@@ -3459,7 +3459,7 @@ ifndef DGEADD_K
DGEADD_K = ../generic/geadd.c DGEADD_K = ../generic/geadd.c
endif endif


$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K)
$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@


ifndef CGEADD_K ifndef CGEADD_K


+ 5
- 2
kernel/arm/zaxpby.c View File

@@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL
BLASLONG ix,iy; BLASLONG ix,iy;
FLOAT temp; FLOAT temp;


BLASLONG inc_x2;
BLASLONG inc_y2;

if ( n < 0 ) return(0); if ( n < 0 ) return(0);


ix = 0; ix = 0;
iy = 0; iy = 0;


BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;


if ( beta_r == 0.0 && beta_i == 0.0) if ( beta_r == 0.0 && beta_i == 0.0)
{ {


+ 4
- 2
kernel/arm/zaxpy.c View File

@@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix,iy; BLASLONG ix,iy;
BLASLONG inc_x2;
BLASLONG inc_y2;


if ( n < 0 ) return(0); if ( n < 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0);
@@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
ix = 0; ix = 0;
iy = 0; iy = 0;


BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;


while(i < n) while(i < n)
{ {


+ 4
- 2
kernel/arm/zcopy.c View File

@@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
BLASLONG inc_x2;
BLASLONG inc_y2;


if ( n < 0 ) return(0); if ( n < 0 ) return(0);


BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;


while(i < n) while(i < n)
{ {


+ 10
- 8
kernel/arm/zdot.c View File

@@ -40,24 +40,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <complex.h> #include <complex.h>
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else #else
openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif #endif
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT dot[2]; FLOAT dot[2];
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
BLASLONG inc_x2;
BLASLONG inc_y2;


dot[0]=0.0; dot[0]=0.0;
dot[1]=0.0; dot[1]=0.0;


__real__ result = 0.0 ;
__imag__ result = 0.0 ;
CREAL(result) = 0.0 ;
CIMAG(result) = 0.0 ;


if ( n < 1 ) return(result); if ( n < 1 ) return(result);


BLASLONG inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ;
inc_x2 = 2 * inc_x ;
inc_y2 = 2 * inc_y ;


while(i < n) while(i < n)
{ {
@@ -73,8 +75,8 @@ openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BL
i++ ; i++ ;


} }
__real__ result = dot[0];
__imag__ result = dot[1];
CREAL(result) = dot[0];
CIMAG(result) = dot[1];
return(result); return(result);


} }


+ 4
- 2
kernel/arm/zrot.c View File

@@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp[2]; FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;


if ( n <= 0 ) return(0); if ( n <= 0 ) return(0);


BLASLONG inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ;
inc_x2 = 2 * inc_x ;
inc_y2 = 2 * inc_y ;


while(i < n) while(i < n)
{ {


+ 4
- 2
kernel/arm/zswap.c View File

@@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp[2]; FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;


if ( n < 0 ) return(0); if ( n < 0 ) return(0);


BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;


while(i < n) while(i < n)
{ {


+ 6
- 0
kernel/x86_64/KERNEL.generic View File

@@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c
ZHEMV_U_KERNEL = ../generic/zhemv_k.c ZHEMV_U_KERNEL = ../generic/zhemv_k.c
ZHEMV_L_KERNEL = ../generic/zhemv_k.c ZHEMV_L_KERNEL = ../generic/zhemv_k.c


LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c

#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

+ 2
- 1
openblas_config_template.h View File

@@ -59,7 +59,8 @@ typedef int blasint;
extension since version 3.0. If neither are available, use a compatible extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus)))
(__GNUC__ >= 3 && !defined(__cplusplus)) || \
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99 #define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus #ifndef __cplusplus
#include <complex.h> #include <complex.h>


Loading…
Cancel
Save