Browse Source

Use cmake to build OpenBLAS GENERIC Target on MSVC x86 64-bit.

Disable CBLAS and LAPACK.
tags/v0.2.15^2
Zhang Xianyi 10 years ago
parent
commit
f874465bb8
51 changed files with 488 additions and 120 deletions
  1. +16
    -4
      CMakeLists.txt
  2. +60
    -0
      cmake/export.cmake
  3. +3
    -0
      cmake/f_check.cmake
  4. +11
    -4
      cmake/kernel.cmake
  5. +7
    -2
      cmake/prebuild.cmake
  6. +15
    -0
      cmake/system.cmake
  7. +4
    -0
      cmake/utils.cmake
  8. +37
    -8
      common.h
  9. +31
    -4
      common_x86_64.h
  10. +59
    -0
      driver/level2/CMakeLists.txt
  11. +1
    -1
      driver/level2/gbmv_thread.c
  12. +1
    -1
      driver/level2/sbmv_thread.c
  13. +1
    -1
      driver/level2/spmv_thread.c
  14. +1
    -1
      driver/level2/tbmv_thread.c
  15. +1
    -1
      driver/level2/tpmv_thread.c
  16. +1
    -1
      driver/level2/trmv_thread.c
  17. +1
    -1
      driver/level2/zgbmv_k.c
  18. +6
    -4
      driver/level2/zhbmv_k.c
  19. +6
    -4
      driver/level2/zhpmv_k.c
  20. +4
    -2
      driver/level2/zsbmv_k.c
  21. +2
    -1
      driver/level2/zspmv_k.c
  22. +1
    -1
      driver/level2/ztbmv_L.c
  23. +1
    -1
      driver/level2/ztbmv_U.c
  24. +1
    -1
      driver/level2/ztbsv_L.c
  25. +1
    -1
      driver/level2/ztbsv_U.c
  26. +1
    -1
      driver/level2/ztpmv_L.c
  27. +1
    -1
      driver/level2/ztpmv_U.c
  28. +1
    -1
      driver/level2/ztpsv_L.c
  29. +1
    -1
      driver/level2/ztpsv_U.c
  30. +1
    -1
      driver/level2/ztrmv_L.c
  31. +1
    -1
      driver/level2/ztrmv_U.c
  32. +1
    -1
      driver/level2/ztrsv_L.c
  33. +1
    -1
      driver/level2/ztrsv_U.c
  34. +29
    -8
      driver/level3/CMakeLists.txt
  35. +2
    -0
      driver/others/CMakeLists.txt
  36. +37
    -2
      interface/CMakeLists.txt
  37. +1
    -2
      interface/rotg.c
  38. +2
    -2
      interface/zaxpby.c
  39. +14
    -10
      interface/zdot.c
  40. +10
    -7
      interface/zgemv.c
  41. +13
    -7
      interface/zrotg.c
  42. +60
    -10
      kernel/CMakeLists.txt
  43. +1
    -1
      kernel/Makefile.L3
  44. +5
    -2
      kernel/arm/zaxpby.c
  45. +4
    -2
      kernel/arm/zaxpy.c
  46. +4
    -2
      kernel/arm/zcopy.c
  47. +10
    -8
      kernel/arm/zdot.c
  48. +4
    -2
      kernel/arm/zrot.c
  49. +4
    -2
      kernel/arm/zswap.c
  50. +6
    -0
      kernel/x86_64/KERNEL.generic
  51. +2
    -1
      openblas_config_template.h

+ 16
- 4
CMakeLists.txt View File

@@ -15,11 +15,13 @@ enable_language(C)
set(OpenBLAS_LIBNAME openblas)

#######
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON)
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON)
option(BUILD_DEBUG "Build Debug Version" OFF)
#######
if(BUILD_WITHOUT_LAPACK)
set(NO_LAPACK 1)
set(NO_LAPACKE 1)
endif()

if(BUILD_DEBUG)
@@ -27,6 +29,11 @@ set(CMAKE_BUILD_TYPE Debug)
else()
set(CMAKE_BUILD_TYPE Release)
endif()

if(BUILD_WITHOUT_CBLAS)
set(NO_CBLAS 1)
endif()

#######


@@ -51,7 +58,6 @@ endif ()

set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
message ("error 1")
list(APPEND SUBDIRS lapack)
endif ()

@@ -111,15 +117,21 @@ endforeach ()
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NOFORTRAN AND NOT NO_LAPACK)
message ("error 2")
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE)
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()
endif ()

#Only generate .def for dll on MSVC
if(MSVC)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()

# add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${PROJECT_BINARY_DIR}/openblas.def)

include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")

#only build shared library for MSVC
if(NOT MSVC)


+ 60
- 0
cmake/export.cmake View File

@@ -0,0 +1,60 @@

# Generates the module-definition (.def) file for the OpenBLAS DLL.
# Only MSVC builds need it; for every other compiler this file does nothing.
#
# Inputs read from the including scope (all optional unless noted):
#   OpenBLAS_LIBNAME  - target the PRE_LINK step is attached to (required)
#   OpenBLAS_DEF_FILE - path marked as a generated source
#   ARCH, CORE, EXPRECISION, NO_CBLAS, NO_LAPACK, NO_LAPACKE,
#   NEED2UNDERSCORES, ONLY_CBLAS, SYMBOLPREFIX, SYMBOLSUFFIX
#                     - forwarded as positional arguments to exports/gensymbol
if(MSVC)

  # The .def file is produced at build time by the command below, so it must
  # be flagged GENERATED to be accepted as a source before it exists.
  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

  # Architecture argument: fall back to x86_64 when ARCH was never set.
  if(DEFINED ARCH)
    set(ARCH_IN ${ARCH})
  else()
    set(ARCH_IN "x86_64")
  endif()

  # The generic core overrides the architecture argument. The expansion is
  # quoted so that an unset or empty CORE compares as false instead of
  # producing a malformed if() expression.
  if("${CORE}" STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif()

  # Each of these switches is passed through unchanged when defined and
  # defaults to 0 otherwise; the result is stored in <name>_IN.
  foreach(export_opt
      EXPRECISION
      NO_CBLAS
      NO_LAPACK
      NO_LAPACKE
      NEED2UNDERSCORES
      ONLY_CBLAS)
    if(DEFINED ${export_opt})
      set(${export_opt}_IN ${${export_opt}})
    else()
      set(${export_opt}_IN 0)
    endif()
  endforeach()

  # Run gensymbol just before linking and capture its output as the .def file.
  # NOTE(review): the output goes to ${PROJECT_BINARY_DIR}/openblas.def rather
  # than ${OpenBLAS_DEF_FILE}; confirm both always name the same file.
  # NOTE(review): relies on the build tool's shell honouring the `>`
  # redirection and on `perl` being on PATH - confirm for each generator used.
  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
    COMMENT "Create openblas.def file"
    VERBATIM)

endif()

+ 3
- 0
cmake/f_check.cmake View File

@@ -25,7 +25,10 @@ if (MSVC)
include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif ()

if (NOT NO_LAPACK)
enable_language(Fortran)
endif()

if (NOT ONLY_CBLAS)
# N.B. f_check is not cross-platform, so instead try to use CMake variables


+ 11
- 4
cmake/kernel.cmake View File

@@ -99,10 +99,10 @@ macro(SetDefaultL1)
set(QGEMVTKERNEL gemv_t.S)
set(XGEMVNKERNEL zgemv_n.S)
set(XGEMVTKERNEL zgemv_t.S)
set(SCABS_KERNEL cabs.S)
set(DCABS_KERNEL cabs.S)
set(QCABS_KERNEL cabs.S)
set(LSAME_KERNEL lsame.S)
set(SCABS_KERNEL ../generic/cabs.c)
set(DCABS_KERNEL ../generic/cabs.S)
set(QCABS_KERNEL ../generic/cabs.S)
set(LSAME_KERNEL ../generic/lsame.c)
set(SAXPBYKERNEL ../arm/axpby.c)
set(DAXPBYKERNEL ../arm/axpby.c)
set(CAXPBYKERNEL ../arm/zaxpby.c)
@@ -156,3 +156,10 @@ macro(SetDefaultL2)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro ()

# Sets the default GEADD kernel source for each precision prefix:
# S and D use ../generic/geadd.c, C and Z use ../generic/zgeadd.c.
# This is a macro, so the variables are set directly in the caller's scope.
macro(SetDefaultL3)
set(SGEADD_KERNEL ../generic/geadd.c)
set(DGEADD_KERNEL ../generic/geadd.c)
set(CGEADD_KERNEL ../generic/zgeadd.c)
set(ZGEADD_KERNEL ../generic/zgeadd.c)
endmacro ()

+ 7
- 2
cmake/prebuild.cmake View File

@@ -66,6 +66,11 @@ if (NOT MSVC)
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif ()

if (MSVC)
# MSVC builds use the generic target for now: append -DFORCE_GENERIC to the
# flags that are passed to the getarch compile steps.
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
endif()

set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
@@ -73,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${GETARCH_BIN}
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)

message(STATUS "Running getarch")
@@ -95,7 +100,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${GETARCH2_BIN}
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)

# use the cmake binary w/ the -E param to run a shell command in a cross-platform way


+ 15
- 0
cmake/system.cmake View File

@@ -420,6 +420,21 @@ if (ONLY_CBLAS)
set(LIB_COMPONENTS CBLAS)
endif ()


# For GEMM3M
# USE_GEMM3M is off by default. It is switched on for the architectures
# listed below and forced back off when the selected core is the generic one.
# When ARCH is not defined at all, the default of 0 is kept.
set(USE_GEMM3M 0)

if(DEFINED ARCH)
  # Expansions are quoted so that an empty ARCH or an unset CORE simply
  # compares as false instead of producing a malformed if() expression.
  if("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif()

  if("${CORE}" STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif()
endif()


#export OSNAME
#export ARCH
#export CORE


+ 4
- 0
cmake/utils.cmake View File

@@ -102,6 +102,7 @@ endfunction ()
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
# STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)

@@ -151,6 +152,9 @@ function(GenerateNamedObjects sources_in)
set(complex_only true)
elseif (${ARGV7} EQUAL 3)
set(mangle_complex_sources true)
elseif (${ARGV7} EQUAL 4)
set(mangle_complex_sources true)
set(complex_only true)
elseif (NOT ${ARGV7} EQUAL 0)
set(float_list ${ARGV7})
endif ()


+ 37
- 8
common.h View File

@@ -296,13 +296,6 @@ typedef int blasint;
#define COMPSIZE 2
#endif

#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#define CREAL __real__
#define CIMAG __imag__
#endif

#define Address_H(x) (((x)+(1<<15))>>16)
#define Address_L(x) ((x)-((Address_H(x))<<16))
@@ -464,17 +457,49 @@ typedef char* env_var_t;
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus)))
(__GNUC__ >= 3 && !defined(__cplusplus)) || \
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif

#ifdef XDOUBLE
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
#elif defined(DOUBLE)
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
#else
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
#endif

#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#ifdef OPENBLAS_COMPLEX_STRUCT
#define CREAL(Z) ((Z).real)
#define CIMAG(Z) ((Z).imag)
#else
#define CREAL __real__
#define CIMAG __imag__
#endif
#endif

#endif // ASSEMBLER

#ifndef IFLUSH
@@ -491,6 +516,10 @@ typedef char* env_var_t;
#endif
#endif

#if defined(C_MSVC)
#define inline __inline
#endif

#ifndef ASSEMBLER

#ifndef MIN


+ 31
- 4
common_x86_64.h View File

@@ -41,6 +41,10 @@

#ifndef ASSEMBLER

#ifdef C_MSVC
#include <intrin.h>
#endif

#ifdef C_SUN
#define __asm__ __asm
#define __volatile__
@@ -61,30 +65,39 @@

static void __inline blas_lock(volatile BLASULONG *address){

int ret;
BLASULONG ret;

do {
while (*address) {YIELDING;};

#ifndef C_MSVC
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");

#else
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
#endif
} while (ret);

}

/* Returns a 64-bit counter reading obtained from the rdtsc instruction:
   through the __rdtsc() intrinsic under MSVC, through inline assembly
   otherwise. */
static __inline BLASULONG rpcc(void){
#ifdef C_MSVC
return __rdtsc();
#else
BLASULONG a, d;

/* rdtsc output: low half bound to the "a" register, high half to "d". */
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));

/* Combine the two halves into one value. */
return ((BLASULONG)a + ((BLASULONG)d << 32));
#endif
}

#define RPCC64BIT

#ifndef C_MSVC
static __inline BLASULONG getstackaddr(void){
BLASULONG addr;

@@ -93,22 +106,32 @@ static __inline BLASULONG getstackaddr(void){

return addr;
}
#endif

/* Executes the cpuid instruction with `op` as the requested function and
   stores the four result registers through eax, ebx, ecx and edx. */
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){

#ifdef C_MSVC
/* MSVC path: the __cpuid intrinsic writes its four results into cpuinfo[],
   which are then copied out in eax, ebx, ecx, edx order. */
int cpuinfo[4];
__cpuid(cpuinfo, op);
*eax=cpuinfo[0];
*ebx=cpuinfo[1];
*ecx=cpuinfo[2];
*edx=cpuinfo[3];
#else
/* Inline-assembly path: `op` is passed in the same register as the first
   output (constraint "0"), and the outputs are written straight through
   the caller's pointers. */
__asm__ __volatile__("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));
#endif
}

/*
#define WHEREAMI
*/

static inline int WhereAmI(void){
static __inline int WhereAmI(void){
int eax, ebx, ecx, edx;
int apicid;

@@ -150,10 +173,14 @@ static inline int WhereAmI(void){
#define GET_IMAGE_CANCEL

#ifdef SMP
#ifdef USE64BITINT
#if defined(USE64BITINT)
static __inline blasint blas_quickdivide(blasint x, blasint y){
return x / y;
}
#elif defined (C_MSVC)
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
return x / y;
}
#else
extern unsigned int blas_quick_divide_table[];



+ 59
- 0
driver/level2/CMakeLists.txt View File

@@ -46,12 +46,28 @@ set(NU_SMP_SOURCES
tbmv_thread.c
)

set(ULVM_COMPLEX_SOURCES
hbmv_k.c
hpmv_k.c
hpr_k.c
hpr2_k.c
her_k.c
her2_k.c
)

# objects that need LOWER set
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)

# gbmv uses a lowercase n and t
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
# c/zgbmv
GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)

# special defines for complex
foreach (float_type ${FLOAT_TYPES})
@@ -82,6 +98,14 @@ foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
endforeach ()

foreach (ulvm_source ${ULVM_COMPLEX_SOURCES})
string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source})
GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
endforeach()

if (SMP)

GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
@@ -103,6 +127,41 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})

GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type})

GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type})

foreach (nu_smp_src ${NU_SMP_SOURCES})
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})


+ 1
- 1
driver/level2/gbmv_thread.c View File

@@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif



+ 1
- 1
driver/level2/sbmv_thread.c View File

@@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif

a = (FLOAT *)args -> a;


+ 1
- 1
driver/level2/spmv_thread.c View File

@@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif

a = (FLOAT *)args -> a;


+ 1
- 1
driver/level2/tbmv_thread.c View File

@@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif



+ 1
- 1
driver/level2/tpmv_thread.c View File

@@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif



+ 1
- 1
driver/level2/trmv_thread.c View File

@@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif



+ 1
- 1
driver/level2/zgbmv_k.c View File

@@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer;
#ifdef TRANS
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif

if (incy != 1) {


+ 6
- 4
driver/level2/zhbmv_k.c View File

@@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = sbmvbuffer;
FLOAT temp[2];

OPENBLAS_COMPLEX_FLOAT result;

if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];

if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];

if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];

if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];

if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);


+ 6
- 4
driver/level2/zhpmv_k.c View File

@@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = gemvbuffer;
FLOAT temp[2];

OPENBLAS_COMPLEX_FLOAT result;

if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
@@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#ifndef HEMVREV
#ifndef LOWER
if (i > 0) {
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1);
result = DOTC_K(i, a, 1, X, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else

if (m - i > 1) {
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else
#ifndef LOWER
if (i > 0) {
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1);
result = DOTU_K(i, a, 1, X, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else

if (m - i > 1) {
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);


+ 4
- 2
driver/level2/zsbmv_k.c View File

@@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferY = sbmvbuffer;
FLOAT *bufferX = sbmvbuffer;

OPENBLAS_COMPLEX_FLOAT result;

if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);

if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a, 1, Y + i * COMPSIZE, 1, NULL, 0);

if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);

Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);


+ 2
- 1
driver/level2/zspmv_k.c View File

@@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer;
FLOAT _Complex result;

OPENBLAS_COMPLEX_FLOAT result;

if (incy != 1) {
Y = bufferY;


+ 1
- 1
driver/level2/ztbmv_L.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztbmv_U.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztbsv_L.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztbsv_U.c View File

@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztpmv_L.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztpmv_U.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztpsv_L.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztpsv_U.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztrmv_L.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu

BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztrmv_U.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu

BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;


+ 1
- 1
driver/level2/ztrsv_L.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf

BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;


+ 1
- 1
driver/level2/ztrsv_U.c View File

@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf

BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;


+ 29
- 8
driver/level3/CMakeLists.txt View File

@@ -1,13 +1,5 @@
include_directories(${CMAKE_SOURCE_DIR})

set(USE_GEMM3M 0)

if (DEFINED ARCH)
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
set(USE_GEMM3M 1)
endif ()
endif ()

# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa

# loop through gemm.c defines
@@ -54,12 +46,41 @@ foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type})

#hemm
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type})
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type})

#her2k
GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})

if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
#hemm
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
#her2k
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
endif()

# special gemm defines for complex
foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
string(TOLOWER ${gemm_define} gemm_define_LC)
GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
if(USE_GEMM3M)
GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type})
endif()
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
if(USE_GEMM3M)
GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type})
endif()
endif ()
endforeach ()
endif ()


+ 2
- 0
driver/others/CMakeLists.txt View File

@@ -33,6 +33,8 @@ set(COMMON_SOURCES
xerbla.c
openblas_set_num_threads.c
openblas_error_handle.c
openblas_get_num_procs.c
openblas_get_num_threads.c
)

# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling


+ 37
- 2
interface/CMakeLists.txt View File

@@ -1,13 +1,16 @@

include_directories(${CMAKE_SOURCE_DIR})


set(BLAS1_SOURCES
copy.c
asum.c nrm2.c
nrm2.c
)

set(BLAS1_REAL_ONLY_SOURCES
rotm.c rotmg.c # N.B. these do not have complex counterparts
rot.c
asum.c
)

# these will have 'z' prepended for the complex version
@@ -15,7 +18,7 @@ set(BLAS1_MANGLED_SOURCES
axpy.c swap.c
scal.c
dot.c
rot.c rotg.c
rotg.c
axpby.c
)

@@ -31,6 +34,13 @@ set(BLAS2_SOURCES
tpsv.c tpmv.c
)

# Level-2 Hermitian interface sources (full, banded and packed storage).
# Consumed by a GenerateNamedObjects call that passes 4 as the last argument.
# NOTE(review): presumably 4 selects "complex types only, z-prefixed source
# file" - confirm against GenerateNamedObjects in cmake/utils.cmake.
set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
  hemv.c
  hbmv.c
  her.c
  her2.c
  hpmv.c
  hpr.c
  hpr2.c
)

# these do not have separate 'z' sources
set(BLAS3_SOURCES
gemm.c symm.c
@@ -39,6 +49,7 @@ set(BLAS3_SOURCES

set(BLAS3_MANGLED_SOURCES
omatcopy.c imatcopy.c
geadd.c
)

# generate the BLAS objs once with and once without cblas
@@ -65,9 +76,14 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS})
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4)
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})

#sdsdot, dsdot
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")

# trmm is trsm with a compiler flag set
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})

@@ -86,17 +102,36 @@ endforeach ()

# Interface objects that exist only for the complex precisions. Each call
# names its object explicitly instead of relying on the source-list loops.
# NOTE(review): the calls whose 7th argument is `true` appear to use the given
# name verbatim (scnrm2, csrot, ...) rather than applying the usual precision
# prefix - confirm against GenerateNamedObjects in cmake/utils.cmake.
foreach (float_type ${FLOAT_TYPES})

  # Shared by single- and double-precision complex.
  if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
    # zger.c / zdot.c are each built twice: plain and with CONJ defined.
    GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type})
    GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type})
    GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type})
    GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type})

    # Hermitian level-3 routines reuse the symmetric sources with HEMM defined.
    GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type})
    GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type})
    GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type})

    if (USE_GEMM3M)
      GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type})
    endif ()
  endif ()

  # Routines that mix a real scalar/result with complex vectors; the object
  # names differ per precision, so each precision gets its own list.
  if (${float_type} STREQUAL "COMPLEX")
    GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX")
    GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX")
    GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX")
    GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX")
    GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX")
    GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX")
  elseif (${float_type} STREQUAL "ZCOMPLEX")
    GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX")
    GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX")
    GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX")
    GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX")
    GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX")
    GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX")
  endif ()

endforeach ()



+ 1
- 2
interface/rotg.c View File

@@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

#endif


#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)

long double da = *DA;
long double db = *DB;


+ 2
- 2
interface/zaxpby.c View File

@@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *

#endif

if (n <= 0) return;

FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1);

if (n <= 0) return;

FUNCTION_PROFILE_START();

if (incx < 0) x -= (n - 1) * incx * 2;


+ 14
- 10
interface/zdot.c View File

@@ -57,21 +57,25 @@
#ifdef RETURN_BY_STRUCT
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#elif defined RETURN_BY_STACK
void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#else
FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#endif

BLASLONG n = *N;
BLASLONG incx = *INCX;
BLASLONG incy = *INCY;
#ifndef RETURN_BY_STACK
FLOAT _Complex ret;
OPENBLAS_COMPLEX_FLOAT ret;
#endif
#ifdef RETURN_BY_STRUCT
MYTYPE myret;
#endif

#ifndef RETURN_BY_STRUCT
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif

PRINT_DEBUG_NAME;

if (n <= 0) {
@@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
myret.i = 0.;
return myret;
#elif defined RETURN_BY_STACK
*result = ZERO;
*result = zero;
return;
#else
return ZERO;
return zero;
#endif
}

@@ -144,21 +148,21 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
#else

#ifdef FORCE_USE_STACK
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){
#else
FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

FLOAT _Complex ret;
OPENBLAS_COMPLEX_FLOAT ret;
#endif

PRINT_DEBUG_CNAME;

if (n <= 0) {
#ifdef FORCE_USE_STACK
*result = ZERO;
*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
return;
#else
return ZERO;
return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif
}



+ 10
- 7
interface/zgemv.c View File

@@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
FLOAT *buffer;
#ifdef SMP
int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif

int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
@@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N,
blasint lenx, leny;
blasint i;

PRINT_DEBUG_NAME;

FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1);

FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1);

PRINT_DEBUG_NAME;

TOUPPER(trans);

info = 0;
@@ -153,14 +156,14 @@ void CNAME(enum CBLAS_ORDER order,
GEMV_O, GEMV_U, GEMV_S, GEMV_D,
};

PRINT_DEBUG_CNAME;

FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1);

FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1);

PRINT_DEBUG_CNAME;

trans = -1;
info = 0;

@@ -234,10 +237,10 @@ void CNAME(enum CBLAS_ORDER order,

#ifdef SMP

int nthreads_max = num_cpu_avail(2);
int nthreads_avail = nthreads_max;
nthreads_max = num_cpu_avail(2);
nthreads_avail = nthreads_max;

double MNK = (double) m * (double) n;
MNK = (double) m * (double) n;
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
nthreads_max = 1;



+ 13
- 7
interface/zrotg.c View File

@@ -6,13 +6,7 @@

void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

PRINT_DEBUG_NAME;

IDEBUG_START;

FUNCTION_PROFILE_START();

#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)

long double da_r = *(DA + 0);
long double da_i = *(DA + 1);
@@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

long double ada = fabs(da_r) + fabs(da_i);

PRINT_DEBUG_NAME;

IDEBUG_START;

FUNCTION_PROFILE_START();

if (ada == ZERO) {
*C = ZERO;
*(S + 0) = ONE;
@@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
FLOAT ada = fabs(da_r) + fabs(da_i);
FLOAT adb;

PRINT_DEBUG_NAME;

IDEBUG_START;

FUNCTION_PROFILE_START();

if (ada == ZERO) {
*C = ZERO;
*(S + 0) = ONE;


+ 60
- 10
kernel/CMakeLists.txt View File

@@ -17,6 +17,7 @@ endif ()

SetDefaultL1()
SetDefaultL2()
SetDefaultL3()
ParseMakefileVars("${KERNELDIR}/KERNEL")
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")

@@ -65,8 +66,20 @@ foreach (float_type ${FLOAT_TYPES})
else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
endif ()

if (${float_type} STREQUAL "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type})
endif()
if (${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type})
endif()

endforeach ()

#dsdot,sdsdot
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")

# Makefile.L2
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
@@ -86,6 +99,12 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})

GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type})

else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
@@ -93,14 +112,9 @@ foreach (float_type ${FLOAT_TYPES})
endforeach ()

# Makefile.L3
set(USE_GEMM3M false)
set(USE_TRMM false)

if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
set(USE_GEMM3M true)
endif ()

if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC")
if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic")
set(USE_TRMM true)
endif ()

@@ -155,6 +169,13 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})


#hemm
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type})

else ()
GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
endif ()
@@ -241,11 +262,40 @@ foreach (float_type ${FLOAT_TYPES})
endif ()
endif ()

GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type})

# Conjugating omatcopy kernels (CNC/RNC/CTC/RTC) are only handled when
# float_char is "C" or "Z".
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")

  # For every variant that has no <float_char>OMATCOPY_<variant> variable
  # defined yet, fall back to ../arm/zomatcopy_<variant, lower-case>.c.
  foreach (omatcopy_variant CNC RNC CTC RTC)
    if (NOT DEFINED ${float_char}OMATCOPY_${omatcopy_variant})
      string(TOLOWER "${omatcopy_variant}" omatcopy_suffix)
      set(${float_char}OMATCOPY_${omatcopy_variant} ../arm/zomatcopy_${omatcopy_suffix}.c)
    endif ()
  endforeach ()

  # All four objects are compiled with CONJ; the R* variants also get ROWM.
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type})
endif ()

GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach ()

# Makefile.LA


+ 1
- 1
kernel/Makefile.L3 View File

@@ -3459,7 +3459,7 @@ ifndef DGEADD_K
DGEADD_K = ../generic/geadd.c
endif

$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K)
$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@

ifndef CGEADD_K


+ 5
- 2
kernel/arm/zaxpby.c View File

@@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL
BLASLONG ix,iy;
FLOAT temp;

BLASLONG inc_x2;
BLASLONG inc_y2;

if ( n < 0 ) return(0);

ix = 0;
iy = 0;

BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

if ( beta_r == 0.0 && beta_i == 0.0)
{


+ 4
- 2
kernel/arm/zaxpy.c View File

@@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
{
BLASLONG i=0;
BLASLONG ix,iy;
BLASLONG inc_x2;
BLASLONG inc_y2;

if ( n < 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0);
@@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
ix = 0;
iy = 0;

BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

while(i < n)
{


+ 4
- 2
kernel/arm/zcopy.c View File

@@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
BLASLONG inc_x2;
BLASLONG inc_y2;

if ( n < 0 ) return(0);

BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

while(i < n)
{


+ 10
- 8
kernel/arm/zdot.c View File

@@ -40,24 +40,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <complex.h>
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT dot[2];
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
BLASLONG inc_x2;
BLASLONG inc_y2;

dot[0]=0.0;
dot[1]=0.0;

__real__ result = 0.0 ;
__imag__ result = 0.0 ;
CREAL(result) = 0.0 ;
CIMAG(result) = 0.0 ;

if ( n < 1 ) return(result);

BLASLONG inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ;
inc_x2 = 2 * inc_x ;
inc_y2 = 2 * inc_y ;

while(i < n)
{
@@ -73,8 +75,8 @@ openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BL
i++ ;

}
__real__ result = dot[0];
__imag__ result = dot[1];
CREAL(result) = dot[0];
CIMAG(result) = dot[1];
return(result);

}


+ 4
- 2
kernel/arm/zrot.c View File

@@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;

if ( n <= 0 ) return(0);

BLASLONG inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ;
inc_x2 = 2 * inc_x ;
inc_y2 = 2 * inc_y ;

while(i < n)
{


+ 4
- 2
kernel/arm/zswap.c View File

@@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;

if ( n < 0 ) return(0);

BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

while(i < n)
{


+ 6
- 0
kernel/x86_64/KERNEL.generic View File

@@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
ZHEMV_L_KERNEL = ../generic/zhemv_k.c

LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c

#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

+ 2
- 1
openblas_config_template.h View File

@@ -59,7 +59,8 @@ typedef int blasint;
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus)))
(__GNUC__ >= 3 && !defined(__cplusplus)) || \
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>


Loading…
Cancel
Save