From 90a4dab501ce11947981c92af85106dec98ca7ba Mon Sep 17 00:00:00 2001 From: Date: Thu, 17 Aug 2017 00:35:54 +1000 Subject: [PATCH 001/122] Let CMake deal with build type. --- CMakeLists.txt | 17 ----------------- cmake/system.cmake | 4 ++-- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a91ea5ff7..4575fd390 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,6 @@ if(MSVC) option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) endif() option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF) -option(BUILD_DEBUG "Build Debug Version" OFF) option(DYNAMIC_ARCH "Build with DYNAMIC_ARCH" OFF) ####### if(BUILD_WITHOUT_LAPACK) @@ -34,22 +33,6 @@ set(NO_LAPACK 1) set(NO_LAPACKE 1) endif() -if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator? - set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE) - set(CMAKE_BUILD_TYPE - Debug Debug - Release Release - ) -else() - if( NOT CMAKE_BUILD_TYPE ) - if(BUILD_DEBUG) - set(CMAKE_BUILD_TYPE Debug) - else() - set(CMAKE_BUILD_TYPE Release) - endif() - endif() -endif() - if(BUILD_WITHOUT_CBLAS) set(NO_CBLAS 1) endif() diff --git a/cmake/system.cmake b/cmake/system.cmake index 3d3270778..5fe9081ac 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -52,7 +52,7 @@ if (NO_AVX2) set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2") endif () -if (CMAKE_BUILD_TYPE STREQUAL Debug) +if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(GETARCH_FLAGS "${GETARCH_FLAGS} -g") endif () @@ -316,7 +316,7 @@ set(SED sed) set(REVISION "-r${OpenBLAS_VERSION}") set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) -if (DEBUG) +if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(COMMON_OPT "${COMMON_OPT} -g") endif () From 7242cdc4ecc761e565db4e0ff163cbf3172df3bc Mon Sep 17 00:00:00 2001 From: Date: Thu, 17 Aug 2017 00:51:04 +1000 Subject: [PATCH 002/122] Allow CMake to determine if it is building static or shared. --- CMakeLists.txt | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4575fd390..8c4bd8b98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,14 +3,13 @@ ## cmake_minimum_required(VERSION 2.8.5) -project(OpenBLAS) +project(OpenBLAS C) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) set(OpenBLAS_PATCH_VERSION 0.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") enable_language(ASM) -enable_language(C) # Adhere to GNU filesystem layout conventions include(GNUInstallDirs) @@ -99,10 +98,6 @@ if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.") endif () -if (${NO_STATIC} AND ${NO_SHARED}) - message(FATAL_ERROR "Neither static nor shared are enabled.") -endif () - #Set default output directory set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) @@ -152,9 +147,14 @@ if (${DYNAMIC_ARCH}) endforeach() endif () +# Only build shared libs for MSVC +if (MSVC) + set(BUILD_SHARED_LIBS ON) +endif() + # add objects to the openblas lib -add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) +add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) include("${PROJECT_SOURCE_DIR}/cmake/export.cmake") @@ -173,23 +173,16 @@ endforeach() enable_testing() add_subdirectory(utest) -if (NOT MSVC) - #only build shared library for MSVC - - add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) - set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) - set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) +# Add threading library to linker +find_package(Threads) +target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT}) - if(SMP) - target_link_libraries(${OpenBLAS_LIBNAME} pthread) - target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) -endif() - -#build test and ctest -add_subdirectory(test) -if(NOT NO_CBLAS) -add_subdirectory(ctest) -endif() +if (NOT MSVC) + # Build test and ctest + add_subdirectory(test) + if(NOT NO_CBLAS) + add_subdirectory(ctest) + endif() endif() set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES @@ -265,10 +258,6 @@ if(NOT NO_LAPACKE) ) install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() - -if(NOT MSVC) - install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() include(FindPkgConfig QUIET) if(PKG_CONFIG_FOUND) From 38d273ea03e4438001fb752835bd1d67e616c6d9 Mon Sep 17 00:00:00 2001 From: Date: Thu, 17 Aug 2017 02:04:36 +1000 Subject: [PATCH 003/122] Drop some redundant vars and improve arch detection in CMake. --- CMakeLists.txt | 8 +++-- cmake/c_check.cmake | 88 +++++++++++++++++++++++++-------------------- cmake/os.cmake | 2 +- cmake/system.cmake | 31 ++++------------ 4 files changed, 62 insertions(+), 67 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c4bd8b98..95dbdf9ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,9 +173,11 @@ endforeach() enable_testing() add_subdirectory(utest) -# Add threading library to linker -find_package(Threads) -target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT}) +if (USE_THREAD) + # Add threading library to linker + find_package(Threads) + target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT}) +endif() if (NOT MSVC) # Build test and ctest diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 2249a873f..4ac661f47 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -26,14 +26,8 @@ # N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. set(FU "") -if(APPLE) -set(FU "_") -elseif(MSVC AND ${CMAKE_C_COMPILER_ID} MATCHES "Clang") -set(FU "") -elseif(MSVC) -set(FU "_") -elseif(UNIX) -set(FU "") +if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")) + set(FU "_") endif() # Convert CMake vars into the format that OpenBLAS expects @@ -42,43 +36,59 @@ if (${HOST_OS} STREQUAL "WINDOWS") set(HOST_OS WINNT) endif () -# added by hpa - check size of void ptr to detect 64-bit compile -if (NOT DEFINED BINARY) - set(BINARY 32) - if (CMAKE_SIZEOF_VOID_P EQUAL 8) - set(BINARY 64) - endif () -endif () - -if (BINARY EQUAL 64) - set(BINARY64 1) -else () - set(BINARY32 1) -endif () +if(CMAKE_COMPILER_IS_GNUCC AND WIN32) + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine + OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(OPENBLAS_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64") + set(MINGW64 1) + endif() +endif() -# CMake docs define these: -# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for. -# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on. -# -# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check -set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture") +# Pretty thorough determination of arch. Add more if needed +if(CMAKE_CL_64 OR MINGW64) + set(X86_64 1) +elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING)) + set(X86 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc") + set(PPC 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") + set(X86_64 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") + set(X86 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") + set(ARM 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") + set(ARM64 1) +endif() -if (${ARCH} STREQUAL "AMD64") +if (X86_64) set(ARCH "x86_64") +elseif(X86) + set(ARCH "x86") +elseif(PPC) + set(ARCH "power") +elseif(ARM) + set(ARCH "arm") +elseif(ARM64) + set(ARCH "arm64") +else() + set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture") endif () -# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong -if (${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32) - set(ARCH x86) -endif () - -if (${ARCH} STREQUAL "X86") - set(ARCH x86) -endif () +if (NOT BINARY) + if (X86_64 OR ARM64 OR PPC OR ARCH STREQUAL "mips64") + set(BINARY 64) + else () + set(BINARY 32) + endif () +endif() -if (${ARCH} MATCHES "ppc") - set(ARCH power) -endif () +if(BINARY EQUAL 64) + set(BINARY64 1) +else() + set(BINARY32 1) +endif() set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) if (${COMPILER_ID} STREQUAL "GNU") diff --git a/cmake/os.cmake b/cmake/os.cmake index e9df68d7f..1e9458ccf 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -78,7 +78,7 @@ if (CYGWIN) endif () if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android") - if (SMP) + if (USE_THREAD) set(EXTRALIB "${EXTRALIB} -lpthread") endif () endif () diff --git a/cmake/system.cmake b/cmake/system.cmake index 5fe9081ac..c0957c715 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -56,11 +56,6 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(GETARCH_FLAGS "${GETARCH_FLAGS} -g") endif () -# TODO: let CMake handle this? -hpa -#if (${QUIET_MAKE}) -# set(MAKE "${MAKE} -s") -#endif() - if (NOT DEFINED NO_PARALLEL_MAKE) set(NO_PARALLEL_MAKE 0) endif () @@ -79,30 +74,18 @@ endif () include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") +# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa if (NOT DEFINED NUM_THREADS) set(NUM_THREADS ${NUM_CORES}) endif () if (${NUM_THREADS} EQUAL 1) set(USE_THREAD 0) +elseif(NOT DEFINED USE_THREAD) + set(USE_THREAD 1) endif () -if (DEFINED USE_THREAD) - if (NOT ${USE_THREAD}) - unset(SMP) - else () - set(SMP 1) - endif () -else () - # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa - if (${NUM_THREADS} EQUAL 1) - unset(SMP) - else () - set(SMP 1) - endif () -endif () - -if (${SMP}) +if (USE_THREAD) message(STATUS "SMP enabled.") endif () @@ -182,7 +165,7 @@ if (NO_AVX2) set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2") endif () -if (SMP) +if (USE_THREAD) set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER") if (${ARCH} STREQUAL "mips64") @@ -386,7 +369,7 @@ if (NOT DEFINED LIBSUFFIX) endif () if (DYNAMIC_ARCH) - if (DEFINED SMP) + if (USE_THREAD) set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}") set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}") else () @@ -394,7 +377,7 @@ if (DYNAMIC_ARCH) set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}") endif () else () - if (DEFINED SMP) + if (USE_THREAD) set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}") set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}") else () From 7c1acc07f09ab8e99082fabac9651c2c367d82fd Mon Sep 17 00:00:00 2001 From: Date: Thu, 17 Aug 2017 03:32:04 +1000 Subject: [PATCH 004/122] Fix bug that required fortran. Fix bug that needed CXX var. Remove redundant set vars. Fix threading detection. Do not attempt to run code if cross compiling. --- CMakeLists.txt | 4 +-- cmake/c_check.cmake | 2 +- cmake/os.cmake | 13 ------- cmake/prebuild.cmake | 5 +++ cmake/system.cmake | 83 +++++++++++++------------------------------- 5 files changed, 33 insertions(+), 74 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95dbdf9ed..3bafeb9dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -179,10 +179,10 @@ if (USE_THREAD) target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT}) endif() -if (NOT MSVC) +if (NOT MSVC AND NOT NOFORTRAN) # Build test and ctest add_subdirectory(test) - if(NOT NO_CBLAS) + if(NOT NO_CBLAS) add_subdirectory(ctest) endif() endif() diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 4ac661f47..99590c975 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -90,7 +90,7 @@ else() set(BINARY32 1) endif() -set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) +set(COMPILER_ID ${CMAKE_C_COMPILER_ID}) if (${COMPILER_ID} STREQUAL "GNU") set(COMPILER_ID "GCC") endif () diff --git a/cmake/os.cmake b/cmake/os.cmake index 1e9458ccf..f50097aaf 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -3,19 +3,6 @@ ## Description: Ported from portion of OpenBLAS/Makefile.system ## Detects the OS and sets appropriate variables. -if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var - set(MD5SUM "md5 -r") -endif () - -if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") - set(MD5SUM "md5 -r") -endif () - -if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD") - set(MD5SUM "md5 -n") -endif () - if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") set(EXTRALIB "${EXTRALIB} -lm") set(NO_EXPRECISION 1) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 2c262b0b6..d66b0ccc0 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -37,6 +37,9 @@ # CPUIDEMU = ../../cpuid/table.o + +# Cannot run getarch on target if we are cross-compiling +if(NOT CMAKE_CROSSCOMPILING) if (DEFINED CPUIDEMU) set(EXFLAGS "-DCPUIDEMU -DVENDOR=99") endif () @@ -157,3 +160,5 @@ if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}") endif () endif () + +endif(NOT CMAKE_CROSSCOMPILING) diff --git a/cmake/system.cmake b/cmake/system.cmake index c0957c715..880223758 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -53,7 +53,7 @@ if (NO_AVX2) endif () if (CMAKE_BUILD_TYPE STREQUAL "Debug") - set(GETARCH_FLAGS "${GETARCH_FLAGS} -g") + set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}") endif () if (NOT DEFINED NO_PARALLEL_MAKE) @@ -75,18 +75,25 @@ endif () include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa -if (NOT DEFINED NUM_THREADS) - set(NUM_THREADS ${NUM_CORES}) -endif () +if (NOT CMAKE_CROSSCOMPILING) + if (NOT DEFINED NUM_CORES) + include(ProcessorCount) + ProcessorCount(NUM_CORES) + endif() + + if (NOT NUM_CORES EQUAL 0) + set(NUM_THREADS ${NUM_CORES}) + endif () +endif() -if (${NUM_THREADS} EQUAL 1) +if (${NUM_THREADS} LESS 2) set(USE_THREAD 0) elseif(NOT DEFINED USE_THREAD) set(USE_THREAD 1) endif () if (USE_THREAD) - message(STATUS "SMP enabled.") + message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.") endif () if (NOT DEFINED NEED_PIC) @@ -95,15 +102,6 @@ endif () # TODO: I think CMake should be handling all this stuff -hpa unset(ARFLAGS) -set(CPP "${COMPILER} -E") -set(AR "${CROSS_SUFFIX}ar") -set(AS "${CROSS_SUFFIX}as") -set(LD "${CROSS_SUFFIX}ld") -set(RANLIB "${CROSS_SUFFIX}ranlib") -set(NM "${CROSS_SUFFIX}nm") -set(DLLWRAP "${CROSS_SUFFIX}dllwrap") -set(OBJCOPY "${CROSS_SUFFIX}objcopy") -set(OBJCONV "${CROSS_SUFFIX}objconv") # OS dependent settings include("${PROJECT_SOURCE_DIR}/cmake/os.cmake") @@ -132,11 +130,13 @@ if (NEED_PIC) set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC") endif () - if (${F_COMPILER} STREQUAL "SUN") - set(FCOMMON_OPT "${FCOMMON_OPT} -pic") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC") - endif () + if (NOT NOFORTRAN) + if (${F_COMPILER} STREQUAL "SUN") + set(FCOMMON_OPT "${FCOMMON_OPT} -pic") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC") + endif () + endif() endif () if (DYNAMIC_ARCH) @@ -278,52 +278,19 @@ if (MIXED_MEMORY_ALLOCATION) set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") endif () -if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") - set(TAR gtar) - set(PATCH gpatch) - set(GREP ggrep) -else () - set(TAR tar) - set(PATCH patch) - set(GREP grep) -endif () - -if (NOT DEFINED MD5SUM) - set(MD5SUM md5sum) -endif () - -set(AWK awk) - -set(SED sed) - set(REVISION "-r${OpenBLAS_VERSION}") set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) -if (CMAKE_BUILD_TYPE STREQUAL "Debug") - set(COMMON_OPT "${COMMON_OPT} -g") -endif () - -if (NOT DEFINED COMMON_OPT) - set(COMMON_OPT "-O2") -endif () - -#For x86 32-bit -if (DEFINED BINARY AND BINARY EQUAL 32) -if (NOT MSVC) - set(COMMON_OPT "${COMMON_OPT} -m32") -endif() -endif() - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CCOMMON_OPT}") if(NOT MSVC) -set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}") endif() # TODO: not sure what PFLAGS is -hpa -set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}") +set(PFLAGS "${PFLAGS} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}") -set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}") +set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${FCOMMON_OPT}") # TODO: not sure what FPFLAGS is -hpa -set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}") +set(FPFLAGS "${FPFLAGS} ${FCOMMON_OPT} ${COMMON_PROF}") #For LAPACK Fortran codes. set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}") From 6aac06587d289d7b734caecfc968df67a5d808c6 Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Thu, 17 Aug 2017 17:27:01 +1000 Subject: [PATCH 005/122] Fix typos and use CMake OpenMP support. --- CMakeLists.txt | 41 +++++++++++++++++++++++------------------ cmake/arch.cmake | 35 +++++++++-------------------------- cmake/system.cmake | 19 ++++++++++--------- 3 files changed, 42 insertions(+), 53 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bafeb9dd..db59f5b3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,14 +3,12 @@ ## cmake_minimum_required(VERSION 2.8.5) -project(OpenBLAS C) +project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) set(OpenBLAS_PATCH_VERSION 0.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") -enable_language(ASM) - # Adhere to GNU filesystem layout conventions include(GNUInstallDirs) @@ -176,6 +174,11 @@ add_subdirectory(utest) if (USE_THREAD) # Add threading library to linker find_package(Threads) + if (THREADS_HAVE_PTHREAD_ARG) + set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY COMPILE_OPTIONS "-pthread") + set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") + endif() + message("PTHREAD: ${CMAKE_THREAD_LIBS_INIT}") target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT}) endif() @@ -219,26 +222,28 @@ install(TARGETS ${OpenBLAS_LIBNAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) # Install include files - set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX}) +set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX}) - execute_process(COMMAND ${GENCONFIG_BIN} - ${CMAKE_CURRENT_SOURCE_DIR}/config.h - ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h - OUTPUT_VARIABLE OPENBLAS_CONFIG_H_CONTENTS) +execute_process(COMMAND ${GENCONFIG_BIN} + ${CMAKE_CURRENT_SOURCE_DIR}/config.h + ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h + OUTPUT_VARIABLE OPENBLAS_CONFIG_H_CONTENTS) - file(WRITE ${CMAKE_BINARY_DIR}/openblas_config.tmp "${OPENBLAS_CONFIG_H_CONTENTS}") - configure_file(${CMAKE_BINARY_DIR}/openblas_config.tmp ${CMAKE_BINARY_DIR}/openblas_config.h COPYONLY) +file(WRITE ${CMAKE_BINARY_DIR}/openblas_config.tmp "${OPENBLAS_CONFIG_H_CONTENTS}") +configure_file(${CMAKE_BINARY_DIR}/openblas_config.tmp ${CMAKE_BINARY_DIR}/openblas_config.h COPYONLY) - install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}") +if(NOT NOFORTRAN) + message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}") - file(WRITE ${CMAKE_BINARY_DIR}/f77blas.h "") - file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#ifndef OPENBLAS_F77BLAS_H\n#define OPENBLAS_F77BLAS_H\n#include \"openblas_config.h\"\n") - file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS) - file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "${COMMON_INTERFACE_H_CONTENTS}") - file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#endif") - install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + file(WRITE ${CMAKE_BINARY_DIR}/f77blas.h "") + file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#ifndef OPENBLAS_F77BLAS_H\n#define OPENBLAS_F77BLAS_H\n#include \"openblas_config.h\"\n") + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS) + file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "${COMMON_INTERFACE_H_CONTENTS}") + file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#endif") + install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +endif() if(NOT NO_CBLAS) message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}") diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 5b897d857..bc9a7621c 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -33,32 +33,15 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel") endif () if (USE_OPENMP) - - if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB") - set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") - endif () - - if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") - message(WARNING "Clang doesn't support OpenMP yet.") - set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") - endif () - - if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel") - set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") - endif () - - if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI") - set(CCOMMON_OPT "${CCOMMON_OPT} -mp") - endif () - - if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64") - set(CCOMMON_OPT "${CCOMMON_OPT} -mp") - set(CEXTRALIB "${CEXTRALIB} -lstdc++") - endif () - - if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE") - set(CCOMMON_OPT "${CCOMMON_OPT} -mp") - endif () + # USE_SIMPLE_THREADED_LEVEL3 = 1 + # NO_AFFINITY = 1 + find_package(OpenMP) + if (OpenMP_FOUND) + set(CCOMMON_OPT "${CCOMMON_OPT} ${OpenMP_C_FLAGS} -DUSE_OPENMP") + set(FCOMMON_OPT "${FCOMMON_OPT} ${OpenMP_Fortran_FLAGS}") + elseif (UNIX) + set(USE_OPENMP 0) + endif() endif () diff --git a/cmake/system.cmake b/cmake/system.cmake index 880223758..e31e74a42 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -27,7 +27,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) endif () if (DEFINED TARGET) - message(STATUS "Targetting the ${TARGET} architecture.") + message(STATUS "Targeting the ${TARGET} architecture.") set(GETARCH_FLAGS "-DFORCE_${TARGET}") endif () @@ -92,6 +92,12 @@ elseif(NOT DEFINED USE_THREAD) set(USE_THREAD 1) endif () +# TODO: Fix. Isn't working. Was never working in CMake. +# Undefined reference to get_num_procs, blas_thread_shutdown, ... +if (UNIX) + set(USE_THREAD 0) +endif() + if (USE_THREAD) message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.") endif () @@ -166,6 +172,8 @@ if (NO_AVX2) endif () if (USE_THREAD) + # USE_SIMPLE_THREADED_LEVEL3 = 1 + # NO_AFFINITY = 1 set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER") if (${ARCH} STREQUAL "mips64") @@ -174,16 +182,9 @@ if (USE_THREAD) endif () endif () - if (USE_OPENMP) - # USE_SIMPLE_THREADED_LEVEL3 = 1 - # NO_AFFINITY = 1 - set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP") - endif () - if (BIGNUMA) set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA") endif () - endif () if (NO_WARMUP) @@ -298,7 +299,7 @@ set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}") #Disable -fopenmp for LAPACK Fortran codes on Windows. if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parralel") + set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel") foreach (FILTER_FLAG ${FILTER_FLAGS}) string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS ${LAPACK_FFLAGS}) string(REPLACE ${FILTER_FLAG} "" LAPACK_FPFLAGS ${LAPACK_FPFLAGS}) From 37858d11462110a9f5b3c28dd6ed5915c6010925 Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Sat, 19 Aug 2017 15:07:42 +1000 Subject: [PATCH 006/122] Fix threading usage in CMake: s/SMP/USE_THREAD/ --- driver/level2/CMakeLists.txt | 8 ++++---- driver/level3/CMakeLists.txt | 10 +++++----- driver/others/CMakeLists.txt | 2 +- lapack/CMakeLists.txt | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index aa5f03107..8fceba905 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -73,7 +73,7 @@ GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2) # special defines for complex foreach (float_type ${FLOAT_TYPES}) - if (SMP) + if (USE_THREAD) GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type}) GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type}) @@ -107,7 +107,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type}) endforeach() - if (SMP) + if (USE_THREAD) GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type}) @@ -186,7 +186,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type}) endforeach () - if (SMP) + if (USE_THREAD) GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type}) foreach(nu_smp_source ${NU_SMP_SOURCES}) string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source}) @@ -197,7 +197,7 @@ foreach (float_type ${FLOAT_TYPES}) endif () endforeach () -if (SMP) +if (USE_THREAD) GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) endif () diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index c91e85f9c..f788c45b9 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -9,7 +9,7 @@ set(GEMM_COMPLEX_DEFINES RN CN RT CT NR TR RR CR NC TC RC CC) foreach (GEMM_DEFINE ${GEMM_DEFINES}) string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) - if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0) endif () endforeach () @@ -32,7 +32,7 @@ GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "" 1) GenerateCombinationObjects("syr2k_k.c" "LOWER;TRANS" "U;N" "" 1) GenerateCombinationObjects("syrk_kernel.c" "LOWER" "U" "" 2) GenerateCombinationObjects("syr2k_kernel.c" "LOWER" "U" "" 2) -if (SMP) +if (USE_THREAD) # N.B. these do NOT have a float type (e.g. DOUBLE) defined! GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" 0 "" "" 1) @@ -71,7 +71,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) - if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) #herk GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) @@ -93,7 +93,7 @@ foreach (float_type ${FLOAT_TYPES}) if(USE_GEMM3M) GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type}) endif() - if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) if(USE_GEMM3M) GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type}) @@ -106,7 +106,7 @@ foreach (float_type ${FLOAT_TYPES}) foreach (GEMM_DEFINE ${GEMM_DEFINES}) string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE}" "gemm3m_${GEMM_DEFINE_LC}" false "" "" false ${float_type}) - if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm3m_thread_${GEMM_DEFINE_LC}" false "" "" false ${float_type}) endif () endforeach () diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 376cc66c4..e20b14e79 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -7,7 +7,7 @@ else () set(MEMORY memory.c) endif () -if (SMP) +if (USE_THREAD) if (USE_OPENMP) set(BLAS_SERVER blas_server_omp.c) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index b613c6c2b..9fb000651 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -46,7 +46,7 @@ GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus" false "" "" false 3) GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus" false "" "" false 3) -if (SMP) +if (USE_THREAD) if (USE_OPENMP) set(GETRF_SRC getrf/getrf_parallel_omp.c) From b9ec72546c499591f43a54bf03d11194839a3ccf Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Sun, 20 Aug 2017 00:13:24 +1000 Subject: [PATCH 007/122] Only run utest without NOFORTRAN, same as Makefile. Linux now compiles. --- CMakeLists.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db59f5b3f..bb65944d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,9 +117,9 @@ endforeach () # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. if (NOT NOFORTRAN AND NOT NO_LAPACK) include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake") -if (NOT NO_LAPACKE) - include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake") -endif () + if (NOT NO_LAPACKE) + include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake") + endif () endif () # Only generate .def for dll on MSVC and always produce pdb files for debug and release @@ -169,7 +169,6 @@ foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES}) endforeach() enable_testing() -add_subdirectory(utest) if (USE_THREAD) # Add threading library to linker @@ -183,11 +182,14 @@ if (USE_THREAD) endif() if (NOT MSVC AND NOT NOFORTRAN) + add_subdirectory(utest) # Build test and ctest add_subdirectory(test) - if(NOT NO_CBLAS) + if(NOT NO_CBLAS) add_subdirectory(ctest) endif() +elseif(MSVC) + add_subdirectory(utest) endif() set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES From 4474465438b55fb8204d85477efabd419693742b Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Sun, 20 Aug 2017 00:13:46 +1000 Subject: [PATCH 008/122] Remove _static usages for tests --- ctest/CMakeLists.txt | 6 +++--- test/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt index 73070d429..14c9d1944 100644 --- a/ctest/CMakeLists.txt +++ b/ctest/CMakeLists.txt @@ -16,7 +16,7 @@ foreach(float_type ${FLOAT_TYPES}) add_executable(x${float_char}cblat1 c_${float_char}blat1.f c_${float_char}blas1.c) - target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static) + target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}) add_test(NAME "x${float_char}cblat1" COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1") @@ -28,7 +28,7 @@ foreach(float_type ${FLOAT_TYPES}) auxiliary.c c_xerbla.c constant.c) - target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static) + target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}) add_test(NAME "x${float_char}cblat2" COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2") @@ -40,7 +40,7 @@ foreach(float_type ${FLOAT_TYPES}) auxiliary.c c_xerbla.c constant.c) - target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static) + target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}) add_test(NAME "x${float_char}cblat3" COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3") diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 69a1ceb91..adeee3452 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -11,7 +11,7 @@ set(OpenBLAS_Tests foreach(test_bin ${OpenBLAS_Tests}) add_executable(${test_bin} ${test_bin}.f) -target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}_static) +target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}) endforeach() # $1 exec, $2 input, $3 output_result From 408b4fe83f1cf592b16b9781840e4f15bb67e59a Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Sun, 20 Aug 2017 00:59:00 +1000 Subject: [PATCH 009/122] Add a CMake GCC and Clang target to Travis CI --- .travis.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/.travis.yml b/.travis.yml index e86f28137..c6d43cd92 100644 --- a/.travis.yml +++ b/.travis.yml @@ -119,6 +119,34 @@ jobs: - TARGET_BOX=LINUX64_MUSL - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=core2" + - &test-cmake + stage: test + compiler: clang + addons: + apt: + packages: + - gfortran + - cmake + dist: trusty + sudo: true + before_script: + - COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32" + script: + - set -e + - mkdir build + - CONFIG=Release + - cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG + - cmake --build build --config $CONFIG -- -j2 + env: + - CMAKE=1 + - <<: *test-cmake + env: + - CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1" + - <<: *test-cmake + compiler: gcc + env: + - CMAKE=1 + # whitelist branches: only: From 11911fd941c74341decd07ba27a226ff28d44fb9 Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Sun, 20 Aug 2017 00:59:14 +1000 Subject: [PATCH 010/122] Add kernel/Makefile.LA to CMake --- kernel/CMakeLists.txt | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 849ef21d4..2295829c9 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -464,6 +464,30 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) endforeach () + # Makefile.LA + if(NOT NO_LAPACK) + foreach (float_type ${FLOAT_TYPES}) + if (NOT DEFINED ${float_char}NEG_TCOPY) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") + set(${float_char}NEG_TCOPY ../generic/zneg_tcopy.c) + else () + set(${float_char}NEG_TCOPY ../generic/neg_tcopy.c) + endif () + endif () + + if (NOT DEFINED ${float_char}LASWP_NCOPY) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") + set(${float_char}LASWP_NCOPY ../generic/zlaswp_ncopy.c) + else () + set(${float_char}LASWP_NCOPY ../generic/laswp_ncopy.c) + endif () + endif () + string(SUBSTRING ${float_type} 0 1 float_char) + GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}_${${float_char}GEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}_${${float_char}GEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false ${float_type}) + endforeach() + endif() + if (${DYNAMIC_ARCH}) set(SETPARAM_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}) file(READ ${CMAKE_CURRENT_SOURCE_DIR}/setparam-ref.c SETPARAM_REF_CONTENTS) @@ -484,9 +508,6 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) file(REMOVE ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp) endif () - # Makefile.LA - #DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) - add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}") get_target_property(KERNEL_INCLUDE_DIRECTORIES kernel${TSUFFIX} INCLUDE_DIRECTORIES) From 0a7a527a92d2724d99eb43daa7302d11bd5a7012 Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Sun, 20 Aug 2017 20:08:53 +1000 Subject: [PATCH 011/122] Add support for cross compiling. Add support for not having host compiler as CMake cannot detect such a compiler. Add support for not using getarch. Successfully builds Android ARMV8. Any target can be added by supplying the TARGET_CORE config in prebuild.cmake. --- cmake/prebuild.cmake | 80 ++++++++++++++++++++++++++++++++++++++++++-- cmake/system.cmake | 17 +++++----- 2 files changed, 86 insertions(+), 11 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index d66b0ccc0..a1b3e0f57 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -38,12 +38,19 @@ # CPUIDEMU = ../../cpuid/table.o -# Cannot run getarch on target if we are cross-compiling -if(NOT CMAKE_CROSSCOMPILING) if (DEFINED CPUIDEMU) set(EXFLAGS "-DCPUIDEMU -DVENDOR=99") endif () +if(CMAKE_CROSSCOMPILING AND NOT DEFINED TARGET_CORE) + # Detect target without running getarch + if(AARCH64) + set(TARGET_CORE "ARMV8") + else() + message(FATAL_ERROR "When cross compiling, a TARGET_CORE is required.") + endif() +endif() + if (DEFINED TARGET_CORE) # set the C flags for just this file set(GETARCH2_FLAGS "-DBUILD_KERNEL") @@ -63,6 +70,73 @@ if (NOT NOFORTRAN) include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake") endif () +# Cannot run getarch on target if we are cross-compiling +if (CMAKE_CROSSCOMPILING) + # Write to config as getarch would + + # TODO: Set up defines that getarch sets up based on every other target + # Perhaps this should be inside a different file as it grows larger + file(APPEND ${TARGET_CONF_TEMP} + "#define ${TARGET_CORE}\n" + "#define CHAR_CORENAME \"${TARGET_CORE}\"\n") + if ("${TARGET_CORE}" STREQUAL "ARMV8") + file(APPEND ${TARGET_CONF_TEMP} + "#define L1_DATA_SIZE\t32768\n" + "#define L1_DATA_LINESIZE\t64\n" + "#define L2_SIZE\t262144\n" + "#define L2_LINESIZE\t64\n" + "#define DTB_DEFAULT_ENTRIES\t64\n" + "#define DTB_SIZE\t4096\n" + "#define L2_ASSOCIATIVE\t32\n") + set(SGEMM_UNROLL_M 4) + set(SGEMM_UNROLL_N 4) + elseif ("${TARGET_CORE}" STREQUAL "CORTEXA57") + file(APPEND ${TARGET_CONF_TEMP} + "#define L1_CODE_SIZE\t49152\n" + "#define L1_CODE_LINESIZE\t64\n" + "#define L1_CODE_ASSOCIATIVE\t3\n" + "#define L1_DATA_SIZE\t32768\n" + "#define L1_DATA_LINESIZE\t64\n" + "#define L1_DATA_ASSOCIATIVE\t2\n" + "#define L2_SIZE\t2097152\n" + "#define L2_LINESIZE\t64\n" + "#define L2_ASSOCIATIVE\t16\n" + "#define DTB_DEFAULT_ENTRIES\t64\n" + "#define DTB_SIZE\t4096\n" + "#define HAVE_VFPV4\n" + "#define HAVE_VFPV3\n" + "#define HAVE_VFP\n" + "#define HAVE_NEON\n") + set(SGEMM_DEFAULT_UNROLL_M 16) + set(SGEMM_DEFAULT_UNROLL_N 4) + set(DGEMM_DEFAULT_UNROLL_M 8) + set(DGEMM_DEFAULT_UNROLL_N 4) + set(CGEMM_DEFAULT_UNROLL_M 8) + set(CGEMM_DEFAULT_UNROLL_N 4) + set(ZGEMM_DEFAULT_UNROLL_M 8) + set(ZGEMM_DEFAULT_UNROLL_N 4) + endif() + + # Or should this actually be NUM_CORES? + if (${NUM_THREADS} GREATER 0) + file(APPEND ${TARGET_CONF_TEMP} "#define NUM_CORES\t${NUM_THREADS}\n") + endif() + + # GetArch_2nd + foreach(float_char S;D;Q;C;Z;X) + if (NOT DEFINED ${float_char}GEMM_UNROLL_M) + set(${float_char}GEMM_UNROLL_M 2) + endif() + if (NOT DEFINED ${float_char}GEMM_UNROLL_N) + set(${float_char}GEMM_UNROLL_N 2) + endif() + endforeach() + file(APPEND ${TARGET_CONF_TEMP} + "#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n") + # Move to where gen_config_h would place it + file(RENAME ${TARGET_CONF_TEMP} "${PROJECT_BINARY_DIR}/config.h") + +else() # compile getarch set(GETARCH_SRC ${PROJECT_SOURCE_DIR}/getarch.c @@ -161,4 +235,4 @@ if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") endif () endif () -endif(NOT CMAKE_CROSSCOMPILING) +endif(CMAKE_CROSSCOMPILING) diff --git a/cmake/system.cmake b/cmake/system.cmake index e31e74a42..310b714f0 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -12,6 +12,9 @@ set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib") # TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. if (DEFINED TARGET_CORE) set(TARGET ${TARGET_CORE}) + if(NOT DEFINED CORE) + set(CORE ${TARGET_CORE}) + endif() endif () # Force fallbacks for 32bit @@ -72,8 +75,6 @@ else () set(ONLY_CBLAS 0) endif () -include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") - # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa if (NOT CMAKE_CROSSCOMPILING) if (NOT DEFINED NUM_CORES) @@ -86,22 +87,22 @@ if (NOT CMAKE_CROSSCOMPILING) endif () endif() +if (NOT DEFINED NUM_THREADS) + set(NUM_THREADS 0) +endif() + if (${NUM_THREADS} LESS 2) set(USE_THREAD 0) elseif(NOT DEFINED USE_THREAD) set(USE_THREAD 1) endif () -# TODO: Fix. Isn't working. Was never working in CMake. -# Undefined reference to get_num_procs, blas_thread_shutdown, ... -if (UNIX) - set(USE_THREAD 0) -endif() - if (USE_THREAD) message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.") endif () +include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") + if (NOT DEFINED NEED_PIC) set(NEED_PIC 1) endif () From 69b560751c8b18784a18e7fb87f9f7d3b8521696 Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Sun, 20 Aug 2017 22:50:31 +1000 Subject: [PATCH 012/122] Improvements to previous commit (cross-compile). Fix typos and bad if statements discovered in 0.2.20. --- cmake/f_check.cmake | 4 - cmake/prebuild.cmake | 219 +++++++++++++++++++++--------------------- cmake/system.cmake | 11 --- kernel/CMakeLists.txt | 2 +- 4 files changed, 108 insertions(+), 128 deletions(-) diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index 6eee027a5..4848553d9 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -34,11 +34,7 @@ else() endif() if (NOT ONLY_CBLAS) - # N.B. f_check is not cross-platform, so instead try to use CMake variables # run f_check (appends to TARGET files) -# message(STATUS "Running f_check...") -# execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER} -# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) # TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile # TODO: set FEXTRALIB flags a la f_check? diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index a1b3e0f57..6ecb9f08f 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -42,23 +42,12 @@ if (DEFINED CPUIDEMU) set(EXFLAGS "-DCPUIDEMU -DVENDOR=99") endif () -if(CMAKE_CROSSCOMPILING AND NOT DEFINED TARGET_CORE) - # Detect target without running getarch - if(AARCH64) - set(TARGET_CORE "ARMV8") - else() - message(FATAL_ERROR "When cross compiling, a TARGET_CORE is required.") - endif() -endif() - -if (DEFINED TARGET_CORE) +if (BUILD_KERNEL) # set the C flags for just this file set(GETARCH2_FLAGS "-DBUILD_KERNEL") - set(TARGET_MAKE "Makefile_kernel.conf") set(TARGET_CONF "config_kernel.h") set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE}) else() - set(TARGET_MAKE "Makefile.conf") set(TARGET_CONF "config.h") set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}) endif () @@ -70,16 +59,26 @@ if (NOT NOFORTRAN) include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake") endif () +# This check requires c_check for arch but it should probably be done earlier +if(CMAKE_CROSSCOMPILING AND NOT DEFINED CORE) + # Detect target without running getarch + if(ARM64) + set(CORE "ARMV8") + else() + message(FATAL_ERROR "When cross compiling, a CORE is required.") + endif() +endif() + # Cannot run getarch on target if we are cross-compiling -if (CMAKE_CROSSCOMPILING) +if (DEFINED CORE AND CMAKE_CROSSCOMPILING) # Write to config as getarch would # TODO: Set up defines that getarch sets up based on every other target # Perhaps this should be inside a different file as it grows larger file(APPEND ${TARGET_CONF_TEMP} - "#define ${TARGET_CORE}\n" - "#define CHAR_CORENAME \"${TARGET_CORE}\"\n") - if ("${TARGET_CORE}" STREQUAL "ARMV8") + "#define ${CORE}\n" + "#define CHAR_CORENAME \"${CORE}\"\n") + if ("${CORE}" STREQUAL "ARMV8") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t32768\n" "#define L1_DATA_LINESIZE\t64\n" @@ -90,7 +89,7 @@ if (CMAKE_CROSSCOMPILING) "#define L2_ASSOCIATIVE\t32\n") set(SGEMM_UNROLL_M 4) set(SGEMM_UNROLL_N 4) - elseif ("${TARGET_CORE}" STREQUAL "CORTEXA57") + elseif ("${CORE}" STREQUAL "CORTEXA57") file(APPEND ${TARGET_CONF_TEMP} "#define L1_CODE_SIZE\t49152\n" "#define L1_CODE_LINESIZE\t64\n" @@ -107,14 +106,14 @@ if (CMAKE_CROSSCOMPILING) "#define HAVE_VFPV3\n" "#define HAVE_VFP\n" "#define HAVE_NEON\n") - set(SGEMM_DEFAULT_UNROLL_M 16) - set(SGEMM_DEFAULT_UNROLL_N 4) - set(DGEMM_DEFAULT_UNROLL_M 8) - set(DGEMM_DEFAULT_UNROLL_N 4) - set(CGEMM_DEFAULT_UNROLL_M 8) - set(CGEMM_DEFAULT_UNROLL_N 4) - set(ZGEMM_DEFAULT_UNROLL_M 8) - set(ZGEMM_DEFAULT_UNROLL_N 4) + set(SGEMM_UNROLL_M 16) + set(SGEMM_UNROLL_N 4) + set(DGEMM_UNROLL_M 8) + set(DGEMM_UNROLL_N 4) + set(CGEMM_UNROLL_M 8) + set(CGEMM_UNROLL_N 4) + set(ZGEMM_UNROLL_M 8) + set(ZGEMM_UNROLL_N 4) endif() # Or should this actually be NUM_CORES? @@ -134,105 +133,101 @@ if (CMAKE_CROSSCOMPILING) file(APPEND ${TARGET_CONF_TEMP} "#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n") # Move to where gen_config_h would place it - file(RENAME ${TARGET_CONF_TEMP} "${PROJECT_BINARY_DIR}/config.h") - -else() -# compile getarch -set(GETARCH_SRC - ${PROJECT_SOURCE_DIR}/getarch.c - ${CPUIDEMO} -) - -if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") - #Use generic for MSVC now - message("MSVC") - set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) -else() - list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S) -endif () - -if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") - # disable WindowsStore strict CRT checks - set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS) -endif () + file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}") -set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") -set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") -file(MAKE_DIRECTORY ${GETARCH_DIR}) -configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY) -if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") - try_compile(GETARCH_RESULT ${GETARCH_DIR} - SOURCES ${GETARCH_SRC} - COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} - OUTPUT_VARIABLE GETARCH_LOG - COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} +else(NOT CMAKE_CROSSCOMPILING) + # compile getarch + set(GETARCH_SRC + ${PROJECT_SOURCE_DIR}/getarch.c + ${CPUIDEMU} ) - if (NOT ${GETARCH_RESULT}) - MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") + if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") + #Use generic for MSVC now + message("MSVC") + set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) + else() + list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S) endif () -endif () -message(STATUS "Running getarch") - -# use the cmake binary w/ the -E param to run a shell command in a cross-platform way -execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) -execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) - -message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") - -# append config data from getarch to the TARGET file and read in CMake vars -file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT}) -ParseGetArchVars(${GETARCH_MAKE_OUT}) - -set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") -set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") -file(MAKE_DIRECTORY ${GETARCH2_DIR}) -configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY) -if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") - try_compile(GETARCH2_RESULT ${GETARCH2_DIR} - SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c - COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} - OUTPUT_VARIABLE GETARCH2_LOG - COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} - ) - if (NOT ${GETARCH2_RESULT}) - MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}") + if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") + # disable WindowsStore strict CRT checks + set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS) endif () -endif () -# use the cmake binary w/ the -E param to run a shell command in a cross-platform way -execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) -execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) + set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") + set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") + file(MAKE_DIRECTORY ${GETARCH_DIR}) + configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY) + if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") + try_compile(GETARCH_RESULT ${GETARCH_DIR} + SOURCES ${GETARCH_SRC} + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} + OUTPUT_VARIABLE GETARCH_LOG + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} + ) + + if (NOT ${GETARCH_RESULT}) + MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") + endif () + endif () + message(STATUS "Running getarch") + + # use the cmake binary w/ the -E param to run a shell command in a cross-platform way + execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) + execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) + + message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") + + # append config data from getarch to the TARGET file and read in CMake vars + file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT}) + ParseGetArchVars(${GETARCH_MAKE_OUT}) + + set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") + set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") + file(MAKE_DIRECTORY ${GETARCH2_DIR}) + configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY) + if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") + try_compile(GETARCH2_RESULT ${GETARCH2_DIR} + SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} + OUTPUT_VARIABLE GETARCH2_LOG + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} + ) + + if (NOT ${GETARCH2_RESULT}) + MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}") + endif () + endif () -# append config data from getarch_2nd to the TARGET file and read in CMake vars -file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT}) + # use the cmake binary w/ the -E param to run a shell command in a cross-platform way + execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) + execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) -if (${BUILD_KERNEL}) - configure_file(${TARGET_CONF_TEMP} ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE}/${TARGET_CONF} COPYONLY) -else () - configure_file(${TARGET_CONF_TEMP} ${PROJECT_BINARY_DIR}/${TARGET_CONF} COPYONLY) -endif () + # append config data from getarch_2nd to the TARGET file and read in CMake vars + file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT}) -ParseGetArchVars(${GETARCH2_MAKE_OUT}) + configure_file(${TARGET_CONF_TEMP} ${TARGET_CONF_DIR}/${TARGET_CONF} COPYONLY) -# compile get_config_h -set(GEN_CONFIG_H_DIR "${PROJECT_BINARY_DIR}/genconfig_h_build") -set(GEN_CONFIG_H_BIN "gen_config_h${CMAKE_EXECUTABLE_SUFFIX}") -set(GEN_CONFIG_H_FLAGS "-DVERSION=\"${OpenBLAS_VERSION}\"") -file(MAKE_DIRECTORY ${GEN_CONFIG_H_DIR}) + ParseGetArchVars(${GETARCH2_MAKE_OUT}) -if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") - try_compile(GEN_CONFIG_H_RESULT ${GEN_CONFIG_H_DIR} - SOURCES ${PROJECT_SOURCE_DIR}/gen_config_h.c - COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GEN_CONFIG_H_FLAGS} -I${PROJECT_SOURCE_DIR} - OUTPUT_VARIABLE GEN_CONFIG_H_LOG - COPY_FILE ${PROJECT_BINARY_DIR}/${GEN_CONFIG_H_BIN} - ) + # compile get_config_h + set(GEN_CONFIG_H_DIR "${PROJECT_BINARY_DIR}/genconfig_h_build") + set(GEN_CONFIG_H_BIN "gen_config_h${CMAKE_EXECUTABLE_SUFFIX}") + set(GEN_CONFIG_H_FLAGS "-DVERSION=\"${OpenBLAS_VERSION}\"") + file(MAKE_DIRECTORY ${GEN_CONFIG_H_DIR}) - if (NOT ${GEN_CONFIG_H_RESULT}) - MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}") - endif () + if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") + try_compile(GEN_CONFIG_H_RESULT ${GEN_CONFIG_H_DIR} + SOURCES ${PROJECT_SOURCE_DIR}/gen_config_h.c + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GEN_CONFIG_H_FLAGS} -I${PROJECT_SOURCE_DIR} + OUTPUT_VARIABLE GEN_CONFIG_H_LOG + COPY_FILE ${PROJECT_BINARY_DIR}/${GEN_CONFIG_H_BIN} + ) + + if (NOT ${GEN_CONFIG_H_RESULT}) + MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}") + endif () endif () -endif(CMAKE_CROSSCOMPILING) +endif() diff --git a/cmake/system.cmake b/cmake/system.cmake index 310b714f0..a66550245 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -4,17 +4,9 @@ ## set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib") -# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa -# http://stackoverflow.com/questions/714100/os-detecting-makefile - -# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa - # TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. if (DEFINED TARGET_CORE) set(TARGET ${TARGET_CORE}) - if(NOT DEFINED CORE) - set(CORE ${TARGET_CORE}) - endif() endif () # Force fallbacks for 32bit @@ -107,9 +99,6 @@ if (NOT DEFINED NEED_PIC) set(NEED_PIC 1) endif () -# TODO: I think CMake should be handling all this stuff -hpa -unset(ARFLAGS) - # OS dependent settings include("${PROJECT_SOURCE_DIR}/cmake/os.cmake") diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 2295829c9..9178ba745 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -22,7 +22,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) if (${ADD_COMMONOBJS}) if (${ARCH} STREQUAL "x86") - if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + if (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) else() GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true) From 47ebce4d1a9b04ebf18c3a11859a07af8114794e Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Mon, 21 Aug 2017 00:37:29 +1000 Subject: [PATCH 013/122] Clean up, fix old typos. Simplify arch usages. Move system arch check to earlier position. --- CMakeLists.txt | 31 ---------- cmake/arch.cmake | 14 ++--- cmake/cc.cmake | 17 ++---- cmake/fc.cmake | 8 +-- cmake/os.cmake | 4 +- cmake/prebuild.cmake | 54 ++++++++++++++---- cmake/system.cmake | 63 +++++++++------------ cmake/{c_check.cmake => system_check.cmake} | 53 ++--------------- kernel/CMakeLists.txt | 4 +- 9 files changed, 94 insertions(+), 154 deletions(-) rename cmake/{c_check.cmake => system_check.cmake} (59%) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb65944d0..235a48747 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,11 +87,6 @@ if (BUILD_COMPLEX16) list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE endif () -set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench) - -# all :: libs netlib tests shared - -# libs : if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.") endif () @@ -125,18 +120,10 @@ endif () # Only generate .def for dll on MSVC and always produce pdb files for debug and release if(MSVC) set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") endif() -#ifeq ($(DYNAMIC_ARCH), 1) -# @$(MAKE) -C kernel commonlibs || exit 1 -# @for d in $(DYNAMIC_CORE) ; \ -# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ -# done -# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last -#endif if (${DYNAMIC_ARCH}) add_subdirectory(kernel) foreach(TARGET_CORE ${DYNAMIC_CORE}) @@ -197,24 +184,6 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES SOVERSION ${OpenBLAS_MAJOR_VERSION} ) - -# TODO: Why is the config saved here? Is this necessary with CMake? -#Save the config files for installation -# @cp Makefile.conf Makefile.conf_last -# @cp config.h config_last.h -#ifdef QUAD_PRECISION -# @echo "#define QUAD_PRECISION">> config_last.h -#endif -#ifeq ($(EXPRECISION), 1) -# @echo "#define EXPRECISION">> config_last.h -#endif -### - -#ifdef USE_THREAD -# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last -#endif -# @touch lib.grd - # Install project # Install libraries diff --git a/cmake/arch.cmake b/cmake/arch.cmake index bc9a7621c..69a117ca9 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -3,9 +3,9 @@ ## Description: Ported from portion of OpenBLAS/Makefile.system ## Sets various variables based on architecture. -if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") +if (X86 OR X86_64) - if (${ARCH} STREQUAL "x86") + if (X86) if (NOT BINARY) set(NO_BINARY_MODE 1) endif () @@ -46,11 +46,11 @@ endif () if (DYNAMIC_ARCH) - if (${ARCH} STREQUAL "x86") + if (X86) set(DYNAMIC_CORE KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO) endif () - if (${ARCH} STREQUAL "x86_64") + if (X86_64) set(DYNAMIC_CORE PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO) if (NOT NO_AVX) set(DYNAMIC_CORE ${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR) @@ -77,7 +77,7 @@ if (${ARCH} STREQUAL "ia64") endif () endif () -if (${ARCH} STREQUAL "mips64") +if (MIPS64) set(NO_BINARY_MODE 1) endif () @@ -86,12 +86,12 @@ if (${ARCH} STREQUAL "alpha") set(BINARY_DEFINED 1) endif () -if (${ARCH} STREQUAL "arm") +if (ARM) set(NO_BINARY_MODE 1) set(BINARY_DEFINED 1) endif () -if (${ARCH} STREQUAL "arm64") +if (ARM64) set(NO_BINARY_MODE 1) set(BINARY_DEFINED 1) endif () diff --git a/cmake/cc.cmake b/cmake/cc.cmake index de196524f..98f9298f8 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -15,7 +15,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR if (NO_BINARY_MODE) - if (${ARCH} STREQUAL "mips64") + if (MIPS64) if (BINARY64) set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64") else () @@ -24,17 +24,12 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR set(BINARY_DEFINED 1) endif () - if (${CORE} STREQUAL "LOONGSON3A") + if (${CORE} STREQUAL "LOONGSON3A" OR ${CORE} STREQUAL "LOONGSON3B") set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") endif () - if (${CORE} STREQUAL "LOONGSON3B") - set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") - set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") - endif () - - if (${OSNAME} STREQUAL "AIX") + if (CMAKE_SYSTEM_NAME STREQUAL "AIX") set(BINARY_DEFINED 1) endif () endif () @@ -66,7 +61,7 @@ endif () if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") - if (${ARCH} STREQUAL "mips64") + if (MIPS64) if (NOT BINARY64) set(CCOMMON_OPT "${CCOMMON_OPT} -n32") @@ -94,10 +89,10 @@ endif () if (${CMAKE_C_COMPILER} STREQUAL "SUN") set(CCOMMON_OPT "${CCOMMON_OPT} -w") - if (${ARCH} STREQUAL "x86") + if (X86) set(CCOMMON_OPT "${CCOMMON_OPT} -m32") else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") endif () endif () diff --git a/cmake/fc.cmake b/cmake/fc.cmake index ee9d2051b..f1c69d923 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -50,7 +50,7 @@ if (${F_COMPILER} STREQUAL "GFORTRAN") set(EXTRALIB "{EXTRALIB} -lgfortran") endif () if (NO_BINARY_MODE) - if (${ARCH} STREQUAL "mips64") + if (MIPS64) if (BINARY64) set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") else () @@ -130,7 +130,7 @@ if (${F_COMPILER} STREQUAL "PATHSCALE") endif () endif () - if (NOT ${ARCH} STREQUAL "mips64") + if (NOT MIPS64) if (NOT BINARY64) set(FCOMMON_OPT "${FCOMMON_OPT} -m32") else () @@ -158,7 +158,7 @@ if (${F_COMPILER} STREQUAL "OPEN64") endif () endif () - if (${ARCH} STREQUAL "mips64") + if (MIPS64) if (NOT BINARY64) set(FCOMMON_OPT "${FCOMMON_OPT} -n32") @@ -189,7 +189,7 @@ endif () if (${F_COMPILER} STREQUAL "SUN") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") - if (${ARCH} STREQUAL "x86") + if (X86) set(FCOMMON_OPT "${FCOMMON_OPT} -m32") else () set(FCOMMON_OPT "${FCOMMON_OPT} -m64") diff --git a/cmake/os.cmake b/cmake/os.cmake index f50097aaf..1321ef619 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -43,7 +43,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") # Ensure the correct stack alignment on Win32 # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 - if (${ARCH} STREQUAL "x86") + if (X86) if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") endif () @@ -75,7 +75,7 @@ if (QUAD_PRECISION) set(NO_EXPRECISION 1) endif () -if (${ARCH} STREQUAL "x86") +if (X86) set(NO_EXPRECISION 1) endif () diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 6ecb9f08f..3b8ab9be6 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -53,22 +53,37 @@ else() endif () set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp") -include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake") +# c_check +set(FU "") +if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")) + set(FU "_") +endif() + +set(COMPILER_ID ${CMAKE_C_COMPILER_ID}) +if (${COMPILER_ID} STREQUAL "GNU") + set(COMPILER_ID "GCC") +endif () + +string(TOUPPER ${ARCH} UC_ARCH) + +file(WRITE ${TARGET_CONF_TEMP} + "#define OS_${HOST_OS}\t1\n" + "#define ARCH_${UC_ARCH}\t1\n" + "#define C_${COMPILER_ID}\t1\n" + "#define __${BINARY}BIT__\t1\n" + "#define FUNDERSCORE\t${FU}\n") + +if (${HOST_OS} STREQUAL "WINDOWSSTORE") + file(APPEND ${TARGET_CONF_TEMP} + "#define OS_WINNT\t1\n") +endif () + +# f_check if (NOT NOFORTRAN) include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake") endif () -# This check requires c_check for arch but it should probably be done earlier -if(CMAKE_CROSSCOMPILING AND NOT DEFINED CORE) - # Detect target without running getarch - if(ARM64) - set(CORE "ARMV8") - else() - message(FATAL_ERROR "When cross compiling, a CORE is required.") - endif() -endif() - # Cannot run getarch on target if we are cross-compiling if (DEFINED CORE AND CMAKE_CROSSCOMPILING) # Write to config as getarch would @@ -78,7 +93,22 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING) file(APPEND ${TARGET_CONF_TEMP} "#define ${CORE}\n" "#define CHAR_CORENAME \"${CORE}\"\n") - if ("${CORE}" STREQUAL "ARMV8") + if ("${CORE}" STREQUAL "ARMV7") + file(APPEND ${TARGET_CONF_TEMP} + "#define L1_DATA_SIZE\t65536\n" + "#define L1_DATA_LINESIZE\t32\n" + "#define L2_SIZE\t512488\n" + "#define L2_LINESIZE\t32\n" + "#define DTB_DEFAULT_ENTRIES\t64\n" + "#define DTB_SIZE\t4096\n" + "#define L2_ASSOCIATIVE\t4\n" + "#define HAVE_VFPV3\n" + "#define HAVE_VFP\n") + set(SGEMM_UNROLL_M 4) + set(SGEMM_UNROLL_N 4) + set(DGEMM_UNROLL_M 4) + set(DGEMM_UNROLL_N 4) + elseif ("${CORE}" STREQUAL "ARMV8") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t32768\n" "#define L1_DATA_LINESIZE\t64\n" diff --git a/cmake/system.cmake b/cmake/system.cmake index a66550245..daa2683d2 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -4,6 +4,26 @@ ## set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib") +# System detection, via CMake. +include("${PROJECT_SOURCE_DIR}/cmake/system_check.cmake") + +if(CMAKE_CROSSCOMPILING AND NOT DEFINED TARGET) + # Detect target without running getarch + if (ARM64) + set(TARGET "ARMV8") + elseif(ARM) + set(TARGET "ARMV7") # TODO: Ask compiler which arch this is + else() + message(FATAL_ERROR "When cross compiling, a TARGET is required.") + endif() +endif() + +# Other files expect CORE, which is actually TARGET and will become TARGET_CORE for kernel build. Confused yet? +# It seems we are meant to use TARGET as input and CORE internally as kernel. +if(NOT DEFINED CORE AND DEFINED TARGET) + set(CORE ${TARGET}) +endif() + # TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. if (DEFINED TARGET_CORE) set(TARGET ${TARGET_CORE}) @@ -56,7 +76,7 @@ if (NOT DEFINED NO_PARALLEL_MAKE) endif () set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}") -if (CMAKE_CXX_COMPILER STREQUAL loongcc) +if (CMAKE_C_COMPILER STREQUAL loongcc) set(GETARCH_FLAGS "${GETARCH_FLAGS} -static") endif () @@ -75,6 +95,7 @@ if (NOT CMAKE_CROSSCOMPILING) endif() if (NOT NUM_CORES EQUAL 0) + # HT? set(NUM_THREADS ${NUM_CORES}) endif () endif() @@ -153,7 +174,7 @@ if (NO_AVX) set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") endif () -if (${ARCH} STREQUAL "x86") +if (X86) set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") endif () @@ -166,7 +187,7 @@ if (USE_THREAD) # NO_AFFINITY = 1 set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER") - if (${ARCH} STREQUAL "mips64") + if (MIPS64) if (NOT ${CORE} STREQUAL "LOONGSON3B") set(USE_SIMPLE_THREADED_LEVEL3 1) endif () @@ -237,7 +258,7 @@ if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") set(NO_AFFINITY 1) endif () -if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B") +if (NOT X86_64 AND NOT X86 AND NOT ${CORE} STREQUAL "LOONGSON3B") set(NO_AFFINITY 1) endif () @@ -377,7 +398,7 @@ endif () set(USE_GEMM3M 0) if (DEFINED ARCH) - if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") + if (X86 OR X86_64 OR ${ARCH} STREQUAL "ia64" OR MIPS64) set(USE_GEMM3M 1) endif () @@ -460,35 +481,3 @@ endif () # export CUFLAGS # export CULIB #endif - -#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f -# -#.f.$(SUFFIX): -# $(FC) $(FFLAGS) -c $< -o $(@F) -# -#.f.$(PSUFFIX): -# $(FC) $(FPFLAGS) -pg -c $< -o $(@F) - -# these are not cross-platform -#ifdef BINARY64 -#PATHSCALEPATH = /opt/pathscale/lib/3.1 -#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib -#else -#PATHSCALEPATH = /opt/pathscale/lib/3.1/32 -#PGIPATH = /opt/pgi/linux86/7.1-5/lib -#endif - -#ACMLPATH = /opt/acml/4.3.0 -#ifneq ($(OSNAME), Darwin) -#MKLPATH = /opt/intel/mkl/10.2.2.025/lib -#else -#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib -#endif -#ATLASPATH = /opt/atlas/3.9.17/opteron -#FLAMEPATH = $(HOME)/flame/lib -#ifneq ($(OSNAME), SunOS) -#SUNPATH = /opt/sunstudio12.1 -#else -#SUNPATH = /opt/SUNWspro -#endif - diff --git a/cmake/c_check.cmake b/cmake/system_check.cmake similarity index 59% rename from cmake/c_check.cmake rename to cmake/system_check.cmake index 99590c975..d47c38cdd 100644 --- a/cmake/c_check.cmake +++ b/cmake/system_check.cmake @@ -4,32 +4,6 @@ ## This is triggered by prebuild.cmake and runs before any of the code is built. ## Creates config.h and Makefile.conf. -# CMake vars set by this file: -# OSNAME (use CMAKE_SYSTEM_NAME) -# ARCH -# C_COMPILER (use CMAKE_C_COMPILER) -# BINARY32 -# BINARY64 -# FU -# CROSS_SUFFIX -# CROSS -# CEXTRALIB - -# Defines set by this file: -# OS_ -# ARCH_ -# C_ -# __32BIT__ -# __64BIT__ -# FUNDERSCORE -# PTHREAD_CREATE_FUNC - -# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. -set(FU "") -if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")) - set(FU "_") -endif() - # Convert CMake vars into the format that OpenBLAS expects string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS) if (${HOST_OS} STREQUAL "WINDOWS") @@ -37,7 +11,7 @@ if (${HOST_OS} STREQUAL "WINDOWS") endif () if(CMAKE_COMPILER_IS_GNUCC AND WIN32) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE OUTPUT_STRIP_TRAILING_WHITESPACE) if(OPENBLAS_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64") @@ -50,8 +24,10 @@ if(CMAKE_CL_64 OR MINGW64) set(X86_64 1) elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING)) set(X86 1) -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*") set(PPC 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*") + set(MIPS64 1) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") set(X86_64 1) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") @@ -77,7 +53,7 @@ else() endif () if (NOT BINARY) - if (X86_64 OR ARM64 OR PPC OR ARCH STREQUAL "mips64") + if (X86_64 OR ARM64 OR PPC OR MIPS64) set(BINARY 64) else () set(BINARY 32) @@ -90,22 +66,3 @@ else() set(BINARY32 1) endif() -set(COMPILER_ID ${CMAKE_C_COMPILER_ID}) -if (${COMPILER_ID} STREQUAL "GNU") - set(COMPILER_ID "GCC") -endif () - -string(TOUPPER ${ARCH} UC_ARCH) - -file(WRITE ${TARGET_CONF_TEMP} - "#define OS_${HOST_OS}\t1\n" - "#define ARCH_${UC_ARCH}\t1\n" - "#define C_${COMPILER_ID}\t1\n" - "#define __${BINARY}BIT__\t1\n" - "#define FUNDERSCORE\t${FU}\n") - -if (${HOST_OS} STREQUAL "WINDOWSSTORE") - file(APPEND ${TARGET_CONF_TEMP} - "#define OS_WINNT\t1\n") -endif () - diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 9178ba745..09e513ca5 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -21,7 +21,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) endif () if (${ADD_COMMONOBJS}) - if (${ARCH} STREQUAL "x86") + if (X86) if (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) else() @@ -121,7 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) # Makefile.L3 set(USE_TRMM false) - if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "{CORE}" STREQUAL "zen") + if (ARM OR ARM64 OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "{CORE}" STREQUAL "zen") set(USE_TRMM true) endif () From a1b87eac6bbe140aceb28d50e585979df471a460 Mon Sep 17 00:00:00 2001 From: Sacha Refshauge Date: Wed, 23 Aug 2017 07:19:02 +1000 Subject: [PATCH 014/122] Do not require Perl for MSVC if CMake >= 3.4 --- CMakeLists.txt | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 235a48747..be6729974 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,9 +119,11 @@ endif () # Only generate .def for dll on MSVC and always produce pdb files for debug and release if(MSVC) -set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi") -set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") + if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4) + set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") + endif() + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi") + set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") endif() if (${DYNAMIC_ARCH}) @@ -141,7 +143,15 @@ endif() # add objects to the openblas lib add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) -include("${PROJECT_SOURCE_DIR}/cmake/export.cmake") +# Handle MSVC exports +if(MSVC AND BUILD_SHARED_LIBS) + if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4) + include("${PROJECT_SOURCE_DIR}/cmake/export.cmake") + else() + # Creates verbose .def file (51KB vs 18KB) + set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true) + endif() +endif() # Set output for libopenblas set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) @@ -164,7 +174,6 @@ if (USE_THREAD) set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY COMPILE_OPTIONS "-pthread") set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") endif() - message("PTHREAD: ${CMAKE_THREAD_LIBS_INIT}") target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT}) endif() From 7a867082d84905be5efcc39c820a23e7aa19eff0 Mon Sep 17 00:00:00 2001 From: Sacha Date: Wed, 23 Aug 2017 11:16:24 +1000 Subject: [PATCH 015/122] Fix open_blas.config which was never working out-of-source. Remove need for gen_config_h.exe. If OpenMP is requested, do not silently ignore when it isn't available. --- CMakeLists.txt | 27 +++++++++++++++------------ cmake/arch.cmake | 4 +--- cmake/prebuild.cmake | 19 ------------------- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be6729974..da13f8a0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -199,18 +199,21 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES install(TARGETS ${OpenBLAS_LIBNAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) - -# Install include files -set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX}) - -execute_process(COMMAND ${GENCONFIG_BIN} - ${CMAKE_CURRENT_SOURCE_DIR}/config.h - ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h - OUTPUT_VARIABLE OPENBLAS_CONFIG_H_CONTENTS) - -file(WRITE ${CMAKE_BINARY_DIR}/openblas_config.tmp "${OPENBLAS_CONFIG_H_CONTENTS}") -configure_file(${CMAKE_BINARY_DIR}/openblas_config.tmp ${CMAKE_BINARY_DIR}/openblas_config.h COPYONLY) + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) + +set(OPENBLAS_TMP ${CMAKE_BINARY_DIR}/openblas_config.tmp) +file(WRITE ${OPENBLAS_TMP} "#ifndef OPENBLAS_CONFIG_H\n") +file(APPEND ${OPENBLAS_TMP} "#define OPENBLAS_CONFIG_H\n") +file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines) +foreach(line ${__lines}) + string(REPLACE "#define " "" line ${line}) + file(APPEND ${OPENBLAS_TMP} "#define OPENBLAS_${line}\n") +endforeach() +file(APPEND ${OPENBLAS_TMP} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n") +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h __template) +file(APPEND ${OPENBLAS_TMP} "${__template}") +file(APPEND ${OPENBLAS_TMP} "#endif /* OPENBLAS_CONFIG_H */\n") +configure_file(${OPENBLAS_TMP} ${CMAKE_BINARY_DIR}/openblas_config.h COPYONLY) install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 69a117ca9..798a9ef82 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -35,12 +35,10 @@ endif () if (USE_OPENMP) # USE_SIMPLE_THREADED_LEVEL3 = 1 # NO_AFFINITY = 1 - find_package(OpenMP) + find_package(OpenMP REQUIRED) if (OpenMP_FOUND) set(CCOMMON_OPT "${CCOMMON_OPT} ${OpenMP_C_FLAGS} -DUSE_OPENMP") set(FCOMMON_OPT "${FCOMMON_OPT} ${OpenMP_Fortran_FLAGS}") - elseif (UNIX) - set(USE_OPENMP 0) endif() endif () diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 3b8ab9be6..cc5475630 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -241,23 +241,4 @@ else(NOT CMAKE_CROSSCOMPILING) ParseGetArchVars(${GETARCH2_MAKE_OUT}) - # compile get_config_h - set(GEN_CONFIG_H_DIR "${PROJECT_BINARY_DIR}/genconfig_h_build") - set(GEN_CONFIG_H_BIN "gen_config_h${CMAKE_EXECUTABLE_SUFFIX}") - set(GEN_CONFIG_H_FLAGS "-DVERSION=\"${OpenBLAS_VERSION}\"") - file(MAKE_DIRECTORY ${GEN_CONFIG_H_DIR}) - - if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") - try_compile(GEN_CONFIG_H_RESULT ${GEN_CONFIG_H_DIR} - SOURCES ${PROJECT_SOURCE_DIR}/gen_config_h.c - COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GEN_CONFIG_H_FLAGS} -I${PROJECT_SOURCE_DIR} - OUTPUT_VARIABLE GEN_CONFIG_H_LOG - COPY_FILE ${PROJECT_BINARY_DIR}/${GEN_CONFIG_H_BIN} - ) - - if (NOT ${GEN_CONFIG_H_RESULT}) - MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}") - endif () -endif () - endif() From ef64991506c88486ed84611e40956d89f868d624 Mon Sep 17 00:00:00 2001 From: Sacha Date: Wed, 23 Aug 2017 12:47:38 +1000 Subject: [PATCH 016/122] Clean up config file writing. --- CMakeLists.txt | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index da13f8a0b..771764e2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -177,15 +177,17 @@ if (USE_THREAD) target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT}) endif() -if (NOT MSVC AND NOT NOFORTRAN) +if (MSVC OR NOT NOFORTRAN) + # Broken without fortran on unix add_subdirectory(utest) +endif() + +if (NOT MSVC AND NOT NOFORTRAN) # Build test and ctest add_subdirectory(test) if(NOT NO_CBLAS) add_subdirectory(ctest) endif() -elseif(MSVC) - add_subdirectory(utest) endif() set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES @@ -201,31 +203,33 @@ install(TARGETS ${OpenBLAS_LIBNAME} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) -set(OPENBLAS_TMP ${CMAKE_BINARY_DIR}/openblas_config.tmp) -file(WRITE ${OPENBLAS_TMP} "#ifndef OPENBLAS_CONFIG_H\n") -file(APPEND ${OPENBLAS_TMP} "#define OPENBLAS_CONFIG_H\n") +message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}") + +set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h) +file(WRITE ${OPENBLAS_CONFIG_H} "#ifndef OPENBLAS_CONFIG_H\n") +file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_CONFIG_H\n") file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines) foreach(line ${__lines}) string(REPLACE "#define " "" line ${line}) - file(APPEND ${OPENBLAS_TMP} "#define OPENBLAS_${line}\n") + file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_${line}\n") endforeach() -file(APPEND ${OPENBLAS_TMP} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n") -file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h __template) -file(APPEND ${OPENBLAS_TMP} "${__template}") -file(APPEND ${OPENBLAS_TMP} "#endif /* OPENBLAS_CONFIG_H */\n") -configure_file(${OPENBLAS_TMP} ${CMAKE_BINARY_DIR}/openblas_config.h COPYONLY) - -install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n") +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS) +file(APPEND ${OPENBLAS_CONFIG_H} "${OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS}\n") +file(APPEND ${OPENBLAS_CONFIG_H} "#endif /* OPENBLAS_CONFIG_H */\n") +install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) if(NOT NOFORTRAN) message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}") - file(WRITE ${CMAKE_BINARY_DIR}/f77blas.h "") - file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#ifndef OPENBLAS_F77BLAS_H\n#define OPENBLAS_F77BLAS_H\n#include \"openblas_config.h\"\n") + set(F77BLAS_H ${CMAKE_BINARY_DIR}/f77blas.h) + file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n") + file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n") + file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n") file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS) - file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "${COMMON_INTERFACE_H_CONTENTS}") - file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#endif") - install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + file(APPEND ${F77BLAS_H} "${COMMON_INTERFACE_H_CONTENTS}\n") + file(APPEND ${F77BLAS_H} "#endif") + install (FILES ${F77BLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() if(NOT NO_CBLAS) From e3d844b0621fc0d6849626e580581507366055c7 Mon Sep 17 00:00:00 2001 From: Shivraj Patil Date: Fri, 22 Sep 2017 11:57:43 +0530 Subject: [PATCH 017/122] Added mips I6500 core Signed-off-by: Shivraj Patil --- Makefile.prebuild | 4 ++++ Makefile.system | 5 +++++ TargetList.txt | 1 + cpuid_mips64.c | 17 ++++++++++++++++- getarch.c | 15 +++++++++++++++ kernel/mips64/KERNEL.I6500 | 1 + param.h | 2 +- 7 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 kernel/mips64/KERNEL.I6500 diff --git a/Makefile.prebuild b/Makefile.prebuild index 524f0a741..daa556f65 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -29,6 +29,10 @@ ifeq ($(TARGET), P6600) TARGET_FLAGS = -mips64r6 endif +ifeq ($(TARGET), I6500) +TARGET_FLAGS = -mips64r6 +endif + all: getarch_2nd ./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 1 >> $(TARGET_CONF) diff --git a/Makefile.system b/Makefile.system index 74bc5cd9f..972238f36 100644 --- a/Makefile.system +++ b/Makefile.system @@ -568,6 +568,11 @@ CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS) FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS) endif +ifeq ($(CORE), I6500) +CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS) +FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS) +endif + ifeq ($(OSNAME), AIX) BINARY_DEFINED = 1 endif diff --git a/TargetList.txt b/TargetList.txt index 7f2705842..d40545cf8 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -63,6 +63,7 @@ LOONGSON3A LOONGSON3B I6400 P6600 +I6500 5.IA64 CPU: ITANIUM2 diff --git a/cpuid_mips64.c b/cpuid_mips64.c index ac1554c79..dcb559a7c 100644 --- a/cpuid_mips64.c +++ b/cpuid_mips64.c @@ -76,6 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CPU_LOONGSON3B 3 #define CPU_I6400 4 #define CPU_P6600 5 +#define CPU_I6500 6 static char *cpuname[] = { "UNKOWN", @@ -83,7 +84,8 @@ static char *cpuname[] = { "LOONGSON3A", "LOONGSON3B", "I6400", - "P6600" + "P6600", + "I6500" }; int detect(void){ @@ -165,6 +167,8 @@ void get_subarchitecture(void){ printf("I6400"); }else if(detect()==CPU_P6600){ printf("P6600"); + }else if(detect()==CPU_I6500){ + printf("I6500"); }else{ printf("SICORTEX"); } @@ -211,6 +215,15 @@ void get_cpuconfig(void){ printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 4096\n"); printf("#define L2_ASSOCIATIVE 8\n"); + }else if(detect()==CPU_I6500){ + printf("#define I6500\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 32\n"); + printf("#define L2_SIZE 1048576\n"); + printf("#define L2_LINESIZE 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 8\n"); }else{ printf("#define SICORTEX\n"); printf("#define L1_DATA_SIZE 32768\n"); @@ -232,6 +245,8 @@ void get_libname(void){ printf("i6400\n"); }else if(detect()==CPU_P6600) { printf("p6600\n"); + }else if(detect()==CPU_I6500) { + printf("i6500\n"); }else{ printf("mips64\n"); } diff --git a/getarch.c b/getarch.c index 2728b80b7..c0fff0436 100644 --- a/getarch.c +++ b/getarch.c @@ -134,6 +134,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_I6400 */ /* #define FORCE_P6600 */ /* #define FORCE_P5600 */ +/* #define FORCE_I6500 */ /* #define FORCE_ITANIUM2 */ /* #define FORCE_SPARC */ /* #define FORCE_SPARCV7 */ @@ -765,6 +766,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_I6500 +#define FORCE +#define ARCHITECTURE "MIPS" +#define SUBARCHITECTURE "I6500" +#define SUBDIRNAME "mips64" +#define ARCHCONFIG "-DI6500 " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " +#define LIBNAME "i6500" +#define CORENAME "I6500" +#else +#endif + #ifdef FORCE_ITANIUM2 #define FORCE #define ARCHITECTURE "IA64" diff --git a/kernel/mips64/KERNEL.I6500 b/kernel/mips64/KERNEL.I6500 new file mode 100644 index 000000000..abf44814a --- /dev/null +++ b/kernel/mips64/KERNEL.I6500 @@ -0,0 +1 @@ +include $(KERNELDIR)/../mips/KERNEL.P5600 diff --git a/param.h b/param.h index 2d5bccee7..189cdc4a0 100644 --- a/param.h +++ b/param.h @@ -2291,7 +2291,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif -#if defined(P5600) || defined(I6400) || defined(P6600) +#if defined(P5600) || defined(I6400) || defined(P6600) || defined(I6500) #define SNUMOPT 2 #define DNUMOPT 2 From 79e754e5483986170f66127bb874bb3060d18ca8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 25 Sep 2017 23:45:14 +0200 Subject: [PATCH 018/122] Rewrite NOFORTRAN conditionals ... so that they do not trigger accidentally when NOFORTRAN is empty/unset --- exports/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exports/Makefile b/exports/Makefile index 873adc354..4005ab08a 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -51,7 +51,7 @@ endif endif endif -ifeq ($(NOFORTRAN), $(filter $(NOFORTRAN),1 2)) +ifneq (,$(filter 1 2,$(NOFORTRAN))) FEXTRALIB = endif @@ -112,7 +112,7 @@ else $(OBJCONV) @objconv.def ../$(LIBNAME) ../$(LIBNAME).osx.renamed $(LIBDYNNAME) : ../$(LIBNAME).osx.renamed osx.def endif -ifeq ($(NOFORTRAN), $(filter $(NOFORTRAN),1 2)) +ifneq (,$(filter 1 2,$(NOFORTRAN))) #only build without Fortran $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) else From 6aaa1078658fbf890f5a4d45204bd9a303030ad6 Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Wed, 27 Sep 2017 19:25:33 -0700 Subject: [PATCH 019/122] Reducing threads for multi-threaded GEMMs on small matrices. --- driver/level3/level3_thread.c | 65 +++++------------------------------ 1 file changed, 9 insertions(+), 56 deletions(-) diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index fec873e51..8ab4ef699 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -684,8 +684,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG m = args -> m; BLASLONG n = args -> n; BLASLONG nthreads = args -> nthreads; - BLASLONG divN, divT; - int mode; if (nthreads == 1) { GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); @@ -706,66 +704,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO n = n_to - n_from; } - if ((m < nthreads * SWITCH_RATIO) || (n < nthreads * SWITCH_RATIO)) { + if ((m < 2 * SWITCH_RATIO) || (n < 2 * SWITCH_RATIO)) { GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); return 0; } - divT = nthreads; - divN = 1; - -#if 0 - while ((GEMM_P * divT > m * SWITCH_RATIO) && (divT > 1)) { - do { - divT --; - divN = 1; - while (divT * divN < nthreads) divN ++; - } while ((divT * divN != nthreads) && (divT > 1)); + if (m < nthreads * SWITCH_RATIO) { + nthreads = blas_quickdivide(m, SWITCH_RATIO); } -#endif - - // fprintf(stderr, "divN = %4ld divT = %4ld\n", divN, divT); - - args -> nthreads = divT; - - if (divN == 1){ - - gemm_driver(args, range_m, range_n, sa, sb, 0); - } else { -#ifndef COMPLEX -#ifdef XDOUBLE - mode = BLAS_XDOUBLE | BLAS_REAL; -#elif defined(DOUBLE) - mode = BLAS_DOUBLE | BLAS_REAL; -#else - mode = BLAS_SINGLE | BLAS_REAL; -#endif -#else -#ifdef XDOUBLE - mode = BLAS_XDOUBLE | BLAS_COMPLEX; -#elif defined(DOUBLE) - mode = BLAS_DOUBLE | BLAS_COMPLEX; -#else - mode = BLAS_SINGLE | BLAS_COMPLEX; -#endif -#endif - -#if defined(TN) || defined(TT) || defined(TR) || defined(TC) || \ - defined(CN) || defined(CT) || defined(CR) || defined(CC) - mode |= (BLAS_TRANSA_T); -#endif -#if defined(NT) || defined(TT) || defined(RT) || defined(CT) || \ - defined(NC) || defined(TC) || defined(RC) || defined(CC) - mode |= (BLAS_TRANSB_T); -#endif - -#ifdef OS_WINDOWS - gemm_thread_n(mode, args, range_m, range_n, GEMM_LOCAL, sa, sb, divN); -#else - gemm_thread_n(mode, args, range_m, range_n, gemm_driver, sa, sb, divN); -#endif - + if (n < nthreads * SWITCH_RATIO) { + nthreads = blas_quickdivide(n, SWITCH_RATIO); } + args -> nthreads = nthreads; + + gemm_driver(args, range_m, range_n, sa, sb, 0); + return 0; } From 9c017a221827496dcfecbfcebd2bca74fd46e93a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 28 Sep 2017 12:17:09 +0200 Subject: [PATCH 020/122] Save and restore VSX registers --- kernel/power/cgemm_kernel_8x4_power8.S | 77 ++++++++++++++++++---- kernel/power/cgemm_tcopy_8_power8.S | 54 ++++++++++++++- kernel/power/ctrmm_kernel_8x4_power8.S | 71 +++++++++++++++++--- kernel/power/dgemm_kernel_16x4_power8.S | 69 ++++++++++++++++--- kernel/power/dgemm_ncopy_4_power8.S | 63 ++++++++++++++++-- kernel/power/dgemm_tcopy_16_power8.S | 58 +++++++++++++++- kernel/power/dtrmm_kernel_16x4_power8.S | 72 +++++++++++++++++--- kernel/power/dtrsm_kernel_LT_16x4_power8.S | 61 ++++++++++++++++- kernel/power/sgemm_kernel_16x8_power8.S | 69 +++++++++++++++++-- kernel/power/sgemm_tcopy_16_power8.S | 54 ++++++++++++++- kernel/power/sgemm_tcopy_8_power8.S | 54 ++++++++++++++- kernel/power/strmm_kernel_16x8_power8.S | 69 +++++++++++++++++-- kernel/power/zgemm_kernel_8x2_power8.S | 74 +++++++++++++++++---- kernel/power/zgemm_tcopy_8_power8.S | 52 ++++++++++++++- kernel/power/ztrmm_kernel_8x2_power8.S | 76 +++++++++++++++++---- 15 files changed, 884 insertions(+), 89 deletions(-) diff --git a/kernel/power/cgemm_kernel_8x4_power8.S b/kernel/power/cgemm_kernel_8x4_power8.S index 0c462ce8e..8dbb6011d 100644 --- a/kernel/power/cgemm_kernel_8x4_power8.S +++ b/kernel/power/cgemm_kernel_8x4_power8.S @@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 32000 -#define ALPHA_R_SP 296(SP) -#define ALPHA_I_SP 304(SP) -#define FZERO 312(SP) +#define STACKSIZE 32196 +#define ALPHA_R_SP 296+196(SP) +#define ALPHA_I_SP 304+196(SP) +#define FZERO 312+196(SP) #else -#define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 456 +#define ALPHA_R_SP 224+200(SP) +#define ALPHA_I_SP 232+200(SP) +#define FZERO 240+200(SP) #endif #define M r3 @@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define FRAMEPOINTER r12 +#define VECSAVE r11 + #define BBUFFER r14 #define L r15 #define o12 r16 @@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE + li r0, 0 stfd f14, 0(SP) @@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 stfs f1, ALPHA_R_SP stfs f2, ALPHA_I_SP @@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ - addi T1 , SP, 296 + addi T1 , SP, 296+196 #else - addi T1 , SP, 224 + addi T1 , SP, 224+200 #endif stxsspx vs1, 0, T1 @@ -375,6 +404,7 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -394,7 +424,32 @@ L999: lwz r16, 204(SP) lwz r15, 208(SP) lwz r14, 212(SP) + addi r11, 224 #endif + lvx v20, r11, r0 + addi r11, r11, 16 + lvx v21, r11, r0 + addi r11, r11, 16 + lvx v22, r11, r0 + addi r11, r11, 16 + lvx v23, r11, r0 + addi r11, r11, 16 + lvx v24, r11, r0 + addi r11, r11, 16 + lvx v25, r11, r0 + addi r11, r11, 16 + lvx v26, r11, r0 + addi r11, r11, 16 + lvx v27, r11, r0 + addi r11, r11, 16 + lvx v28, r11, r0 + addi r11, r11, 16 + lvx v29, r11, r0 + addi r11, r11, 16 + lvx v30, r11, r0 + addi r11, r11, 16 + lvx v31, r11, r0 + li r11, 0 addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE @@ -404,4 +459,4 @@ L999: blr EPILOGUE -#endif +#endif^ diff --git a/kernel/power/cgemm_tcopy_8_power8.S b/kernel/power/cgemm_tcopy_8_power8.S index b1a7d2b27..66a50584c 100644 --- a/kernel/power/cgemm_tcopy_8_power8.S +++ b/kernel/power/cgemm_tcopy_8_power8.S @@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define J r12 + #define PREA r14 #define PREB r15 #define BO r16 @@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cgemm_tcopy_macros_8_power8.S" -#define STACKSIZE 384 +#define STACKSIZE 576 PROLOGUE @@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 cmpwi cr0, M, 0 ble- L999 @@ -197,9 +223,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/ctrmm_kernel_8x4_power8.S b/kernel/power/ctrmm_kernel_8x4_power8.S index 460a387fb..26f49c663 100644 --- a/kernel/power/ctrmm_kernel_8x4_power8.S +++ b/kernel/power/ctrmm_kernel_8x4_power8.S @@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 400 -#define ALPHA_R_SP 304(SP) -#define ALPHA_I_SP 312(SP) +#define STACKSIZE 592 +#define ALPHA_R_SP 304+192(SP) +#define ALPHA_I_SP 312+192(SP) #else #define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 452 +#define ALPHA_R_SP 224+196(SP) +#define ALPHA_I_SP 232+196(SP) +#define FZERO 240+196(SP) #endif #define M r3 @@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define alpha_sr vs30 #define alpha_si vs31 +#define VECSAVE r11 + #define o12 r12 #define KKK r13 #define K1 r14 @@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r14, 280(SP) std r13, 288(SP) std r12, 296(SP) + addi r11, SP, 304 #else stw r31, 144(SP) stw r30, 148(SP) @@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r15, 208(SP) stw r14, 212(SP) stw r13, 216(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 stfs f1, ALPHA_R_SP stfs f2, ALPHA_I_SP @@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ - addi T1, SP, 304 + addi T1, SP, 304+192 #else - addi T1, SP, 224 + addi T1, SP, 224+196 #endif lxsspx alpha_dr, 0, T1 @@ -369,6 +399,7 @@ L999: ld r14, 280(SP) ld r13, 288(SP) ld r12, 296(SP) + addi r11, SP, 304 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -389,10 +420,34 @@ L999: lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/dgemm_kernel_16x4_power8.S b/kernel/power/dgemm_kernel_16x4_power8.S index 8af7fe389..41958eab0 100644 --- a/kernel/power/dgemm_kernel_16x4_power8.S +++ b/kernel/power/dgemm_kernel_16x4_power8.S @@ -83,12 +83,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 320 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 512 +#define ALPHA_SP 296+192(SP) +#define FZERO 304+192(SP) #else #define STACKSIZE 240 -#define ALPHA_SP 224(SP) -#define FZERO 232(SP) +#define STACKSIZE 440 +#define ALPHA_SP 224+200(SP) +#define FZERO 232+200(SP) #endif #define M r3 @@ -210,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11,SP,288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -229,7 +232,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) + addi r11,SP,224 #endif + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 stfd f1, ALPHA_SP stw r0, FZERO @@ -269,12 +297,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ble .L999_H1 #ifdef __64BIT__ - addi T1, SP, 296 + addi T1, SP, 296+192 #else - addi T1, SP, 224 + addi T1, SP, 224+200 #endif - li PRE, 384 + li PRE, 384 li o8 , 8 li o16, 16 li o24, 24 @@ -334,6 +362,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11,SP,288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -353,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lwz r16, 204(SP) lwz r15, 208(SP) lwz r14, 212(SP) + addi r11,SP,224 #endif + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/dgemm_ncopy_4_power8.S b/kernel/power/dgemm_ncopy_4_power8.S index 31966047f..e0936574d 100644 --- a/kernel/power/dgemm_ncopy_4_power8.S +++ b/kernel/power/dgemm_ncopy_4_power8.S @@ -110,12 +110,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "dgemm_ncopy_macros_4_power8.S" #define STACKSIZE 384 - +#define STACKSIZE 576 PROLOGUE PROFCODE addi SP, SP, -STACKSIZE +//addi SP, SP, -208 li r0, 0 stfd f14, 0(SP) @@ -157,6 +158,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r15, 272(SP) std r14, 280(SP) +addi r11,SP,288 + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 + cmpwi cr0, M, 0 ble- L999 cmpwi cr0, N, 0 @@ -164,8 +191,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. slwi LDA, LDA, BASE_SHIFT - li PREA, 384 - li PREB, 384 + //li PREA, 384 + //li PREB, 384 + li PREA, 576 + li PREB, 576 + li o8, 8 li o16, 16 @@ -219,9 +249,34 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) +addi r11,SP,288 + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 addi SP, SP, STACKSIZE - +//addi SP, SP, 208 blr EPILOGUE diff --git a/kernel/power/dgemm_tcopy_16_power8.S b/kernel/power/dgemm_tcopy_16_power8.S index eb37877e0..6da816220 100644 --- a/kernel/power/dgemm_tcopy_16_power8.S +++ b/kernel/power/dgemm_tcopy_16_power8.S @@ -110,12 +110,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "dgemm_tcopy_macros_16_power8.S" #define STACKSIZE 384 +#define STACKSIZE 576 PROLOGUE PROFCODE addi SP, SP, -STACKSIZE +//addi SP, SP, -208 + li r0, 0 std r31, 144(SP) @@ -136,6 +139,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) +addi r11,SP,288 + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 cmpwi cr0, M, 0 ble- L999 @@ -170,7 +198,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add B2, B2, B add B1, B1, B - li PREA, 384 + //li PREA, 384 + li PREA, 576 addi PREB, M16, 128 li o8, 8 @@ -202,9 +231,34 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) +addi r11,SP,288 + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 addi SP, SP, STACKSIZE - +//addi SP, SP, 208 blr EPILOGUE diff --git a/kernel/power/dtrmm_kernel_16x4_power8.S b/kernel/power/dtrmm_kernel_16x4_power8.S index e9dbd991e..47e703a3a 100644 --- a/kernel/power/dtrmm_kernel_16x4_power8.S +++ b/kernel/power/dtrmm_kernel_16x4_power8.S @@ -83,12 +83,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 320 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 520 +#define ALPHA_SP 296+200(SP) +#define FZERO 304+200(SP) #else -#define STACKSIZE 240 -#define ALPHA_SP 224(SP) -#define FZERO 232(SP) +#define STACKSIZE 436 +#define ALPHA_SP 224+196(SP) +#define FZERO 232+196(SP) #endif #define M r3 @@ -152,6 +153,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define PRE r30 #define T2 r31 +#define VECSAVE r11 + #include "dtrmm_macros_16x4_power8.S" @@ -206,6 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r15, 272(SP) std r14, 280(SP) std r13, 288(SP) + addi r11, SP, 304 #else stw r31, 144(SP) stw r30, 148(SP) @@ -226,7 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r15, 208(SP) stw r14, 212(SP) stw r13, 216(SP) + addi r11, r0, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11 ,r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11,0 + + stw r31, 144(SP) stfd f1, ALPHA_SP stw r0, FZERO @@ -270,9 +301,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ble .L999_H1 #ifdef __64BIT__ - addi ALPHA, SP, 296 + addi ALPHA, SP, 296+200 #else - addi ALPHA, SP, 224 + addi ALPHA, SP, 224+196 #endif li PRE, 256 @@ -332,6 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ld r15, 272(SP) ld r14, 280(SP) ld r13, 288(SP) + addi r11, SP, 304 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -352,10 +384,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/dtrsm_kernel_LT_16x4_power8.S b/kernel/power/dtrsm_kernel_LT_16x4_power8.S index fdfc5ac70..7a4a30390 100644 --- a/kernel/power/dtrsm_kernel_LT_16x4_power8.S +++ b/kernel/power/dtrsm_kernel_LT_16x4_power8.S @@ -48,8 +48,9 @@ #ifdef __64BIT__ #define STACKSIZE 320 -#define ALPHA 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 520 +#define ALPHA 296+200(SP) +#define FZERO 304+200(SP) #else #define STACKSIZE 240 #define ALPHA 224(SP) @@ -112,6 +113,8 @@ #define o48 r30 #define T1 r31 +#define VECSAVE r11 + #include "dtrsm_macros_LT_16x4_power8.S" #ifndef NEEDPARAM @@ -163,6 +166,7 @@ std r17, 256(SP) std r16, 264(SP) std r15, 272(SP) + addi r11,SP,288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -178,7 +182,32 @@ stw r20, 188(SP) stw r19, 192(SP) stw r18, 196(SP) + addi r11,SP,208 #endif + stvx v20, r11,r0 +addi r11,r11,16 + stvx v21, r11,r0 +addi r11,r11,16 + stvx v22, r11,r0 +addi r11,r11,16 + stvx v23, r11,r0 +addi r11,r11,16 + stvx v24, r11,r0 +addi r11,r11,16 + stvx v25, r11,r0 +addi r11,r11,16 + stvx v26, r11,r0 +addi r11,r11,16 + stvx v27, r11,r0 +addi r11,r11,16 + stvx v28, r11,r0 +addi r11,r11,16 + stvx v29, r11,r0 +addi r11,r11,16 + stvx v30, r11,r0 +addi r11,r11,16 + stvx v31, r11,r0 +li r11,0 #if defined(_AIX) || defined(__APPLE__) @@ -269,6 +298,7 @@ L999: ld r17, 256(SP) ld r16, 264(SP) ld r15, 272(SP) + addi r11,SP,288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -284,10 +314,35 @@ L999: lwz r20, 188(SP) lwz r19, 192(SP) lwz r18, 196(SP) + addi r11,SP,208 #endif + lvx v20, r11,r3 +addi r11,r11,16 + lvx v21, r11,r3 +addi r11,r11,16 + lvx v22, r11,r3 +addi r11,r11,16 + lvx v23, r11,r3 +addi r11,r11,16 + lvx v24, r11,r3 +addi r11,r11,16 + lvx v25, r11,r3 +addi r11,r11,16 + lvx v26, r11,r3 +addi r11,r11,16 + lvx v27, r11,r3 +addi r11,r11,16 + lvx v28, r11,r3 +addi r11,r11,16 + lvx v29, r11,r3 +addi r11,r11,16 + lvx v30, r11,r3 +addi r11,r11,16 + lvx v31, r11,r3 +li r11,0 - addi SP, SP, STACKSIZE + addi SP, SP, STACKSIZE blr EPILOGUE diff --git a/kernel/power/sgemm_kernel_16x8_power8.S b/kernel/power/sgemm_kernel_16x8_power8.S index e169eb970..c72b00cf6 100644 --- a/kernel/power/sgemm_kernel_16x8_power8.S +++ b/kernel/power/sgemm_kernel_16x8_power8.S @@ -83,12 +83,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 32752 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define ALPHA_SP 296+192(SP) +#define FZERO 304+192(SP) #else -#define STACKSIZE 240 -#define ALPHA_SP 224(SP) -#define FZERO 232(SP) +#define STACKSIZE 440 +#define ALPHA_SP 224+200(SP) +#define FZERO 232+200(SP) #endif #define M r3 @@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define o0 0 +#define VECSAVE r11 + #define FRAMEPOINTER r12 #define BBUFFER r14 @@ -211,6 +213,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -230,7 +233,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11,0 + // stfd f1, ALPHA_SP // stw r0, FZERO @@ -281,7 +310,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. li T1, -4096 and BBUFFER, BBUFFER, T1 - addi T1, SP, 300 + addi T1, SP, 300+192 stxsspx f1, o0 , T1 stxsspx f1, o4 , T1 stxsspx f1, o8 , T1 @@ -339,6 +368,7 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -358,13 +388,38 @@ L999: lwz r16, 204(SP) lwz r15, 208(SP) lwz r14, 212(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 + addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/sgemm_tcopy_16_power8.S b/kernel/power/sgemm_tcopy_16_power8.S index 764d5b187..8f6b4d8c4 100644 --- a/kernel/power/sgemm_tcopy_16_power8.S +++ b/kernel/power/sgemm_tcopy_16_power8.S @@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "sgemm_tcopy_macros_16_power8.S" -#define STACKSIZE 384 - +#define STACKSIZE 576 PROLOGUE PROFCODE @@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11 ,SP, 288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 cmpwi cr0, M, 0 ble- L999 @@ -203,9 +227,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/sgemm_tcopy_8_power8.S b/kernel/power/sgemm_tcopy_8_power8.S index 2bbd6e696..98185432a 100644 --- a/kernel/power/sgemm_tcopy_8_power8.S +++ b/kernel/power/sgemm_tcopy_8_power8.S @@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "sgemm_tcopy_macros_8_power8.S" -#define STACKSIZE 384 - +#define STACKSIZE 576 PROLOGUE PROFCODE @@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 cmpwi cr0, M, 0 ble- L999 @@ -198,9 +222,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11,SP,288 + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/strmm_kernel_16x8_power8.S b/kernel/power/strmm_kernel_16x8_power8.S index f756d5d92..f9b8a0bb8 100644 --- a/kernel/power/strmm_kernel_16x8_power8.S +++ b/kernel/power/strmm_kernel_16x8_power8.S @@ -83,8 +83,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __64BIT__ #define STACKSIZE 340 -#define ALPHA_SP 296(SP) -#define FZERO 304(SP) +#define STACKSIZE 540 +#define ALPHA_SP 296+200(SP) +#define FZERO 304+200(SP) #else #define STACKSIZE 240 #define ALPHA_SP 224(SP) @@ -132,6 +133,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define o0 0 +#define VECSAVE r11 + #define TBUFFER r13 #define o12 r14 #define o4 r15 @@ -207,6 +210,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r15, 272(SP) std r14, 280(SP) std r13, 288(SP) + addi r11, SP, 304 #else stw r31, 144(SP) stw r30, 148(SP) @@ -226,8 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r16, 204(SP) stw r15, 208(SP) stw r14, 212(SP) - stw r13, 216(SP) + stw r13, 216(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 + // stfd f1, ALPHA_SP // stw r0, FZERO @@ -271,16 +301,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmpwi cr0, K, 0 ble L999_H1 - li PRE, 256 + li PRE, 256 li o4 , 4 li o8 , 8 li o12, 12 li o16, 16 li o32, 32 li o48, 48 - addi TBUFFER, SP, 320 + addi TBUFFER, SP, 320+200 - addi T1, SP, 300 + addi T1, SP, 300+200 stxsspx f1, o0 , T1 stxsspx f1, o4 , T1 stxsspx f1, o8 , T1 @@ -339,6 +369,7 @@ L999: ld r15, 272(SP) ld r14, 280(SP) ld r13, 288(SP) + addi r11, SP, 304 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -359,10 +390,34 @@ L999: lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/zgemm_kernel_8x2_power8.S b/kernel/power/zgemm_kernel_8x2_power8.S index 02c94a88a..5526b91c9 100644 --- a/kernel/power/zgemm_kernel_8x2_power8.S +++ b/kernel/power/zgemm_kernel_8x2_power8.S @@ -117,15 +117,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 32000 -#define ALPHA_R_SP 296(SP) -#define ALPHA_I_SP 304(SP) -#define FZERO 312(SP) +#define STACKSIZE 32192 +#define ALPHA_R_SP 296+192(SP) +#define ALPHA_I_SP 304+192(SP) +#define FZERO 312+192(SP) #else -#define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 460 +#define ALPHA_R_SP 224+204(SP) +#define ALPHA_I_SP 232+204(SP) +#define FZERO 240+204(SP) #endif #define M r3 @@ -168,6 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define alpha_r vs30 #define alpha_i vs31 +#define VECSAVE r11 #define FRAMEPOINTER r12 @@ -245,6 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP, 288 #else stw r31, 144(SP) stw r30, 148(SP) @@ -263,7 +265,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r17, 200(SP) stw r16, 204(SP) stw r15, 208(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11,0 stfd f1, ALPHA_R_SP stfd f2, ALPHA_I_SP @@ -332,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. and BBUFFER, BBUFFER, T1 #ifdef __64BIT__ - addi ALPHA, SP, 296 + addi ALPHA, SP, 296+192 #else - addi ALPHA, SP, 224 + addi ALPHA, SP, 224+192+12 #endif lxsdx alpha_r, 0, ALPHA @@ -389,6 +416,7 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -407,13 +435,37 @@ L999: lwz r17, 200(SP) lwz r16, 204(SP) lwz r15, 208(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/zgemm_tcopy_8_power8.S b/kernel/power/zgemm_tcopy_8_power8.S index 1f3f35419..2841a9921 100644 --- a/kernel/power/zgemm_tcopy_8_power8.S +++ b/kernel/power/zgemm_tcopy_8_power8.S @@ -110,6 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "zgemm_tcopy_macros_8_power8.S" #define STACKSIZE 384 +#define STACKSIZE 576 PROLOGUE @@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r16, 264(SP) std r15, 272(SP) std r14, 280(SP) + addi r11, SP ,288 + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11 ,16 + stvx v31, r11, r0 + li r11,0 cmpwi cr0, M, 0 ble- L999 @@ -196,9 +222,33 @@ L999: ld r16, 264(SP) ld r15, 272(SP) ld r14, 280(SP) + addi r11, SP, 288 + lvx v20, r11,r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11,0 addi SP, SP, STACKSIZE - blr EPILOGUE diff --git a/kernel/power/ztrmm_kernel_8x2_power8.S b/kernel/power/ztrmm_kernel_8x2_power8.S index 0cfe613d5..c1415138c 100644 --- a/kernel/power/ztrmm_kernel_8x2_power8.S +++ b/kernel/power/ztrmm_kernel_8x2_power8.S @@ -1,3 +1,4 @@ + /*************************************************************************** Copyright (c) 2013-2016, The OpenBLAS Project All rights reserved. @@ -82,15 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 320 -#define ALPHA_R_SP 296(SP) -#define ALPHA_I_SP 304(SP) -#define FZERO 312(SP) +#define STACKSIZE 520 +#define ALPHA_R_SP 296+200(SP) +#define ALPHA_I_SP 304+200(SP) +#define FZERO 312+200(SP) #else -#define STACKSIZE 256 -#define ALPHA_R_SP 224(SP) -#define ALPHA_I_SP 232(SP) -#define FZERO 240(SP) +#define STACKSIZE 452 +#define ALPHA_R_SP 224+196(SP) +#define ALPHA_I_SP 232+196(SP) +#define FZERO 240+196(SP) #endif #define M r3 @@ -133,6 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define alpha_r vs30 #define alpha_i vs31 +#define VECSAVE r11 + #define KKK r13 #define K1 r14 #define L r15 @@ -204,6 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. std r15, 272(SP) std r14, 280(SP) std r13, 288(SP) + addi r11, SP, 304 #else stw r31, 144(SP) stw r30, 148(SP) @@ -224,7 +228,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. stw r15, 208(SP) stw r14, 212(SP) stw r13, 216(SP) + addi r11, SP, 224 #endif + stvx v20, r11, r0 + addi r11, r11, 16 + stvx v21, r11, r0 + addi r11, r11, 16 + stvx v22, r11, r0 + addi r11, r11, 16 + stvx v23, r11, r0 + addi r11, r11, 16 + stvx v24, r11, r0 + addi r11, r11, 16 + stvx v25, r11, r0 + addi r11, r11, 16 + stvx v26, r11, r0 + addi r11, r11, 16 + stvx v27, r11, r0 + addi r11, r11, 16 + stvx v28, r11, r0 + addi r11, r11, 16 + stvx v29, r11, r0 + addi r11, r11, 16 + stvx v30, r11, r0 + addi r11, r11, 16 + stvx v31, r11, r0 + li r11, 0 stfd f1, ALPHA_R_SP stfd f2, ALPHA_I_SP @@ -289,9 +318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. li o48 , 48 #ifdef __64BIT__ - addi ALPHA, SP, 296 + addi ALPHA, SP, 296+200 #else - addi ALPHA, SP, 224 + addi ALPHA, SP, 224+196 #endif lxsdx alpha_r, 0, ALPHA @@ -347,6 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ld r15, 272(SP) ld r14, 280(SP) ld r13, 288(SP) + addi r11, SP, 304 #else lwz r31, 144(SP) lwz r30, 148(SP) @@ -367,10 +397,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lwz r15, 208(SP) lwz r14, 212(SP) lwz r13, 216(SP) + addi r11, SP, 224 #endif + lvx v20, r11, r3 + addi r11, r11, 16 + lvx v21, r11, r3 + addi r11, r11, 16 + lvx v22, r11, r3 + addi r11, r11, 16 + lvx v23, r11, r3 + addi r11, r11, 16 + lvx v24, r11, r3 + addi r11, r11, 16 + lvx v25, r11, r3 + addi r11, r11, 16 + lvx v26, r11, r3 + addi r11, r11, 16 + lvx v27, r11, r3 + addi r11, r11, 16 + lvx v28, r11, r3 + addi r11, r11, 16 + lvx v29, r11, r3 + addi r11, r11, 16 + lvx v30, r11, r3 + addi r11, r11, 16 + lvx v31, r11, r3 + li r11, 0 addi SP, SP, STACKSIZE - blr EPILOGUE From a89d6711c6b2472e2a7a32824e326c222aee6e89 Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Thu, 28 Sep 2017 12:56:29 -0700 Subject: [PATCH 021/122] Increasing flexibility of GEMM benchmark. m, n, and k can be set to arbitrary constants. A and B matrices can be transposed independently. --- benchmark/gemm.c | 132 ++++++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 54 deletions(-) diff --git a/benchmark/gemm.c b/benchmark/gemm.c index 9d661e648..809813c92 100644 --- a/benchmark/gemm.c +++ b/benchmark/gemm.c @@ -121,13 +121,15 @@ static void *huge_malloc(BLASLONG size){ int main(int argc, char *argv[]){ FLOAT *a, *b, *c; - FLOAT alpha[] = {1.0, 1.0}; + FLOAT alpha[] = {1.0, 0.0}; FLOAT beta [] = {0.0, 0.0}; - char trans='N'; - blasint m, n, i, j; + char transa = 'N'; + char transb = 'N'; + blasint m, n, k, i, j, lda, ldb, ldc; int loops = 1; - int has_param_n=0; - int l; + int has_param_m = 0; + int has_param_n = 0; + int has_param_k = 0; char *p; int from = 1; @@ -135,86 +137,108 @@ int main(int argc, char *argv[]){ int step = 1; struct timeval start, stop; - double time1,timeg; + double time1, timeg; argc--;argv++; - if (argc > 0) { from = atol(*argv); argc--; argv++;} - if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} - if (argc > 0) { step = atol(*argv); argc--; argv++;} + if (argc > 0) { from = atol(*argv); argc--; argv++; } + if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; } + if (argc > 0) { step = atol(*argv); argc--; argv++; } - if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; + if ((p = getenv("OPENBLAS_TRANS"))) { + transa=*p; + transb=*p; + } + if ((p = getenv("OPENBLAS_TRANSA"))) { + transa=*p; + } + if ((p = getenv("OPENBLAS_TRANSB"))) { + transb=*p; + } + TOUPPER(transa); + TOUPPER(transb); - fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans); + fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb); - if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ - fprintf(stderr,"Out of Memory!!\n");exit(1); + p = getenv("OPENBLAS_LOOPS"); + if ( p != NULL ) { + loops = atoi(p); } - if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ - fprintf(stderr,"Out of Memory!!\n");exit(1); + if ((p = getenv("OPENBLAS_PARAM_M"))) { + m = atoi(p); + has_param_m=1; + } else { + m = to; + } + if ((p = getenv("OPENBLAS_PARAM_N"))) { + n = atoi(p); + has_param_n=1; + } else { + n = to; + } + if ((p = getenv("OPENBLAS_PARAM_K"))) { + k = atoi(p); + has_param_k=1; + } else { + k = to; } - if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ + if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) { fprintf(stderr,"Out of Memory!!\n");exit(1); } - - p = getenv("OPENBLAS_LOOPS"); - if ( p != NULL ) - loops = atoi(p); - - if ((p = getenv("OPENBLAS_PARAM_N"))) { - n = atoi(p); - has_param_n=1; + if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) { + fprintf(stderr,"Out of Memory!!\n");exit(1); + } + if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) { + fprintf(stderr,"Out of Memory!!\n");exit(1); } #ifdef linux srandom(getpid()); #endif - - for(j = 0; j < to; j++){ - for(i = 0; i < to * COMPSIZE; i++){ - a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; - b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; - c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; - } - } - - - - fprintf(stderr, " SIZE Flops Time\n"); - for(m = from; m <= to; m += step) - { + for (i = 0; i < m * k * COMPSIZE; i++) { + a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; + } + for (i = 0; i < k * n * COMPSIZE; i++) { + b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; + } + for (i = 0; i < m * n * COMPSIZE; i++) { + c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; + } + + fprintf(stderr, " SIZE Flops Time\n"); + for (i = from; i <= to; i += step) { + timeg=0; - if ( has_param_n == 1 && n <= m ) - n=n; - else - n=m; + if (!has_param_m) { m = i; } + if (!has_param_n) { n = i; } + if (!has_param_k) { k = i; } + if (transa == 'N') { lda = m; } + else { lda = k; } + if (transb == 'N') { ldb = k; } + else { ldb = n; } + ldc = m; - - fprintf(stderr, " %6dx%d : ", (int)m, (int)n); + fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k); gettimeofday( &start, (struct timezone *)0); - for (l=0; l Date: Sat, 30 Sep 2017 18:40:03 +0200 Subject: [PATCH 022/122] Add trivial smoketest for xpotrf --- utest/test_potrs.c | 147 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/utest/test_potrs.c b/utest/test_potrs.c index f5dae5086..7afeb4c9d 100644 --- a/utest/test_potrs.c +++ b/utest/test_potrs.c @@ -394,3 +394,150 @@ CTEST(potrf, bug_695){ CTEST_ERR("%s:%d got NaN", __FILE__, __LINE__); } } + + +// Check potrf factorizes a small problem correctly +CTEST(potrf, smoketest_trivial){ + float A1s[4] = {2, 0.3, 0.3, 3}; + double A1d[4] = {2, 0.3, 0.3, 3}; + openblas_complex_float A1c[4] = { + openblas_make_complex_float(2,0), + openblas_make_complex_float(0.3,0.1), + openblas_make_complex_float(0.3,-0.1), + openblas_make_complex_float(3,0) + }; + openblas_complex_double A1z[4] = { + openblas_make_complex_double(2,0), + openblas_make_complex_double(0.3,0.1), + openblas_make_complex_double(0.3,-0.1), + openblas_make_complex_double(3,0) + }; + float zeros = 0, ones = 1; + double zerod = 0, oned = 1; + openblas_complex_float zeroc = openblas_make_complex_float(0, 0), + onec = openblas_make_complex_float(1, 0); + openblas_complex_double zeroz = openblas_make_complex_double(0, 0), + onez = openblas_make_complex_float(1, 0); + + char uplo, trans1, trans2; + blasint nv = 4; + blasint n = 2; + blasint inc = 1; + blasint info = 0; + int i, j, cycle; + + float As[4], Bs[4]; + double Ad[4], Bd[4]; + openblas_complex_float Ac[4], Bc[4]; + openblas_complex_double Az[4], Bz[4]; + + for (cycle = 0; cycle < 2; ++cycle) { + if (cycle == 0) { + uplo = 'L'; + } + else { + uplo = 'U'; + } + + BLASFUNC(scopy)(&nv, A1s, &inc, As, &inc); + BLASFUNC(dcopy)(&nv, A1d, &inc, Ad, &inc); + BLASFUNC(ccopy)(&nv, (float *)A1c, &inc, (float *)Ac, &inc); + BLASFUNC(zcopy)(&nv, (double *)A1z, &inc, (double *)Az, &inc); + + BLASFUNC(spotrf)(&uplo, &n, As, &n, &info); + if (info != 0) { + CTEST_ERR("%s:%d info != 0", __FILE__, __LINE__); + } + + BLASFUNC(dpotrf)(&uplo, &n, Ad, &n, &info); + if (info != 0) { + CTEST_ERR("%s:%d info != 0", __FILE__, __LINE__); + } + + BLASFUNC(cpotrf)(&uplo, &n, (float *)Ac, &n, &info); + if (info != 0) { + CTEST_ERR("%s:%d info != 0", __FILE__, __LINE__); + } + + BLASFUNC(zpotrf)(&uplo, &n, (double *)Az, &n, &info); + if (info != 0) { + CTEST_ERR("%s:%d info != 0", __FILE__, __LINE__); + } + + /* Fill the other triangle */ + if (uplo == 'L') { + for (i = 0; i < n; ++i) { + for (j = i+1; j < n; ++j) { + As[i+n*j] = 0; + Ad[i+n*j] = 0; + Ac[i+n*j] = zeroc; + Az[i+n*j] = zeroz; + } + } + } + else { + for (i = 0; i < n; ++i) { + for (j = 0; j < i; ++j) { + As[i+n*j] = 0; + Ad[i+n*j] = 0; + Ac[i+n*j] = zeroc; + Az[i+n*j] = zeroz; + } + } + } + + /* B = A A^H or A^H A */ + if (uplo == 'L') { + trans1 = 'N'; + trans2 = 'C'; + } + else { + trans1 = 'C'; + trans2 = 'N'; + } + + BLASFUNC(sgemm)(&trans1, &trans2, &n, &n, &n, &ones, As, &n, As, &n, &zeros, Bs, &n); + BLASFUNC(dgemm)(&trans1, &trans2, &n, &n, &n, &oned, Ad, &n, Ad, &n, &zerod, Bd, &n); + BLASFUNC(cgemm)(&trans1, &trans2, &n, &n, &n, (float *)&onec, + (float *)Ac, &n, (float *)Ac, &n, (float *)&zeroc, (float *)Bc, &n); + BLASFUNC(zgemm)(&trans1, &trans2, &n, &n, &n, (double *)&onez, + (double *)Az, &n, (double *)Az, &n, (double *)&zeroz, (double *)Bz, &n); + + /* Check result is close to original */ + for (i = 0; i < n; ++i) { + for (j = 0; j < n; ++j) { + double err; + + err = fabs(A1s[i+n*j] - Bs[i+n*j]); + if (err > 1e-5) { + CTEST_ERR("%s:%d %c s(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); + } + + err = fabs(A1d[i+n*j] - Bd[i+n*j]); + if (err > 1e-12) { + CTEST_ERR("%s:%d %c d(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); + } + +#ifdef OPENBLAS_COMPLEX_C99 + err = cabsf(A1c[i+n*j] - Bc[i+n*j]); +#else + err = hypot(A1c[i+n*j].real - Bc[i+n*j].real, + A1c[i+n*j].imag - Bc[i+n*j].imag); +#endif + if (err > 1e-5) { + CTEST_ERR("%s:%d %c c(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); + } + +#ifdef OPENBLAS_COMPLEX_C99 + err = cabs(A1z[i+n*j] - Bz[i+n*j]); +#else + err = hypot(A1z[i+n*j].real - Bz[i+n*j].real, + A1z[i+n*j].imag - Bz[i+n*j].imag); +#endif + if (err > 1e-12) { + CTEST_ERR("%s:%d %c z(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); + } + } + } + } +} From f96afd94b00ee2500ad66fd532a7921d95dc4c82 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 1 Oct 2017 01:06:39 +0200 Subject: [PATCH 023/122] Fix out-of-bounds accesses where the data should be zero anyway --- kernel/generic/ztrmm_utcopy_8.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/generic/ztrmm_utcopy_8.c b/kernel/generic/ztrmm_utcopy_8.c index 6c0448443..24043d8e8 100644 --- a/kernel/generic/ztrmm_utcopy_8.c +++ b/kernel/generic/ztrmm_utcopy_8.c @@ -828,11 +828,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = ONE; b[ 1] = ZERO; #else - b[ 0] = *(a01 + 0); - b[ 1] = *(a01 + 1); +// out-of-bounds memory accesses, see issue 601 +// b[ 0] = *(a01 + 0); +// b[ 1] = *(a01 + 1); + b[0]=ZERO; + b[1]=ZERO; #endif - b[ 2] = *(a02 + 0); - b[ 3] = *(a02 + 1); +// out-of-bounds memory accesses, see issue 601 +// b[ 2] = *(a02 + 0); +// b[ 3] = *(a02 + 1); + b[2]=ZERO; + b[3]=ZERO; b += 4; } posY += 2; From 860dcfc7037bdb022083c4cda39da8f72628f8bf Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Tue, 3 Oct 2017 13:43:39 -0700 Subject: [PATCH 024/122] Use 2D thread distribution for small GEMMs. Allows maximum use of available cores if one of M and N is small and the other is large. --- driver/level3/level3_thread.c | 96 +++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 8ab4ef699..0fd39eea7 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -219,12 +219,14 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *buffer[DIVIDE_RATE]; BLASLONG k, lda, ldb, ldc; - BLASLONG m_from, m_to, n_from, n_to, N_from, N_to; + BLASLONG m_from, m_to, n_from, n_to; FLOAT *alpha, *beta; FLOAT *a, *b, *c; job_t *job = (job_t *)args -> common; + BLASLONG nthreads_m; BLASLONG xxx, bufferside; + BLASLONG mypos_m, mypos_n; BLASLONG ls, min_l, jjs, min_jj; BLASLONG is, min_i, div_n; @@ -259,26 +261,28 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, alpha = (FLOAT *)args -> alpha; beta = (FLOAT *)args -> beta; + nthreads_m = args -> nthreads; + if (range_m) { + nthreads_m = range_m[-1]; + } + + mypos_m = mypos % nthreads_m; + mypos_n = mypos / nthreads_m; + m_from = 0; m_to = M; if (range_m) { - m_from = range_m[0]; - m_to = range_m[1]; + m_from = range_m[mypos_m + 0]; + m_to = range_m[mypos_m + 1]; } n_from = 0; n_to = N; - N_from = 0; - N_to = N; - if (range_n) { n_from = range_n[mypos + 0]; n_to = range_n[mypos + 1]; - - N_from = range_n[0]; - N_to = range_n[args -> nthreads]; } if (beta) { @@ -287,7 +291,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, #else if ((beta[0] != ONE) || (beta[1] != ZERO)) #endif - BETA_OPERATION(m_from, m_to, N_from, N_to, beta, c, ldc); + BETA_OPERATION(m_from, m_to, range_n[mypos_n * nthreads_m], range_n[(mypos_n + 1) * nthreads_m], beta, c, ldc); } if ((k == 0) || (alpha == NULL)) return 0; @@ -299,8 +303,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, ) return 0; #if 0 - fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld N_from : %ld N_to : %ld\n", - mypos, m_from, m_to, n_from, n_to, N_from, N_to); + fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", + mypos, m_from, m_to, n_from, n_to); fprintf(stderr, "GEMM: P = %4ld Q = %4ld R = %4ld\n", (BLASLONG)GEMM_P, (BLASLONG)GEMM_Q, (BLASLONG)GEMM_R); @@ -394,7 +398,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, } #endif - for (i = 0; i < args -> nthreads; i++) job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside]; + for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++) + job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside]; WMB; } @@ -402,13 +407,13 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, do { current ++; - if (current >= args -> nthreads) current = 0; + if (current >= (mypos_n + 1) * nthreads_m) current = mypos_n * nthreads_m; div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE; for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { - if (current != mypos) { + if (current != mypos) { START_RPCC(); @@ -479,7 +484,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, } current ++; - if (current >= args -> nthreads) current = 0; + if (current >= (mypos_n + 1) * nthreads_m) current = mypos_n * nthreads_m; } while (current != mypos); @@ -525,7 +530,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, } static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG - *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){ + *range_n, FLOAT *sa, FLOAT *sb, + BLASLONG nthreads_m, BLASLONG nthreads_n) { blas_arg_t newarg; @@ -537,8 +543,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG blas_queue_t queue[MAX_CPU_NUMBER]; - BLASLONG range_M[MAX_CPU_NUMBER + 1]; - BLASLONG range_N[MAX_CPU_NUMBER + 1]; + BLASLONG range_M_buffer[MAX_CPU_NUMBER + 2]; + BLASLONG range_N_buffer[MAX_CPU_NUMBER + 2]; + BLASLONG *range_M = range_M_buffer + 1; + BLASLONG *range_N = range_N_buffer + 1; BLASLONG num_cpu_m, num_cpu_n; @@ -595,6 +603,9 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG newarg.gemm_r = args -> gemm_r; #endif + range_M[-1] = nthreads_m; + range_N[-1] = nthreads_n; + if (!range_m) { range_M[0] = 0; m = args -> m; @@ -607,7 +618,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG while (m > 0){ - width = blas_quickdivide(m + nthreads - num_cpu_m - 1, nthreads - num_cpu_m); + width = blas_quickdivide(m + nthreads_m - num_cpu_m - 1, nthreads_m - num_cpu_m); m -= width; if (m < 0) width = width + m; @@ -617,12 +628,16 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG num_cpu_m ++; } - for (i = 0; i < num_cpu_m; i++) { + for (i = num_cpu_m; i < MAX_CPU_NUMBER; i++) { + range_M[i + 1] = range_M[num_cpu_m]; + } + + for (i = 0; i < nthreads; i++) { queue[i].mode = mode; queue[i].routine = inner_thread; queue[i].args = &newarg; - queue[i].range_m = &range_M[i]; - queue[i].range_n = &range_N[0]; + queue[i].range_m = range_M; + queue[i].range_n = range_N; queue[i].sa = NULL; queue[i].sb = NULL; queue[i].next = &queue[i + 1]; @@ -659,17 +674,21 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG num_cpu_n ++; } - for (j = 0; j < num_cpu_m; j++) { - for (i = 0; i < num_cpu_m; i++) { + for (j = num_cpu_n; j < MAX_CPU_NUMBER; j++) { + range_N[j + 1] = range_N[num_cpu_n]; + } + + for (j = 0; j < MAX_CPU_NUMBER; j++) { + for (i = 0; i < MAX_CPU_NUMBER; i++) { for (k = 0; k < DIVIDE_RATE; k++) { job[j].working[i][CACHE_LINE_SIZE * k] = 0; } } } - queue[num_cpu_m - 1].next = NULL; + queue[nthreads - 1].next = NULL; - exec_blas(num_cpu_m, queue); + exec_blas(nthreads, queue); } #ifdef USE_ALLOC_HEAP @@ -684,6 +703,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG m = args -> m; BLASLONG n = args -> n; BLASLONG nthreads = args -> nthreads; + BLASLONG nthreads_m, nthreads_n; if (nthreads == 1) { GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); @@ -704,21 +724,31 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO n = n_to - n_from; } - if ((m < 2 * SWITCH_RATIO) || (n < 2 * SWITCH_RATIO)) { + nthreads_m = nthreads; + while (m < nthreads_m * SWITCH_RATIO) { + nthreads_m = nthreads_m / 2; + } + + if (nthreads_m < 1) { GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); return 0; } - if (m < nthreads * SWITCH_RATIO) { - nthreads = blas_quickdivide(m, SWITCH_RATIO); + nthreads_n = nthreads / nthreads_m; + if (n < nthreads_m * (nthreads_n - 1)) { + nthreads_n = (n + nthreads_m - 1) / nthreads_m; } - if (n < nthreads * SWITCH_RATIO) { - nthreads = blas_quickdivide(n, SWITCH_RATIO); + + nthreads = nthreads_m * nthreads_n; + + if (nthreads <= 1) { + GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); + return 0; } args -> nthreads = nthreads; - gemm_driver(args, range_m, range_n, sa, sb, 0); + gemm_driver(args, range_m, range_n, sa, sb, nthreads_m, nthreads_n); return 0; } From 9de52b489a5cf5214106ade8fd86d8b854d16a9c Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Tue, 3 Oct 2017 16:32:08 -0700 Subject: [PATCH 025/122] Cleaning up and documenting multi-threaded GEMM code. --- driver/level3/level3_thread.c | 341 +++++++++++++++------------------- 1 file changed, 151 insertions(+), 190 deletions(-) diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 0fd39eea7..22a12d465 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -97,21 +97,21 @@ typedef struct { #ifndef BETA_OPERATION #ifndef COMPLEX -#define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \ - GEMM_BETA((M_TO) - (M_FROM), (N_TO - N_FROM), 0, \ - BETA[0], NULL, 0, NULL, 0, \ - (FLOAT *)(C) + ((M_FROM) + (N_FROM) * (LDC)) * COMPSIZE, LDC) +#define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \ + GEMM_BETA((M_TO) - (M_FROM), (N_TO - N_FROM), 0, \ + BETA[0], NULL, 0, NULL, 0, \ + (FLOAT *)(C) + ((M_FROM) + (N_FROM) * (LDC)) * COMPSIZE, LDC) #else -#define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \ - GEMM_BETA((M_TO) - (M_FROM), (N_TO - N_FROM), 0, \ - BETA[0], BETA[1], NULL, 0, NULL, 0, \ - (FLOAT *)(C) + ((M_FROM) + (N_FROM) * (LDC)) * COMPSIZE, LDC) +#define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \ + GEMM_BETA((M_TO) - (M_FROM), (N_TO - N_FROM), 0, \ + BETA[0], BETA[1], NULL, 0, NULL, 0, \ + (FLOAT *)(C) + ((M_FROM) + (N_FROM) * (LDC)) * COMPSIZE, LDC) #endif #endif #ifndef ICOPY_OPERATION #if defined(NN) || defined(NT) || defined(NC) || defined(NR) || \ - defined(RN) || defined(RT) || defined(RC) || defined(RR) + defined(RN) || defined(RT) || defined(RC) || defined(RR) #define ICOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ITCOPY(M, N, (FLOAT *)(A) + ((Y) + (X) * (LDA)) * COMPSIZE, LDA, BUFFER); #else #define ICOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_INCOPY(M, N, (FLOAT *)(A) + ((X) + (Y) * (LDA)) * COMPSIZE, LDA, BUFFER); @@ -120,7 +120,7 @@ typedef struct { #ifndef OCOPY_OPERATION #if defined(NN) || defined(TN) || defined(CN) || defined(RN) || \ - defined(NR) || defined(TR) || defined(CR) || defined(RR) + defined(NR) || defined(TR) || defined(CR) || defined(RR) #define OCOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ONCOPY(M, N, (FLOAT *)(A) + ((X) + (Y) * (LDA)) * COMPSIZE, LDA, BUFFER); #else #define OCOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_OTCOPY(M, N, (FLOAT *)(A) + ((Y) + (X) * (LDA)) * COMPSIZE, LDA, BUFFER); @@ -144,36 +144,36 @@ typedef struct { #ifndef KERNEL_OPERATION #ifndef COMPLEX -#define KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, C, LDC, X, Y) \ - KERNEL_FUNC(M, N, K, ALPHA[0], SA, SB, (FLOAT *)(C) + ((X) + (Y) * LDC) * COMPSIZE, LDC) +#define KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, C, LDC, X, Y) \ + KERNEL_FUNC(M, N, K, ALPHA[0], SA, SB, (FLOAT *)(C) + ((X) + (Y) * LDC) * COMPSIZE, LDC) #else -#define KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, C, LDC, X, Y) \ - KERNEL_FUNC(M, N, K, ALPHA[0], ALPHA[1], SA, SB, (FLOAT *)(C) + ((X) + (Y) * LDC) * COMPSIZE, LDC) +#define KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, C, LDC, X, Y) \ + KERNEL_FUNC(M, N, K, ALPHA[0], ALPHA[1], SA, SB, (FLOAT *)(C) + ((X) + (Y) * LDC) * COMPSIZE, LDC) #endif #endif #ifndef FUSED_KERNEL_OPERATION #if defined(NN) || defined(TN) || defined(CN) || defined(RN) || \ - defined(NR) || defined(TR) || defined(CR) || defined(RR) + defined(NR) || defined(TR) || defined(CR) || defined(RR) #ifndef COMPLEX #define FUSED_KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, B, LDB, C, LDC, I, J, L) \ - FUSED_GEMM_KERNEL_N(M, N, K, ALPHA[0], SA, SB, \ - (FLOAT *)(B) + ((L) + (J) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) + FUSED_GEMM_KERNEL_N(M, N, K, ALPHA[0], SA, SB, \ + (FLOAT *)(B) + ((L) + (J) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) #else #define FUSED_KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, B, LDB, C, LDC, I, J, L) \ - FUSED_GEMM_KERNEL_N(M, N, K, ALPHA[0], ALPHA[1], SA, SB, \ - (FLOAT *)(B) + ((L) + (J) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) + FUSED_GEMM_KERNEL_N(M, N, K, ALPHA[0], ALPHA[1], SA, SB, \ + (FLOAT *)(B) + ((L) + (J) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) #endif #else #ifndef COMPLEX #define FUSED_KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, B, LDB, C, LDC, I, J, L) \ - FUSED_GEMM_KERNEL_T(M, N, K, ALPHA[0], SA, SB, \ - (FLOAT *)(B) + ((J) + (L) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) + FUSED_GEMM_KERNEL_T(M, N, K, ALPHA[0], SA, SB, \ + (FLOAT *)(B) + ((J) + (L) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) #else #define FUSED_KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, B, LDB, C, LDC, I, J, L) \ - FUSED_GEMM_KERNEL_T(M, N, K, ALPHA[0], ALPHA[1], SA, SB, \ - (FLOAT *)(B) + ((J) + (L) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) + FUSED_GEMM_KERNEL_T(M, N, K, ALPHA[0], ALPHA[1], SA, SB, \ + (FLOAT *)(B) + ((J) + (L) * LDB) * COMPSIZE, LDB, (FLOAT *)(C) + ((I) + (J) * LDC) * COMPSIZE, LDC) #endif #endif #endif @@ -224,12 +224,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *alpha, *beta; FLOAT *a, *b, *c; job_t *job = (job_t *)args -> common; + BLASLONG nthreads_m; - BLASLONG xxx, bufferside; BLASLONG mypos_m, mypos_n; - BLASLONG ls, min_l, jjs, min_jj; - BLASLONG is, min_i, div_n; + BLASLONG is, js, ls, bufferside, jjs; + BLASLONG min_i, min_l, div_n, min_jj; BLASLONG i, current; BLASLONG l1stride; @@ -261,30 +261,29 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, alpha = (FLOAT *)args -> alpha; beta = (FLOAT *)args -> beta; + /* Initialize 2D CPU distribution */ nthreads_m = args -> nthreads; if (range_m) { nthreads_m = range_m[-1]; } + mypos_n = blas_quickdivide(mypos, nthreads_m); /* mypos_n = mypos / nthreads_m */ + mypos_m = mypos - mypos_n * nthreads_m; /* mypos_m = mypos % nthreads_m */ - mypos_m = mypos % nthreads_m; - mypos_n = mypos / nthreads_m; - + /* Initialize m and n */ m_from = 0; m_to = M; - if (range_m) { m_from = range_m[mypos_m + 0]; m_to = range_m[mypos_m + 1]; } - n_from = 0; n_to = N; - if (range_n) { n_from = range_n[mypos + 0]; n_to = range_n[mypos + 1]; } + /* Multiply C by beta if needed */ if (beta) { #ifndef COMPLEX if (beta[0] != ONE) @@ -294,43 +293,37 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, BETA_OPERATION(m_from, m_to, range_n[mypos_n * nthreads_m], range_n[(mypos_n + 1) * nthreads_m], beta, c, ldc); } + /* Return early if no more computation is needed */ if ((k == 0) || (alpha == NULL)) return 0; - if ((alpha[0] == ZERO) #ifdef COMPLEX && (alpha[1] == ZERO) #endif ) return 0; -#if 0 - fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", - mypos, m_from, m_to, n_from, n_to); - - fprintf(stderr, "GEMM: P = %4ld Q = %4ld R = %4ld\n", (BLASLONG)GEMM_P, (BLASLONG)GEMM_Q, (BLASLONG)GEMM_R); - -#endif - + /* Initialize workspace for local region of B */ div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE; - buffer[0] = sb; for (i = 1; i < DIVIDE_RATE; i++) { buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE; } - + /* Iterate through steps of k */ for(ls = 0; ls < k; ls += min_l){ + /* Determine step size in k */ min_l = k - ls; - if (min_l >= GEMM_Q * 2) { min_l = GEMM_Q; } else { if (min_l > GEMM_Q) min_l = (min_l + 1) / 2; } + /* Determine step size in m + * Note: We are currently on the first step in m + */ l1stride = 1; min_i = m_to - m_from; - if (min_i >= GEMM_P * 2) { min_i = GEMM_P; } else { @@ -341,109 +334,106 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, } } + /* Copy local region of A into workspace */ START_RPCC(); - ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_from, sa); - STOP_RPCC(copy_A); + /* Copy local region of B into workspace and apply kernel */ div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE; + for (js = n_from, bufferside = 0; js < n_to; js += div_n, bufferside ++) { - for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) { - + /* Make sure if no one is using workspace */ START_RPCC(); - - /* Make sure if no one is using buffer */ for (i = 0; i < args -> nthreads; i++) while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;}; - STOP_RPCC(waiting1); #if defined(FUSED_GEMM) && !defined(TIMING) - FUSED_KERNEL_OPERATION(min_i, MIN(n_to, xxx + div_n) - xxx, min_l, alpha, - sa, buffer[bufferside], b, ldb, c, ldc, m_from, xxx, ls); + /* Fused operation to copy region of B into workspace and apply kernel */ + FUSED_KERNEL_OPERATION(min_i, MIN(n_to, js + div_n) - js, min_l, alpha, + sa, buffer[bufferside], b, ldb, c, ldc, m_from, js, ls); #else - for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ - min_jj = MIN(n_to, xxx + div_n) - jjs; - + /* Split local region of B into parts */ + for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){ + min_jj = MIN(n_to, js + div_n) - jjs; if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; else - if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; - else - if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; - + if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; + else + if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; + /* Copy part of local region of B into workspace */ START_RPCC(); - OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs, - buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE * l1stride); - + buffer[bufferside] + min_l * (jjs - js) * COMPSIZE * l1stride); STOP_RPCC(copy_B); + /* Apply kernel with local region of A and part of local region of B */ START_RPCC(); - KERNEL_OPERATION(min_i, min_jj, min_l, alpha, - sa, buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE * l1stride, + sa, buffer[bufferside] + min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs); - STOP_RPCC(kernel); #ifdef TIMING - ops += 2 * min_i * min_jj * min_l; + ops += 2 * min_i * min_jj * min_l; #endif } #endif + /* Set flag so other threads can access local region of B */ for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++) job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside]; WMB; } + /* Get regions of B from other threads and apply kernel */ current = mypos; - do { + + /* This thread accesses regions of B from threads in the range + * [ mypos_n * nthreads_m, (mypos_n+1) * nthreads_m ) */ current ++; if (current >= (mypos_n + 1) * nthreads_m) current = mypos_n * nthreads_m; + /* Split other region of B into parts */ div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE; + for (js = range_n[current], bufferside = 0; js < range_n[current + 1]; js += div_n, bufferside ++) { + if (current != mypos) { - for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { - - if (current != mypos) { - + /* Wait until other region of B is initialized */ START_RPCC(); - - /* thread has to wait */ while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;}; - STOP_RPCC(waiting2); + /* Apply kernel with local region of A and part of other region of B */ START_RPCC(); - - KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, alpha, + KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - js, div_n), min_l, alpha, sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside], - c, ldc, m_from, xxx); + c, ldc, m_from, js); + STOP_RPCC(kernel); - STOP_RPCC(kernel); #ifdef TIMING - ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l; + ops += 2 * min_i * MIN(range_n[current + 1] - js, div_n) * min_l; #endif } + /* Clear synchronization flag if this thread is done with other region of B */ if (m_to - m_from == min_i) { job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0; } } } while (current != mypos); - + /* Iterate through steps of m + * Note: First step has already been finished */ for(is = m_from + min_i; is < m_to; is += min_i){ min_i = m_to - is; - if (min_i >= GEMM_P * 2) { min_i = GEMM_P; } else @@ -451,38 +441,39 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; } + /* Copy local region of A into workspace */ START_RPCC(); - ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa); - STOP_RPCC(copy_A); + /* Get regions of B and apply kernel */ current = mypos; do { + /* Split region of B into parts and apply kernel */ div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE; + for (js = range_n[current], bufferside = 0; js < range_n[current + 1]; js += div_n, bufferside ++) { - for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { - + /* Apply kernel with local region of A and part of region of B */ START_RPCC(); - - KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, alpha, + KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - js, div_n), min_l, alpha, sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside], - c, ldc, is, xxx); - - STOP_RPCC(kernel); - + c, ldc, is, js); + STOP_RPCC(kernel); + #ifdef TIMING - ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l; -#endif - - if (is + min_i >= m_to) { - /* Thread doesn't need this buffer any more */ - job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0; - WMB; - } + ops += 2 * min_i * MIN(range_n[current + 1] - js, div_n) * min_l; +#endif + + /* Clear synchronization flag if this thread is done with region of B */ + if (is + min_i >= m_to) { + job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0; + WMB; + } } + /* This thread accesses regions of B from threads in the range + * [ mypos_n * nthreads_m, (mypos_n+1) * nthreads_m ) */ current ++; if (current >= (mypos_n + 1) * nthreads_m) current = mypos_n * nthreads_m; @@ -492,14 +483,13 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, } + /* Wait until all other threads are done with local region of B */ START_RPCC(); - for (i = 0; i < args -> nthreads; i++) { - for (xxx = 0; xxx < DIVIDE_RATE; xxx++) { - while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {YIELDING;}; + for (js = 0; js < DIVIDE_RATE; js++) { + while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;}; } } - STOP_RPCC(waiting3); #ifdef TIMING @@ -512,17 +502,6 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, (double)waiting2 /(double)total * 100., (double)waiting3 /(double)total * 100., (double)ops/(double)kernel / 4. * 100.); - -#if 0 - fprintf(stderr, "GEMM [%2ld] Copy_A : %6.2ld Copy_B : %6.2ld Wait : %6.2ld\n", - mypos, copy_A, copy_B, waiting); - - fprintf(stderr, "Waiting[%2ld] %6.2f %6.2f %6.2f\n", - mypos, - (double)waiting1/(double)waiting * 100., - (double)waiting2/(double)waiting * 100., - (double)waiting3/(double)waiting * 100.); -#endif fprintf(stderr, "\n"); #endif @@ -545,17 +524,16 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG BLASLONG range_M_buffer[MAX_CPU_NUMBER + 2]; BLASLONG range_N_buffer[MAX_CPU_NUMBER + 2]; - BLASLONG *range_M = range_M_buffer + 1; - BLASLONG *range_N = range_N_buffer + 1; - + BLASLONG *range_M, *range_N; BLASLONG num_cpu_m, num_cpu_n; BLASLONG nthreads = args -> nthreads; BLASLONG width, i, j, k, js; BLASLONG m, n, n_from, n_to; - int mode; + int mode; + /* Get execution mode */ #ifndef COMPLEX #ifdef XDOUBLE mode = BLAS_XDOUBLE | BLAS_REAL | BLAS_NODE; @@ -574,6 +552,16 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG #endif #endif +#ifdef USE_ALLOC_HEAP + /* Dynamically allocate workspace */ + job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t)); + if(job==NULL){ + fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__); + exit(1); + } +#endif + + /* Initialize struct for arguments */ newarg.m = args -> m; newarg.n = args -> n; newarg.k = args -> k; @@ -586,26 +574,19 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG newarg.alpha = args -> alpha; newarg.beta = args -> beta; newarg.nthreads = args -> nthreads; - -#ifdef USE_ALLOC_HEAP - job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t)); - if(job==NULL){ - fprintf(stderr, "OpenBLAS: malloc failed in %s\n", __func__); - exit(1); - } -#endif - newarg.common = (void *)job; - #ifdef PARAMTEST - newarg.gemm_p = args -> gemm_p; - newarg.gemm_q = args -> gemm_q; - newarg.gemm_r = args -> gemm_r; + newarg.gemm_p = args -> gemm_p; + newarg.gemm_q = args -> gemm_q; + newarg.gemm_r = args -> gemm_r; #endif + /* Initialize partitions in m and n + * Note: The number of CPU partitions is stored in the -1 entry */ + range_M = &range_M_buffer[1]; + range_N = &range_N_buffer[1]; range_M[-1] = nthreads_m; range_N[-1] = nthreads_n; - if (!range_m) { range_M[0] = 0; m = args -> m; @@ -614,24 +595,20 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG m = range_m[1] - range_m[0]; } + /* Partition m into nthreads_m regions */ num_cpu_m = 0; - while (m > 0){ - - width = blas_quickdivide(m + nthreads_m - num_cpu_m - 1, nthreads_m - num_cpu_m); - + width = blas_quickdivide(m + nthreads_m - num_cpu_m - 1, nthreads_m - num_cpu_m); m -= width; if (m < 0) width = width + m; - range_M[num_cpu_m + 1] = range_M[num_cpu_m] + width; - num_cpu_m ++; } - for (i = num_cpu_m; i < MAX_CPU_NUMBER; i++) { range_M[i + 1] = range_M[num_cpu_m]; } + /* Initialize parameters for parallel execution */ for (i = 0; i < nthreads; i++) { queue[i].mode = mode; queue[i].routine = inner_thread; @@ -642,10 +619,11 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG queue[i].sb = NULL; queue[i].next = &queue[i + 1]; } - queue[0].sa = sa; queue[0].sb = sb; + queue[nthreads - 1].next = NULL; + /* Iterate through steps of n */ if (!range_n) { n_from = 0; n_to = args -> n; @@ -653,41 +631,34 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG n_from = range_n[0]; n_to = range_n[1]; } - for(js = n_from; js < n_to; js += GEMM_R * nthreads){ n = n_to - js; if (n > GEMM_R * nthreads) n = GEMM_R * nthreads; + /* Partition (a step of) n into nthreads regions */ range_N[0] = js; - num_cpu_n = 0; - while (n > 0){ - - width = blas_quickdivide(n + nthreads - num_cpu_n - 1, nthreads - num_cpu_n); - + width = blas_quickdivide(n + nthreads - num_cpu_n - 1, nthreads - num_cpu_n); n -= width; if (n < 0) width = width + n; - range_N[num_cpu_n + 1] = range_N[num_cpu_n] + width; - num_cpu_n ++; } - for (j = num_cpu_n; j < MAX_CPU_NUMBER; j++) { range_N[j + 1] = range_N[num_cpu_n]; } - for (j = 0; j < MAX_CPU_NUMBER; j++) { - for (i = 0; i < MAX_CPU_NUMBER; i++) { + /* Clear synchronization flags */ + for (i = 0; i < MAX_CPU_NUMBER; i++) { + for (j = 0; j < MAX_CPU_NUMBER; j++) { for (k = 0; k < DIVIDE_RATE; k++) { - job[j].working[i][CACHE_LINE_SIZE * k] = 0; + job[i].working[j][CACHE_LINE_SIZE * k] = 0; } } } - queue[nthreads - 1].next = NULL; - + /* Execute parallel computation */ exec_blas(nthreads, queue); } @@ -702,53 +673,43 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG m = args -> m; BLASLONG n = args -> n; - BLASLONG nthreads = args -> nthreads; BLASLONG nthreads_m, nthreads_n; - if (nthreads == 1) { - GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); - return 0; - } - + /* Get dimensions from index ranges if available */ if (range_m) { - BLASLONG m_from = *(((BLASLONG *)range_m) + 0); - BLASLONG m_to = *(((BLASLONG *)range_m) + 1); - - m = m_to - m_from; + m = range_m[1] - range_m[0]; } - if (range_n) { - BLASLONG n_from = *(((BLASLONG *)range_n) + 0); - BLASLONG n_to = *(((BLASLONG *)range_n) + 1); - - n = n_to - n_from; + n = range_n[1] - range_n[0]; } - nthreads_m = nthreads; - while (m < nthreads_m * SWITCH_RATIO) { - nthreads_m = nthreads_m / 2; - } - - if (nthreads_m < 1) { - GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); - return 0; + /* CPU partitions in m should have at least SWITCH_RATIO rows */ + if (m < 2 * SWITCH_RATIO) { + nthreads_m = 1; + } else { + nthreads_m = args -> nthreads; + while (m < nthreads_m * SWITCH_RATIO) { + nthreads_m = nthreads_m / 2; + } } - nthreads_n = nthreads / nthreads_m; - if (n < nthreads_m * (nthreads_n - 1)) { - nthreads_n = (n + nthreads_m - 1) / nthreads_m; + /* At most one CPU partition in n should have less than nthreads_m columns */ + if (n < nthreads_m) { + nthreads_n = 1; + } else { + nthreads_n = blas_quickdivide(n + nthreads_m - 1, nthreads_m); + if (nthreads_m * nthreads_n > args -> nthreads) { + nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m); + } } - nthreads = nthreads_m * nthreads_n; - - if (nthreads <= 1) { + /* Execute serial or parallel computation */ + if (nthreads_m * nthreads_n <= 1) { GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); - return 0; + } else { + args -> nthreads = nthreads_m * nthreads_n; + gemm_driver(args, range_m, range_n, sa, sb, nthreads_m, nthreads_n); } - args -> nthreads = nthreads; - - gemm_driver(args, range_m, range_n, sa, sb, nthreads_m, nthreads_n); - return 0; } From 30486a356c2de3e0f284a8efebba891050299765 Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Wed, 4 Oct 2017 12:37:49 -0700 Subject: [PATCH 026/122] Reduce number of data partitions in n. --- driver/level3/level3_thread.c | 37 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 22a12d465..77ceac6e8 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -525,7 +525,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG BLASLONG range_M_buffer[MAX_CPU_NUMBER + 2]; BLASLONG range_N_buffer[MAX_CPU_NUMBER + 2]; BLASLONG *range_M, *range_N; - BLASLONG num_cpu_m, num_cpu_n; + BLASLONG num_parts; BLASLONG nthreads = args -> nthreads; @@ -596,16 +596,16 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG } /* Partition m into nthreads_m regions */ - num_cpu_m = 0; + num_parts = 0; while (m > 0){ - width = blas_quickdivide(m + nthreads_m - num_cpu_m - 1, nthreads_m - num_cpu_m); + width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts); m -= width; if (m < 0) width = width + m; - range_M[num_cpu_m + 1] = range_M[num_cpu_m] + width; - num_cpu_m ++; + range_M[num_parts + 1] = range_M[num_parts] + width; + num_parts ++; } - for (i = num_cpu_m; i < MAX_CPU_NUMBER; i++) { - range_M[i + 1] = range_M[num_cpu_m]; + for (i = num_parts; i < MAX_CPU_NUMBER; i++) { + range_M[i + 1] = range_M[num_parts]; } /* Initialize parameters for parallel execution */ @@ -637,16 +637,19 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG /* Partition (a step of) n into nthreads regions */ range_N[0] = js; - num_cpu_n = 0; + num_parts = 0; while (n > 0){ - width = blas_quickdivide(n + nthreads - num_cpu_n - 1, nthreads - num_cpu_n); + width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts); + if (width < SWITCH_RATIO) { + width = SWITCH_RATIO; + } n -= width; if (n < 0) width = width + n; - range_N[num_cpu_n + 1] = range_N[num_cpu_n] + width; - num_cpu_n ++; + range_N[num_parts + 1] = range_N[num_parts] + width; + num_parts ++; } - for (j = num_cpu_n; j < MAX_CPU_NUMBER; j++) { - range_N[j + 1] = range_N[num_cpu_n]; + for (j = num_parts; j < MAX_CPU_NUMBER; j++) { + range_N[j + 1] = range_N[num_parts]; } /* Clear synchronization flags */ @@ -683,7 +686,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO n = range_n[1] - range_n[0]; } - /* CPU partitions in m should have at least SWITCH_RATIO rows */ + /* Partitions in m should have at least SWITCH_RATIO rows */ if (m < 2 * SWITCH_RATIO) { nthreads_m = 1; } else { @@ -693,11 +696,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } } - /* At most one CPU partition in n should have less than nthreads_m columns */ - if (n < nthreads_m) { + /* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */ + if (n < SWITCH_RATIO * nthreads_m) { nthreads_n = 1; } else { - nthreads_n = blas_quickdivide(n + nthreads_m - 1, nthreads_m); + nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m); if (nthreads_m * nthreads_n > args -> nthreads) { nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m); } From 9d92f526dd0995d06933a47111c9b7c6cad3ed26 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 6 Oct 2017 23:51:32 +0200 Subject: [PATCH 027/122] Comment out a code block that performs out-of-bounds memory accesses ...and does not appear to be needed even when it stays within the bounds of the array --- kernel/generic/ztrmm_utcopy_8.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/kernel/generic/ztrmm_utcopy_8.c b/kernel/generic/ztrmm_utcopy_8.c index 24043d8e8..fb286d0e6 100644 --- a/kernel/generic/ztrmm_utcopy_8.c +++ b/kernel/generic/ztrmm_utcopy_8.c @@ -823,24 +823,22 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 3] = *(a01 + 3); b += 4; } - } else { +#if 1 + } +#else + } else { #ifdef UNIT b[ 0] = ONE; b[ 1] = ZERO; #else -// out-of-bounds memory accesses, see issue 601 -// b[ 0] = *(a01 + 0); -// b[ 1] = *(a01 + 1); - b[0]=ZERO; - b[1]=ZERO; + b[ 0] = *(a01 + 0); + b[ 1] = *(a01 + 1); #endif -// out-of-bounds memory accesses, see issue 601 -// b[ 2] = *(a02 + 0); -// b[ 3] = *(a02 + 1); - b[2]=ZERO; - b[3]=ZERO; + b[ 2] = *(a02 + 0); + b[ 3] = *(a02 + 1); b += 4; } +#endif posY += 2; } From c7a8512d121c15a2f4fde2f156853b1335971f27 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 9 Oct 2017 23:34:18 +0200 Subject: [PATCH 028/122] Cmake fixes for DYNAMIC_ARCH builds and whitespace in path names (#1323) * prebuild.cmake: Put quotes around path names that may contain whitespace (Copied from alexkaratakis' PR #1295) * kernel/CMakeLists.txt: Fix common_lapack header inclusion and DYNAMIC_ARCH generation of ?neg_tcopy and ?laswp_ncopy files * lapack/CMakeLists.txt: Use correct template for ?laswp_(plus,minus) functions --- cmake/prebuild.cmake | 12 ++++++------ kernel/CMakeLists.txt | 13 +++++++++++-- lapack/CMakeLists.txt | 13 ++++++++++--- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index cc5475630..b783ef90d 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -192,7 +192,7 @@ else(NOT CMAKE_CROSSCOMPILING) if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") try_compile(GETARCH_RESULT ${GETARCH_DIR} SOURCES ${GETARCH_SRC} - COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}" OUTPUT_VARIABLE GETARCH_LOG COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} ) @@ -204,8 +204,8 @@ else(NOT CMAKE_CROSSCOMPILING) message(STATUS "Running getarch") # use the cmake binary w/ the -E param to run a shell command in a cross-platform way - execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) - execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) +execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) +execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") @@ -220,7 +220,7 @@ else(NOT CMAKE_CROSSCOMPILING) if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") try_compile(GETARCH2_RESULT ${GETARCH2_DIR} SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c - COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}" OUTPUT_VARIABLE GETARCH2_LOG COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} ) @@ -231,8 +231,8 @@ else(NOT CMAKE_CROSSCOMPILING) endif () # use the cmake binary w/ the -E param to run a shell command in a cross-platform way - execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) - execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) +execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) +execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) # append config data from getarch_2nd to the TARGET file and read in CMake vars file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT}) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 09e513ca5..a720f6249 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -16,7 +16,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") set(KERNEL_INTERFACE common_level1.h common_level2.h common_level3.h) - if(NOT ${NO_LAPACK}) + if(NOT NO_LAPACK) set(KERNEL_INTERFACE ${KERNEL_INTERFACE} common_lapack.h) endif () @@ -500,12 +500,21 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) set(KERNEL_TSUFFIX_CONTENTS "") foreach (KERNEL_INTERFACE_H ${KERNEL_INTERFACE}) file(READ ${PROJECT_SOURCE_DIR}/${KERNEL_INTERFACE_H} KERNEL_INTERFACE_H_CONTENTS) - string(REGEX REPLACE "[ ]*\\(" "${TSUFFIX}(" KERNEL_INTERFACE_H_CONTENTS_NEW "${KERNEL_INTERFACE_H_CONTENTS}") + string(REGEX REPLACE " *\\(" "${TSUFFIX}(" KERNEL_INTERFACE_H_CONTENTS_NEW "${KERNEL_INTERFACE_H_CONTENTS}") set(KERNEL_TSUFFIX_CONTENTS "${KERNEL_TSUFFIX_CONTENTS}\n${KERNEL_INTERFACE_H_CONTENTS_NEW}") endforeach() file(WRITE ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp "${KERNEL_TSUFFIX_CONTENTS}") configure_file(${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.h COPYONLY) file(REMOVE ${SETPARAM_TARGET_DIR}/kernel${TSUFFIX}.tmp) + + foreach (float_type ${FLOAT_TYPES}) + # a bit of metaprogramming here to pull out the appropriate KERNEL var + string(SUBSTRING ${float_type} 0 1 float_char) + GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type}) + GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) + endforeach () + + endif () add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 9fb000651..c0a7543ca 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -42,9 +42,16 @@ set(UNIT_SOURCES2 GenerateNamedObjects("${LAPACK_SOURCES}") GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) -# TODO: laswp needs arch specific code -GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus" false "" "" false 3) -GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus" false "" "" false 3) +GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" false 3) +GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) + +# dynamic_arch laswp needs arch specific code ? +#foreach(TARGET_CORE ${DYNAMIC_CORE}) +# set(TSUFFIX "_${TARGET_CORE}") +# +#GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" ${TSUFFIX} false 3) +#GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" ${TSUFFIX} false 3) +#endforeach () if (USE_THREAD) From bfa9b9f6b22a48806d07b9e96829d901e2bf32ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20K=C3=B6hler?= Date: Tue, 10 Oct 2017 10:12:04 +0200 Subject: [PATCH 029/122] Update README.md Add POWER 8 to the list of additional architectures. --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 52d3b1ef3..562f6d17f 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,9 @@ Please read GotoBLAS_01Readme.txt - **ARMV8**: Experimental - **ARM Cortex-A57**: Experimental +#### PPC/PPC64 +- **POWER8**: Optmized Level-3 BLAS and some Level-1, only with USE_OPENMP=1 + #### IBM zEnterprise System: - **Z13**: Optimized Level-3 BLAS and Level-1,2 (double precision) From 78cec6209c513809eb2f9a2db16d2061e4e1a3f8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 12 Oct 2017 16:58:37 +0200 Subject: [PATCH 030/122] Add ReLAPACK option --- CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 771764e2e..4fdfb3ffa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON endif() option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF) option(DYNAMIC_ARCH "Build with DYNAMIC_ARCH" OFF) +option(BUILD_RELAPACK "Build with ReLAPACK (recursive LAPACK" OFF) ####### if(BUILD_WITHOUT_LAPACK) set(NO_LAPACK 1) @@ -55,6 +56,9 @@ endif () set(SUBDIRS ${BLASDIRS}) if (NOT NO_LAPACK) list(APPEND SUBDIRS lapack) + if(BUILD_RELAPACK) + list(APPEND SUBDIRS relapack/src) + endif() endif () # set which float types we want to build for @@ -141,7 +145,7 @@ endif() # add objects to the openblas lib -add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) +add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) # Handle MSVC exports if(MSVC AND BUILD_SHARED_LIBS) From fbf83f4833cf85c1c21404a3442f62ba5db7f1fe Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 12 Oct 2017 17:00:00 +0200 Subject: [PATCH 031/122] Add cmake build list file for ReLAPACK --- relapack/src/CMakeLists.txt | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 relapack/src/CMakeLists.txt diff --git a/relapack/src/CMakeLists.txt b/relapack/src/CMakeLists.txt new file mode 100644 index 000000000..2d861f54b --- /dev/null +++ b/relapack/src/CMakeLists.txt @@ -0,0 +1,85 @@ +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${PROJECT_BINARY_DIR}) + +set(RELAFILES +clauum.c +ctrsyl_rec2.c +dsytrf.c +spbtrf.c +strsyl_rec2.c +zhetrf_rook_rec2.c +ztrsyl.c +cgbtrf.c +cpbtrf.c +ctrtri.c +dsytrf_rec2.c +spotrf.c +strtri.c +zlauum.c +ztrsyl_rec2.c +cgemmt.c +cpotrf.c +dgbtrf.c +dsytrf_rook.c +lapack_wrappers.c +ssygst.c +zgbtrf.c +zpbtrf.c +ztrtri.c +cgetrf.c +csytrf.c +dgemmt.c +dsytrf_rook_rec2.c +ssytrf.c +zgemmt.c +zpotrf.c +chegst.c +csytrf_rec2.c +dgetrf.c +dtgsyl.c +ssytrf_rec2.c +zgetrf.c +zsytrf.c +chetrf.c +csytrf_rook.c +dlauum.c +dtrsyl.c +sgbtrf.c +ssytrf_rook.c +zhegst.c +zsytrf_rec2.c +chetrf_rec2.c +csytrf_rook_rec2.c +dpbtrf.c +dtrsyl_rec2.c +sgemmt.c +ssytrf_rook_rec2.c +zhetrf.c +zsytrf_rook.c +chetrf_rook.c +ctgsyl.c +dpotrf.c +dtrtri.c +sgetrf.c +stgsyl.c +zhetrf_rec2.c +zsytrf_rook_rec2.c +chetrf_rook_rec2.c +ctrsyl.c +dsygst.c +f2c.c +slauum.c +strsyl.c +zhetrf_rook.c +ztgsyl.c +) + + + +# add relapack folder to the sources +set(RELA_SOURCES "") +foreach (RELA_FILE ${RELAFILES}) + list(APPEND RELA_SOURCES "${PROJECT_SOURCE_DIR}/relapack/src/${RELA_FILE}") +endforeach () +add_library(relapack_src OBJECT ${RELA_SOURCES}) +set_source_files_properties(${RELA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") From 962b20a9bb9ee12cc6ad8381e04bd00bcf1e8e19 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 12 Oct 2017 17:02:01 +0200 Subject: [PATCH 032/122] Optionally add ReLAPACK to LIB_COMPONENTS --- cmake/system.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index daa2683d2..236a7e888 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -387,6 +387,9 @@ if (NOT NO_LAPACK) if (NOT NO_LAPACKE) set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE") endif () + if (BUILD_RELAPACK) + set(LIB_COMPONENTS "${LIB_COMPONENTS} ReLAPACK") + endif () endif () if (ONLY_CBLAS) From 8ac87c1cb63fd8518c7a99d6b06fb47524f2153b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 16 Oct 2017 23:27:51 +0200 Subject: [PATCH 033/122] Implement DSDOT with unchanged sdot microkernels --- kernel/x86_64/sdot.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/kernel/x86_64/sdot.c b/kernel/x86_64/sdot.c index 389252f8b..f786d1895 100644 --- a/kernel/x86_64/sdot.c +++ b/kernel/x86_64/sdot.c @@ -68,7 +68,11 @@ static void sdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) #endif +#if defined (DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif { BLASLONG i=0; BLASLONG ix=0,iy=0; @@ -91,12 +95,19 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) i = n1; while(i < n) { - +#if defined(DSDOT) + dot += (double)y[i] * (double)x[i] ; +#else dot += y[i] * x[i] ; +#endif i++ ; } +#if defined(DSDOT) + dot+=(double)mydot; +#else dot+=mydot; +#endif return(dot); @@ -106,8 +117,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) while(i < n1) { - +#if defined (DSDOT) + dot += (double)y[iy] * (double)x[ix] + (double)y[iy+inc_y] * (double)x[ix+inc_x]; +#else dot += y[iy] * x[ix] + y[iy+inc_y] * x[ix+inc_x]; +#endif ix += inc_x*2 ; iy += inc_y*2 ; i+=2 ; @@ -116,8 +130,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) while(i < n) { - +#if defined (DSDOT) + dot += (double)y[iy] * (double)x[ix] ; +#else dot += y[iy] * x[ix] ; +#endif ix += inc_x ; iy += inc_y ; i++ ; From 28c3fa8950045d658e2c9b604a061927ffb9dc61 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 16 Oct 2017 23:29:03 +0200 Subject: [PATCH 034/122] Add dsdot --- kernel/x86_64/KERNEL.HASWELL | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/x86_64/KERNEL.HASWELL b/kernel/x86_64/KERNEL.HASWELL index f2e1374d3..848de38df 100644 --- a/kernel/x86_64/KERNEL.HASWELL +++ b/kernel/x86_64/KERNEL.HASWELL @@ -24,6 +24,8 @@ DDOTKERNEL = ddot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c +DSDOTKERNEL = sdot.c + SAXPYKERNEL = saxpy.c DAXPYKERNEL = daxpy.c CAXPYKERNEL = caxpy.c From 5e3e91d0fc5562782ddac9c01d6765cb24f171a6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 22 Oct 2017 18:18:51 +0200 Subject: [PATCH 035/122] Split the microkernel workload into chunks of 32 floats for dsdot mode to limit loss of precision --- kernel/x86_64/sdot.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/kernel/x86_64/sdot.c b/kernel/x86_64/sdot.c index f786d1895..b6f3c21af 100644 --- a/kernel/x86_64/sdot.c +++ b/kernel/x86_64/sdot.c @@ -78,7 +78,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) BLASLONG ix=0,iy=0; double dot = 0.0 ; +#if defined (DSDOT) + double mydot = 0.0; + FLOAT asmdot = 0.0; +#else FLOAT mydot=0.0; +#endif BLASLONG n1; if ( n <= 0 ) return(dot); @@ -89,9 +94,23 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) n1 = n & (BLASLONG)(-32); if ( n1 ) +#if defined(DSDOT) + { + FLOAT *x1=x; + FLOAT *y1=y; + BLASLONG n2 = 32; + while (i Date: Tue, 24 Oct 2017 10:07:44 +0200 Subject: [PATCH 036/122] Fix 32bit HASWELL --- kernel/Makefile.L3 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 066426396..4284fbfa0 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -29,8 +29,10 @@ USE_TRMM = 1 endif ifeq ($(CORE), HASWELL) +ifeq ($(ARCH), x86_64) USE_TRMM = 1 endif +endif ifeq ($(CORE), ZEN) USE_TRMM = 1 From a0128aa489720ac2fd883dbeebfecffd4812ff99 Mon Sep 17 00:00:00 2001 From: Ashwin Sekhar T K Date: Tue, 24 Oct 2017 10:47:11 +0000 Subject: [PATCH 037/122] ARM64: Convert all labels to local labels While debugging/profiling applications using perf or other tools, the kernels appear scattered in the profile reports. This is because the labels within the kernels are not local and each label is shown as a separate function. To avoid this, all the labels within the kernels are changed to local labels. --- kernel/arm64/amax.S | 50 +- kernel/arm64/asum.S | 40 +- kernel/arm64/axpy.S | 42 +- kernel/arm64/casum.S | 40 +- kernel/arm64/cgemm_kernel_4x4.S | 284 +++++------ kernel/arm64/cgemm_kernel_8x4.S | 350 ++++++------- kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S | 350 ++++++------- kernel/arm64/copy.S | 40 +- kernel/arm64/ctrmm_kernel_4x4.S | 258 +++++----- kernel/arm64/ctrmm_kernel_8x4.S | 350 ++++++------- kernel/arm64/daxpy_thunderx2t99.S | 44 +- kernel/arm64/dgemm_kernel_4x4.S | 286 +++++------ kernel/arm64/dgemm_kernel_4x8.S | 352 ++++++------- kernel/arm64/dgemm_kernel_8x4.S | 338 ++++++------ kernel/arm64/dgemm_kernel_8x4_thunderx2t99.S | 338 ++++++------ kernel/arm64/dgemm_ncopy_4.S | 72 +-- kernel/arm64/dgemm_ncopy_8.S | 96 ++-- kernel/arm64/dgemm_tcopy_4.S | 72 +-- kernel/arm64/dgemm_tcopy_8.S | 112 ++-- kernel/arm64/dot.S | 40 +- kernel/arm64/dtrmm_kernel_4x4.S | 258 +++++----- kernel/arm64/dtrmm_kernel_4x8.S | 352 ++++++------- kernel/arm64/dtrmm_kernel_8x4.S | 338 ++++++------ kernel/arm64/gemv_n.S | 62 +-- kernel/arm64/gemv_t.S | 62 +-- kernel/arm64/iamax.S | 48 +- kernel/arm64/izamax.S | 48 +- kernel/arm64/nrm2.S | 32 +- kernel/arm64/rot.S | 40 +- kernel/arm64/scal.S | 46 +- kernel/arm64/sgemm_kernel_16x4.S | 442 ++++++++-------- kernel/arm64/sgemm_kernel_16x4_thunderx2t99.S | 442 ++++++++-------- kernel/arm64/sgemm_kernel_4x4.S | 310 +++++------ kernel/arm64/sgemm_kernel_8x8.S | 482 +++++++++--------- kernel/arm64/strmm_kernel_16x4.S | 442 ++++++++-------- kernel/arm64/strmm_kernel_4x4.S | 260 +++++----- kernel/arm64/strmm_kernel_8x8.S | 482 +++++++++--------- kernel/arm64/swap.S | 42 +- kernel/arm64/zamax.S | 50 +- kernel/arm64/zasum.S | 40 +- kernel/arm64/zaxpy.S | 42 +- kernel/arm64/zdot.S | 40 +- kernel/arm64/zgemm_kernel_4x4.S | 260 +++++----- kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S | 260 +++++----- kernel/arm64/zgemv_n.S | 52 +- kernel/arm64/zgemv_t.S | 52 +- kernel/arm64/znrm2.S | 32 +- kernel/arm64/zrot.S | 40 +- kernel/arm64/zscal.S | 68 +-- kernel/arm64/ztrmm_kernel_4x4.S | 260 +++++----- 50 files changed, 4469 insertions(+), 4469 deletions(-) diff --git a/kernel/arm64/amax.S b/kernel/arm64/amax.S index c02321ae0..f535ddf27 100644 --- a/kernel/arm64/amax.S +++ b/kernel/arm64/amax.S @@ -160,62 +160,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble amax_kernel_zero + ble .Lamax_kernel_zero cmp INC_X, xzr - ble amax_kernel_zero + ble .Lamax_kernel_zero cmp INC_X, #1 - bne amax_kernel_S_BEGIN + bne .Lamax_kernel_S_BEGIN -amax_kernel_F_BEGIN: +.Lamax_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq amax_kernel_F1_INIT + beq .Lamax_kernel_F1_INIT INIT_F4 subs I, I, #1 - beq amax_kernel_F1 + beq .Lamax_kernel_F1 -amax_kernel_F4: +.Lamax_kernel_F4: KERNEL_F4 subs I, I, #1 - bne amax_kernel_F4 + bne .Lamax_kernel_F4 -amax_kernel_F1: +.Lamax_kernel_F1: ands I, N, #3 - ble amax_kernel_L999 + ble .Lamax_kernel_L999 -amax_kernel_F10: +.Lamax_kernel_F10: KERNEL_F1 subs I, I, #1 - bne amax_kernel_F10 + bne .Lamax_kernel_F10 ret -amax_kernel_F1_INIT: +.Lamax_kernel_F1_INIT: INIT_F1 subs N, N, #1 - b amax_kernel_F1 + b .Lamax_kernel_F1 -amax_kernel_S_BEGIN: +.Lamax_kernel_S_BEGIN: INIT_S subs N, N, #1 - ble amax_kernel_L999 + ble .Lamax_kernel_L999 asr I, N, #2 cmp I, xzr - ble amax_kernel_S1 + ble .Lamax_kernel_S1 -amax_kernel_S4: +.Lamax_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -223,25 +223,25 @@ amax_kernel_S4: KERNEL_S1 subs I, I, #1 - bne amax_kernel_S4 + bne .Lamax_kernel_S4 -amax_kernel_S1: +.Lamax_kernel_S1: ands I, N, #3 - ble amax_kernel_L999 + ble .Lamax_kernel_L999 -amax_kernel_S10: +.Lamax_kernel_S10: KERNEL_S1 subs I, I, #1 - bne amax_kernel_S10 + bne .Lamax_kernel_S10 -amax_kernel_L999: +.Lamax_kernel_L999: ret -amax_kernel_zero: +.Lamax_kernel_zero: fmov MAXF, REG0 ret diff --git a/kernel/arm64/asum.S b/kernel/arm64/asum.S index bee8927b1..e88eb07c2 100644 --- a/kernel/arm64/asum.S +++ b/kernel/arm64/asum.S @@ -122,52 +122,52 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif cmp N, xzr - ble asum_kernel_L999 + ble .Lasum_kernel_L999 cmp INC_X, xzr - ble asum_kernel_L999 + ble .Lasum_kernel_L999 cmp INC_X, #1 - bne asum_kernel_S_BEGIN + bne .Lasum_kernel_S_BEGIN -asum_kernel_F_BEGIN: +.Lasum_kernel_F_BEGIN: asr I, N, #3 cmp I, xzr - beq asum_kernel_F1 + beq .Lasum_kernel_F1 -asum_kernel_F8: +.Lasum_kernel_F8: KERNEL_F8 subs I, I, #1 - bne asum_kernel_F8 + bne .Lasum_kernel_F8 KERNEL_F8_FINALIZE -asum_kernel_F1: +.Lasum_kernel_F1: ands I, N, #7 - ble asum_kernel_L999 + ble .Lasum_kernel_L999 -asum_kernel_F10: +.Lasum_kernel_F10: KERNEL_F1 subs I, I, #1 - bne asum_kernel_F10 + bne .Lasum_kernel_F10 -asum_kernel_L999: +.Lasum_kernel_L999: ret -asum_kernel_S_BEGIN: +.Lasum_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble asum_kernel_S1 + ble .Lasum_kernel_S1 -asum_kernel_S4: +.Lasum_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -175,19 +175,19 @@ asum_kernel_S4: KERNEL_S1 subs I, I, #1 - bne asum_kernel_S4 + bne .Lasum_kernel_S4 -asum_kernel_S1: +.Lasum_kernel_S1: ands I, N, #3 - ble asum_kernel_L999 + ble .Lasum_kernel_L999 -asum_kernel_S10: +.Lasum_kernel_S10: KERNEL_S1 subs I, I, #1 - bne asum_kernel_S10 + bne .Lasum_kernel_S10 ret diff --git a/kernel/arm64/axpy.S b/kernel/arm64/axpy.S index 554902c09..809435110 100644 --- a/kernel/arm64/axpy.S +++ b/kernel/arm64/axpy.S @@ -135,53 +135,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble axpy_kernel_L999 + ble .Laxpy_kernel_L999 fcmp DA, #0.0 - beq axpy_kernel_L999 + beq .Laxpy_kernel_L999 cmp INC_X, #1 - bne axpy_kernel_S_BEGIN + bne .Laxpy_kernel_S_BEGIN cmp INC_Y, #1 - bne axpy_kernel_S_BEGIN + bne .Laxpy_kernel_S_BEGIN -axpy_kernel_F_BEGIN: +.Laxpy_kernel_F_BEGIN: asr I, N, #3 cmp I, xzr - beq axpy_kernel_F1 + beq .Laxpy_kernel_F1 -axpy_kernel_F8: +.Laxpy_kernel_F8: KERNEL_F8 subs I, I, #1 - bne axpy_kernel_F8 + bne .Laxpy_kernel_F8 -axpy_kernel_F1: +.Laxpy_kernel_F1: ands I, N, #7 - ble axpy_kernel_L999 + ble .Laxpy_kernel_L999 -axpy_kernel_F10: +.Laxpy_kernel_F10: KERNEL_F1 subs I, I, #1 - bne axpy_kernel_F10 + bne .Laxpy_kernel_F10 mov w0, wzr ret -axpy_kernel_S_BEGIN: +.Laxpy_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble axpy_kernel_S1 + ble .Laxpy_kernel_S1 -axpy_kernel_S4: +.Laxpy_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -189,21 +189,21 @@ axpy_kernel_S4: KERNEL_S1 subs I, I, #1 - bne axpy_kernel_S4 + bne .Laxpy_kernel_S4 -axpy_kernel_S1: +.Laxpy_kernel_S1: ands I, N, #3 - ble axpy_kernel_L999 + ble .Laxpy_kernel_L999 -axpy_kernel_S10: +.Laxpy_kernel_S10: KERNEL_S1 subs I, I, #1 - bne axpy_kernel_S10 + bne .Laxpy_kernel_S10 -axpy_kernel_L999: +.Laxpy_kernel_L999: mov w0, wzr ret diff --git a/kernel/arm64/casum.S b/kernel/arm64/casum.S index 8f09eecfa..7c82827a5 100644 --- a/kernel/arm64/casum.S +++ b/kernel/arm64/casum.S @@ -98,52 +98,52 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fmov s1, SUMF cmp N, xzr - ble asum_kernel_L999 + ble .Lcasum_kernel_L999 cmp INC_X, xzr - ble asum_kernel_L999 + ble .Lcasum_kernel_L999 cmp INC_X, #1 - bne asum_kernel_S_BEGIN + bne .Lcasum_kernel_S_BEGIN -asum_kernel_F_BEGIN: +.Lcasum_kernel_F_BEGIN: asr I, N, #3 cmp I, xzr - beq asum_kernel_F1 + beq .Lcasum_kernel_F1 -asum_kernel_F8: +.Lcasum_kernel_F8: KERNEL_F8 subs I, I, #1 - bne asum_kernel_F8 + bne .Lcasum_kernel_F8 KERNEL_F8_FINALIZE -asum_kernel_F1: +.Lcasum_kernel_F1: ands I, N, #7 - ble asum_kernel_L999 + ble .Lcasum_kernel_L999 -asum_kernel_F10: +.Lcasum_kernel_F10: KERNEL_F1 subs I, I, #1 - bne asum_kernel_F10 + bne .Lcasum_kernel_F10 -asum_kernel_L999: +.Lcasum_kernel_L999: ret -asum_kernel_S_BEGIN: +.Lcasum_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble asum_kernel_S1 + ble .Lcasum_kernel_S1 -asum_kernel_S4: +.Lcasum_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -151,19 +151,19 @@ asum_kernel_S4: KERNEL_S1 subs I, I, #1 - bne asum_kernel_S4 + bne .Lcasum_kernel_S4 -asum_kernel_S1: +.Lcasum_kernel_S1: ands I, N, #3 - ble asum_kernel_L999 + ble .Lcasum_kernel_L999 -asum_kernel_S10: +.Lcasum_kernel_S10: KERNEL_S1 subs I, I, #1 - bne asum_kernel_S10 + bne .Lcasum_kernel_S10 ret diff --git a/kernel/arm64/cgemm_kernel_4x4.S b/kernel/arm64/cgemm_kernel_4x4.S index 7f2ddea07..bbf0c7537 100644 --- a/kernel/arm64/cgemm_kernel_4x4.S +++ b/kernel/arm64/cgemm_kernel_4x4.S @@ -1072,11 +1072,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble cgemm_kernel_L2_BEGIN + ble .Lcgemm_kernel_L2_BEGIN /******************************************************************************/ -cgemm_kernel_L4_BEGIN: +.Lcgemm_kernel_L4_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #2 @@ -1084,96 +1084,96 @@ cgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array add ppA, temp, pA -cgemm_kernel_L4_M8_BEGIN: +.Lcgemm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble cgemm_kernel_L4_M4_BEGIN + ble .Lcgemm_kernel_L4_M4_BEGIN -cgemm_kernel_L4_M8_20: +.Lcgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt cgemm_kernel_L4_M8_32 + blt .Lcgemm_kernel_L4_M8_32 KERNEL8x4_I // do one in the K KERNEL8x4_M2 // do another in the K subs counterL, counterL, #2 // subtract 2 - ble cgemm_kernel_L4_M8_22a + ble .Lcgemm_kernel_L4_M8_22a .align 5 -cgemm_kernel_L4_M8_22: +.Lcgemm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M8_22 + bgt .Lcgemm_kernel_L4_M8_22 -cgemm_kernel_L4_M8_22a: +.Lcgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b cgemm_kernel_L4_M8_44 + b .Lcgemm_kernel_L4_M8_44 -cgemm_kernel_L4_M8_32: +.Lcgemm_kernel_L4_M8_32: tst counterL, #1 - ble cgemm_kernel_L4_M8_40 + ble .Lcgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_E - b cgemm_kernel_L4_M8_44 + b .Lcgemm_kernel_L4_M8_44 -cgemm_kernel_L4_M8_40: +.Lcgemm_kernel_L4_M8_40: INIT8x4 -cgemm_kernel_L4_M8_44: +.Lcgemm_kernel_L4_M8_44: ands counterL , origK, #1 - ble cgemm_kernel_L4_M8_100 + ble .Lcgemm_kernel_L4_M8_100 -cgemm_kernel_L4_M8_46: +.Lcgemm_kernel_L4_M8_46: KERNEL8x4_SUB -cgemm_kernel_L4_M8_100: +.Lcgemm_kernel_L4_M8_100: SAVE8x4 -cgemm_kernel_L4_M8_END: +.Lcgemm_kernel_L4_M8_END: lsl temp, origK, #5 // k * 4 * 8 add pA, pA, temp add ppA, ppA, temp subs counterI, counterI, #1 - bne cgemm_kernel_L4_M8_20 + bne .Lcgemm_kernel_L4_M8_20 -cgemm_kernel_L4_M4_BEGIN: +.Lcgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END tst counterI, #4 - ble cgemm_kernel_L4_M2_BEGIN + ble .Lcgemm_kernel_L4_M2_BEGIN -cgemm_kernel_L4_M4_20: +.Lcgemm_kernel_L4_M4_20: INIT4x4 mov pB, origPB asr counterL, origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble cgemm_kernel_L4_M4_40 + ble .Lcgemm_kernel_L4_M4_40 -cgemm_kernel_L4_M4_22: +.Lcgemm_kernel_L4_M4_22: KERNEL4x4_SUB KERNEL4x4_SUB @@ -1186,47 +1186,47 @@ cgemm_kernel_L4_M4_22: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M4_22 + bgt .Lcgemm_kernel_L4_M4_22 -cgemm_kernel_L4_M4_40: +.Lcgemm_kernel_L4_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L4_M4_100 + ble .Lcgemm_kernel_L4_M4_100 -cgemm_kernel_L4_M4_42: +.Lcgemm_kernel_L4_M4_42: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M4_42 + bgt .Lcgemm_kernel_L4_M4_42 -cgemm_kernel_L4_M4_100: +.Lcgemm_kernel_L4_M4_100: SAVE4x4 -cgemm_kernel_L4_M4_END: +.Lcgemm_kernel_L4_M4_END: -cgemm_kernel_L4_M2_BEGIN: +.Lcgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L4_M1_BEGIN + ble .Lcgemm_kernel_L4_M1_BEGIN -cgemm_kernel_L4_M2_20: +.Lcgemm_kernel_L4_M2_20: INIT2x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L4_M2_40 + ble .Lcgemm_kernel_L4_M2_40 -cgemm_kernel_L4_M2_22: +.Lcgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1239,43 +1239,43 @@ cgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M2_22 + bgt .Lcgemm_kernel_L4_M2_22 -cgemm_kernel_L4_M2_40: +.Lcgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L4_M2_100 + ble .Lcgemm_kernel_L4_M2_100 -cgemm_kernel_L4_M2_42: +.Lcgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M2_42 + bgt .Lcgemm_kernel_L4_M2_42 -cgemm_kernel_L4_M2_100: +.Lcgemm_kernel_L4_M2_100: SAVE2x4 -cgemm_kernel_L4_M2_END: +.Lcgemm_kernel_L4_M2_END: -cgemm_kernel_L4_M1_BEGIN: +.Lcgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END -cgemm_kernel_L4_M1_20: +.Lcgemm_kernel_L4_M1_20: INIT1x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L4_M1_40 + ble .Lcgemm_kernel_L4_M1_40 -cgemm_kernel_L4_M1_22: +.Lcgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1287,45 +1287,45 @@ cgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M1_22 + bgt .Lcgemm_kernel_L4_M1_22 -cgemm_kernel_L4_M1_40: +.Lcgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L4_M1_100 + ble .Lcgemm_kernel_L4_M1_100 -cgemm_kernel_L4_M1_42: +.Lcgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M1_42 + bgt .Lcgemm_kernel_L4_M1_42 -cgemm_kernel_L4_M1_100: +.Lcgemm_kernel_L4_M1_100: SAVE1x4 -cgemm_kernel_L4_END: +.Lcgemm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 subs counterJ, counterJ , #1 // j-- - bgt cgemm_kernel_L4_BEGIN + bgt .Lcgemm_kernel_L4_BEGIN /******************************************************************************/ -cgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lcgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble cgemm_kernel_L999 // error, N was less than 4? + ble .Lcgemm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble cgemm_kernel_L1_BEGIN + ble .Lcgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1335,24 +1335,24 @@ cgemm_kernel_L2_BEGIN: // less than 2 left in N direction -cgemm_kernel_L2_M4_BEGIN: +.Lcgemm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble cgemm_kernel_L2_M2_BEGIN + ble .Lcgemm_kernel_L2_M2_BEGIN -cgemm_kernel_L2_M4_20: +.Lcgemm_kernel_L2_M4_20: INIT4x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M4_40 + ble .Lcgemm_kernel_L2_M4_40 .align 5 -cgemm_kernel_L2_M4_22: +.Lcgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1364,50 +1364,50 @@ cgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M4_22 + bgt .Lcgemm_kernel_L2_M4_22 -cgemm_kernel_L2_M4_40: +.Lcgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M4_100 + ble .Lcgemm_kernel_L2_M4_100 -cgemm_kernel_L2_M4_42: +.Lcgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M4_42 + bgt .Lcgemm_kernel_L2_M4_42 -cgemm_kernel_L2_M4_100: +.Lcgemm_kernel_L2_M4_100: SAVE4x2 -cgemm_kernel_L2_M4_END: +.Lcgemm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt cgemm_kernel_L2_M4_20 + bgt .Lcgemm_kernel_L2_M4_20 -cgemm_kernel_L2_M2_BEGIN: +.Lcgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L2_M1_BEGIN + ble .Lcgemm_kernel_L2_M1_BEGIN -cgemm_kernel_L2_M2_20: +.Lcgemm_kernel_L2_M2_20: INIT2x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M2_40 + ble .Lcgemm_kernel_L2_M2_40 -cgemm_kernel_L2_M2_22: +.Lcgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1420,43 +1420,43 @@ cgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M2_22 + bgt .Lcgemm_kernel_L2_M2_22 -cgemm_kernel_L2_M2_40: +.Lcgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M2_100 + ble .Lcgemm_kernel_L2_M2_100 -cgemm_kernel_L2_M2_42: +.Lcgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M2_42 + bgt .Lcgemm_kernel_L2_M2_42 -cgemm_kernel_L2_M2_100: +.Lcgemm_kernel_L2_M2_100: SAVE2x2 -cgemm_kernel_L2_M2_END: +.Lcgemm_kernel_L2_M2_END: -cgemm_kernel_L2_M1_BEGIN: +.Lcgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END -cgemm_kernel_L2_M1_20: +.Lcgemm_kernel_L2_M1_20: INIT1x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble cgemm_kernel_L2_M1_40 + ble .Lcgemm_kernel_L2_M1_40 -cgemm_kernel_L2_M1_22: +.Lcgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1468,36 +1468,36 @@ cgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M1_22 + bgt .Lcgemm_kernel_L2_M1_22 -cgemm_kernel_L2_M1_40: +.Lcgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M1_100 + ble .Lcgemm_kernel_L2_M1_100 -cgemm_kernel_L2_M1_42: +.Lcgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M1_42 + bgt .Lcgemm_kernel_L2_M1_42 -cgemm_kernel_L2_M1_100: +.Lcgemm_kernel_L2_M1_100: SAVE1x2 -cgemm_kernel_L2_END: +.Lcgemm_kernel_L2_END: add origPB, origPB, origK, lsl #4 // B = B + K * 2 * 8 /******************************************************************************/ -cgemm_kernel_L1_BEGIN: +.Lcgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble cgemm_kernel_L999 // done + ble .Lcgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1507,24 +1507,24 @@ cgemm_kernel_L1_BEGIN: -cgemm_kernel_L1_M4_BEGIN: +.Lcgemm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble cgemm_kernel_L1_M2_BEGIN + ble .Lcgemm_kernel_L1_M2_BEGIN -cgemm_kernel_L1_M4_20: +.Lcgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M4_40 + ble .Lcgemm_kernel_L1_M4_40 .align 5 -cgemm_kernel_L1_M4_22: +.Lcgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1536,50 +1536,50 @@ cgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M4_22 + bgt .Lcgemm_kernel_L1_M4_22 -cgemm_kernel_L1_M4_40: +.Lcgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M4_100 + ble .Lcgemm_kernel_L1_M4_100 -cgemm_kernel_L1_M4_42: +.Lcgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M4_42 + bgt .Lcgemm_kernel_L1_M4_42 -cgemm_kernel_L1_M4_100: +.Lcgemm_kernel_L1_M4_100: SAVE4x1 -cgemm_kernel_L1_M4_END: +.Lcgemm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt cgemm_kernel_L1_M4_20 + bgt .Lcgemm_kernel_L1_M4_20 -cgemm_kernel_L1_M2_BEGIN: +.Lcgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L1_M1_BEGIN + ble .Lcgemm_kernel_L1_M1_BEGIN -cgemm_kernel_L1_M2_20: +.Lcgemm_kernel_L1_M2_20: INIT2x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M2_40 + ble .Lcgemm_kernel_L1_M2_40 -cgemm_kernel_L1_M2_22: +.Lcgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1592,43 +1592,43 @@ cgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M2_22 + bgt .Lcgemm_kernel_L1_M2_22 -cgemm_kernel_L1_M2_40: +.Lcgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M2_100 + ble .Lcgemm_kernel_L1_M2_100 -cgemm_kernel_L1_M2_42: +.Lcgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M2_42 + bgt .Lcgemm_kernel_L1_M2_42 -cgemm_kernel_L1_M2_100: +.Lcgemm_kernel_L1_M2_100: SAVE2x1 -cgemm_kernel_L1_M2_END: +.Lcgemm_kernel_L1_M2_END: -cgemm_kernel_L1_M1_BEGIN: +.Lcgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END -cgemm_kernel_L1_M1_20: +.Lcgemm_kernel_L1_M1_20: INIT1x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M1_40 + ble .Lcgemm_kernel_L1_M1_40 -cgemm_kernel_L1_M1_22: +.Lcgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1640,30 +1640,30 @@ cgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M1_22 + bgt .Lcgemm_kernel_L1_M1_22 -cgemm_kernel_L1_M1_40: +.Lcgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M1_100 + ble .Lcgemm_kernel_L1_M1_100 -cgemm_kernel_L1_M1_42: +.Lcgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M1_42 + bgt .Lcgemm_kernel_L1_M1_42 -cgemm_kernel_L1_M1_100: +.Lcgemm_kernel_L1_M1_100: SAVE1x1 -cgemm_kernel_L1_END: +.Lcgemm_kernel_L1_END: -cgemm_kernel_L999: +.Lcgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/cgemm_kernel_8x4.S b/kernel/arm64/cgemm_kernel_8x4.S index 5d1462808..24e08a646 100644 --- a/kernel/arm64/cgemm_kernel_8x4.S +++ b/kernel/arm64/cgemm_kernel_8x4.S @@ -1407,11 +1407,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble cgemm_kernel_L2_BEGIN + ble .Lcgemm_kernel_L2_BEGIN /******************************************************************************/ -cgemm_kernel_L4_BEGIN: +.Lcgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1421,21 +1421,21 @@ cgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -cgemm_kernel_L4_M8_BEGIN: +.Lcgemm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble cgemm_kernel_L4_M4_BEGIN + ble .Lcgemm_kernel_L4_M4_BEGIN .align 5 -cgemm_kernel_L4_M8_20: +.Lcgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #3 cmp counterL , #2 - blt cgemm_kernel_L4_M8_32 + blt .Lcgemm_kernel_L4_M8_32 KERNEL8x4_I KERNEL8x4_M2 @@ -1447,10 +1447,10 @@ cgemm_kernel_L4_M8_20: KERNEL8x4_M2 subs counterL, counterL, #2 // subtract 2 - ble cgemm_kernel_L4_M8_22a + ble .Lcgemm_kernel_L4_M8_22a .align 5 -cgemm_kernel_L4_M8_22: +.Lcgemm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 @@ -1462,10 +1462,10 @@ cgemm_kernel_L4_M8_22: KERNEL8x4_M2 subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M8_22 + bgt .Lcgemm_kernel_L4_M8_22 .align 5 -cgemm_kernel_L4_M8_22a: +.Lcgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_M2 @@ -1476,13 +1476,13 @@ cgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b cgemm_kernel_L4_M8_44 + b .Lcgemm_kernel_L4_M8_44 .align 5 -cgemm_kernel_L4_M8_32: +.Lcgemm_kernel_L4_M8_32: tst counterL, #1 - ble cgemm_kernel_L4_M8_40 + ble .Lcgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_M2 @@ -1493,116 +1493,116 @@ cgemm_kernel_L4_M8_32: KERNEL8x4_M1 KERNEL8x4_E - b cgemm_kernel_L4_M8_44 + b .Lcgemm_kernel_L4_M8_44 -cgemm_kernel_L4_M8_40: +.Lcgemm_kernel_L4_M8_40: INIT8x4 -cgemm_kernel_L4_M8_44: +.Lcgemm_kernel_L4_M8_44: ands counterL , origK, #7 - ble cgemm_kernel_L4_M8_100 + ble .Lcgemm_kernel_L4_M8_100 .align 5 -cgemm_kernel_L4_M8_46: +.Lcgemm_kernel_L4_M8_46: KERNEL8x4_SUB subs counterL, counterL, #1 - bne cgemm_kernel_L4_M8_46 + bne .Lcgemm_kernel_L4_M8_46 -cgemm_kernel_L4_M8_100: +.Lcgemm_kernel_L4_M8_100: prfm PLDL1KEEP, [pA] prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] SAVE8x4 -cgemm_kernel_L4_M8_END: +.Lcgemm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne cgemm_kernel_L4_M8_20 + bne .Lcgemm_kernel_L4_M8_20 -cgemm_kernel_L4_M4_BEGIN: +.Lcgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END tst counterI, #4 - ble cgemm_kernel_L4_M2_BEGIN + ble .Lcgemm_kernel_L4_M2_BEGIN -cgemm_kernel_L4_M4_20: +.Lcgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt cgemm_kernel_L4_M4_32 + blt .Lcgemm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble cgemm_kernel_L4_M4_22a + ble .Lcgemm_kernel_L4_M4_22a .align 5 -cgemm_kernel_L4_M4_22: +.Lcgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M4_22 + bgt .Lcgemm_kernel_L4_M4_22 -cgemm_kernel_L4_M4_22a: +.Lcgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b cgemm_kernel_L4_M4_44 -cgemm_kernel_L4_M4_32: + b .Lcgemm_kernel_L4_M4_44 +.Lcgemm_kernel_L4_M4_32: tst counterL, #1 - ble cgemm_kernel_L4_M4_40 + ble .Lcgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b cgemm_kernel_L4_M4_44 -cgemm_kernel_L4_M4_40: + b .Lcgemm_kernel_L4_M4_44 +.Lcgemm_kernel_L4_M4_40: INIT4x4 -cgemm_kernel_L4_M4_44: +.Lcgemm_kernel_L4_M4_44: ands counterL , origK, #1 - ble cgemm_kernel_L4_M4_100 + ble .Lcgemm_kernel_L4_M4_100 -cgemm_kernel_L4_M4_46: +.Lcgemm_kernel_L4_M4_46: KERNEL4x4_SUB -cgemm_kernel_L4_M4_100: +.Lcgemm_kernel_L4_M4_100: SAVE4x4 -cgemm_kernel_L4_M4_END: +.Lcgemm_kernel_L4_M4_END: -cgemm_kernel_L4_M2_BEGIN: +.Lcgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L4_M1_BEGIN + ble .Lcgemm_kernel_L4_M1_BEGIN -cgemm_kernel_L4_M2_20: +.Lcgemm_kernel_L4_M2_20: INIT2x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L4_M2_40 + ble .Lcgemm_kernel_L4_M2_40 -cgemm_kernel_L4_M2_22: +.Lcgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1615,43 +1615,43 @@ cgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M2_22 + bgt .Lcgemm_kernel_L4_M2_22 -cgemm_kernel_L4_M2_40: +.Lcgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L4_M2_100 + ble .Lcgemm_kernel_L4_M2_100 -cgemm_kernel_L4_M2_42: +.Lcgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M2_42 + bgt .Lcgemm_kernel_L4_M2_42 -cgemm_kernel_L4_M2_100: +.Lcgemm_kernel_L4_M2_100: SAVE2x4 -cgemm_kernel_L4_M2_END: +.Lcgemm_kernel_L4_M2_END: -cgemm_kernel_L4_M1_BEGIN: +.Lcgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END -cgemm_kernel_L4_M1_20: +.Lcgemm_kernel_L4_M1_20: INIT1x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L4_M1_40 + ble .Lcgemm_kernel_L4_M1_40 -cgemm_kernel_L4_M1_22: +.Lcgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1663,45 +1663,45 @@ cgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M1_22 + bgt .Lcgemm_kernel_L4_M1_22 -cgemm_kernel_L4_M1_40: +.Lcgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L4_M1_100 + ble .Lcgemm_kernel_L4_M1_100 -cgemm_kernel_L4_M1_42: +.Lcgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M1_42 + bgt .Lcgemm_kernel_L4_M1_42 -cgemm_kernel_L4_M1_100: +.Lcgemm_kernel_L4_M1_100: SAVE1x4 -cgemm_kernel_L4_END: +.Lcgemm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 subs counterJ, counterJ , #1 // j-- - bgt cgemm_kernel_L4_BEGIN + bgt .Lcgemm_kernel_L4_BEGIN /******************************************************************************/ -cgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lcgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble cgemm_kernel_L999 // error, N was less than 4? + ble .Lcgemm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble cgemm_kernel_L1_BEGIN + ble .Lcgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1710,14 +1710,14 @@ cgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -cgemm_kernel_L2_M8_BEGIN: +.Lcgemm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble cgemm_kernel_L2_M4_BEGIN + ble .Lcgemm_kernel_L2_M4_BEGIN -cgemm_kernel_L2_M8_20: +.Lcgemm_kernel_L2_M8_20: INIT8x2 @@ -1725,10 +1725,10 @@ cgemm_kernel_L2_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M8_40 + ble .Lcgemm_kernel_L2_M8_40 .align 5 -cgemm_kernel_L2_M8_22: +.Lcgemm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1740,50 +1740,50 @@ cgemm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M8_22 + bgt .Lcgemm_kernel_L2_M8_22 -cgemm_kernel_L2_M8_40: +.Lcgemm_kernel_L2_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M8_100 + ble .Lcgemm_kernel_L2_M8_100 -cgemm_kernel_L2_M8_42: +.Lcgemm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M8_42 + bgt .Lcgemm_kernel_L2_M8_42 -cgemm_kernel_L2_M8_100: +.Lcgemm_kernel_L2_M8_100: SAVE8x2 -cgemm_kernel_L2_M8_END: +.Lcgemm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt cgemm_kernel_L2_M8_20 + bgt .Lcgemm_kernel_L2_M8_20 -cgemm_kernel_L2_M4_BEGIN: +.Lcgemm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END tst counterI, #4 // counterI = counterI / 2 - ble cgemm_kernel_L2_M2_BEGIN + ble .Lcgemm_kernel_L2_M2_BEGIN -cgemm_kernel_L2_M4_20: +.Lcgemm_kernel_L2_M4_20: INIT4x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M4_40 + ble .Lcgemm_kernel_L2_M4_40 .align 5 -cgemm_kernel_L2_M4_22: +.Lcgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1795,46 +1795,46 @@ cgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M4_22 + bgt .Lcgemm_kernel_L2_M4_22 -cgemm_kernel_L2_M4_40: +.Lcgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M4_100 + ble .Lcgemm_kernel_L2_M4_100 -cgemm_kernel_L2_M4_42: +.Lcgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M4_42 + bgt .Lcgemm_kernel_L2_M4_42 -cgemm_kernel_L2_M4_100: +.Lcgemm_kernel_L2_M4_100: SAVE4x2 -cgemm_kernel_L2_M4_END: +.Lcgemm_kernel_L2_M4_END: -cgemm_kernel_L2_M2_BEGIN: +.Lcgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L2_M1_BEGIN + ble .Lcgemm_kernel_L2_M1_BEGIN -cgemm_kernel_L2_M2_20: +.Lcgemm_kernel_L2_M2_20: INIT2x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M2_40 + ble .Lcgemm_kernel_L2_M2_40 -cgemm_kernel_L2_M2_22: +.Lcgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1847,43 +1847,43 @@ cgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M2_22 + bgt .Lcgemm_kernel_L2_M2_22 -cgemm_kernel_L2_M2_40: +.Lcgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M2_100 + ble .Lcgemm_kernel_L2_M2_100 -cgemm_kernel_L2_M2_42: +.Lcgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M2_42 + bgt .Lcgemm_kernel_L2_M2_42 -cgemm_kernel_L2_M2_100: +.Lcgemm_kernel_L2_M2_100: SAVE2x2 -cgemm_kernel_L2_M2_END: +.Lcgemm_kernel_L2_M2_END: -cgemm_kernel_L2_M1_BEGIN: +.Lcgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END -cgemm_kernel_L2_M1_20: +.Lcgemm_kernel_L2_M1_20: INIT1x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble cgemm_kernel_L2_M1_40 + ble .Lcgemm_kernel_L2_M1_40 -cgemm_kernel_L2_M1_22: +.Lcgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1895,36 +1895,36 @@ cgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M1_22 + bgt .Lcgemm_kernel_L2_M1_22 -cgemm_kernel_L2_M1_40: +.Lcgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M1_100 + ble .Lcgemm_kernel_L2_M1_100 -cgemm_kernel_L2_M1_42: +.Lcgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M1_42 + bgt .Lcgemm_kernel_L2_M1_42 -cgemm_kernel_L2_M1_100: +.Lcgemm_kernel_L2_M1_100: SAVE1x2 -cgemm_kernel_L2_END: +.Lcgemm_kernel_L2_END: add origPB, origPB, origK, lsl #4 // B = B + K * 2 * 8 /******************************************************************************/ -cgemm_kernel_L1_BEGIN: +.Lcgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble cgemm_kernel_L999 // done + ble .Lcgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1933,24 +1933,24 @@ cgemm_kernel_L1_BEGIN: mov pA, origPA // pA = A -cgemm_kernel_L1_M8_BEGIN: +.Lcgemm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble cgemm_kernel_L1_M4_BEGIN + ble .Lcgemm_kernel_L1_M4_BEGIN -cgemm_kernel_L1_M8_20: +.Lcgemm_kernel_L1_M8_20: INIT8x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M8_40 + ble .Lcgemm_kernel_L1_M8_40 .align 5 -cgemm_kernel_L1_M8_22: +.Lcgemm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -1962,51 +1962,51 @@ cgemm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M8_22 + bgt .Lcgemm_kernel_L1_M8_22 -cgemm_kernel_L1_M8_40: +.Lcgemm_kernel_L1_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M8_100 + ble .Lcgemm_kernel_L1_M8_100 -cgemm_kernel_L1_M8_42: +.Lcgemm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M8_42 + bgt .Lcgemm_kernel_L1_M8_42 -cgemm_kernel_L1_M8_100: +.Lcgemm_kernel_L1_M8_100: SAVE8x1 -cgemm_kernel_L1_M8_END: +.Lcgemm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt cgemm_kernel_L1_M8_20 + bgt .Lcgemm_kernel_L1_M8_20 -cgemm_kernel_L1_M4_BEGIN: +.Lcgemm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END tst counterI, #4 // counterI = counterI / 2 - ble cgemm_kernel_L1_M2_BEGIN + ble .Lcgemm_kernel_L1_M2_BEGIN -cgemm_kernel_L1_M4_20: +.Lcgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M4_40 + ble .Lcgemm_kernel_L1_M4_40 .align 5 -cgemm_kernel_L1_M4_22: +.Lcgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -2018,47 +2018,47 @@ cgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M4_22 + bgt .Lcgemm_kernel_L1_M4_22 -cgemm_kernel_L1_M4_40: +.Lcgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M4_100 + ble .Lcgemm_kernel_L1_M4_100 -cgemm_kernel_L1_M4_42: +.Lcgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M4_42 + bgt .Lcgemm_kernel_L1_M4_42 -cgemm_kernel_L1_M4_100: +.Lcgemm_kernel_L1_M4_100: SAVE4x1 -cgemm_kernel_L1_M4_END: +.Lcgemm_kernel_L1_M4_END: -cgemm_kernel_L1_M2_BEGIN: +.Lcgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L1_M1_BEGIN + ble .Lcgemm_kernel_L1_M1_BEGIN -cgemm_kernel_L1_M2_20: +.Lcgemm_kernel_L1_M2_20: INIT2x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M2_40 + ble .Lcgemm_kernel_L1_M2_40 -cgemm_kernel_L1_M2_22: +.Lcgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -2071,43 +2071,43 @@ cgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M2_22 + bgt .Lcgemm_kernel_L1_M2_22 -cgemm_kernel_L1_M2_40: +.Lcgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M2_100 + ble .Lcgemm_kernel_L1_M2_100 -cgemm_kernel_L1_M2_42: +.Lcgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M2_42 + bgt .Lcgemm_kernel_L1_M2_42 -cgemm_kernel_L1_M2_100: +.Lcgemm_kernel_L1_M2_100: SAVE2x1 -cgemm_kernel_L1_M2_END: +.Lcgemm_kernel_L1_M2_END: -cgemm_kernel_L1_M1_BEGIN: +.Lcgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END -cgemm_kernel_L1_M1_20: +.Lcgemm_kernel_L1_M1_20: INIT1x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M1_40 + ble .Lcgemm_kernel_L1_M1_40 -cgemm_kernel_L1_M1_22: +.Lcgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2119,30 +2119,30 @@ cgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M1_22 + bgt .Lcgemm_kernel_L1_M1_22 -cgemm_kernel_L1_M1_40: +.Lcgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M1_100 + ble .Lcgemm_kernel_L1_M1_100 -cgemm_kernel_L1_M1_42: +.Lcgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M1_42 + bgt .Lcgemm_kernel_L1_M1_42 -cgemm_kernel_L1_M1_100: +.Lcgemm_kernel_L1_M1_100: SAVE1x1 -cgemm_kernel_L1_END: +.Lcgemm_kernel_L1_END: -cgemm_kernel_L999: +.Lcgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S b/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S index 367cd0217..29a68ff22 100644 --- a/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S +++ b/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S @@ -1432,11 +1432,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble cgemm_kernel_L2_BEGIN + ble .Lcgemm_kernel_L2_BEGIN /******************************************************************************/ -cgemm_kernel_L4_BEGIN: +.Lcgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1446,21 +1446,21 @@ cgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -cgemm_kernel_L4_M8_BEGIN: +.Lcgemm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble cgemm_kernel_L4_M4_BEGIN + ble .Lcgemm_kernel_L4_M4_BEGIN .align 5 -cgemm_kernel_L4_M8_20: +.Lcgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #5 // origK / 32 cmp counterL , #2 - blt cgemm_kernel_L4_M8_32 + blt .Lcgemm_kernel_L4_M8_32 KERNEL8x4_I KERNEL8x4_M2 @@ -1470,18 +1470,18 @@ cgemm_kernel_L4_M8_20: KERNEL8x4_M1_M2_x8 subs counterL, counterL, #2 // subtract 2 - ble cgemm_kernel_L4_M8_22a + ble .Lcgemm_kernel_L4_M8_22a .align 5 -cgemm_kernel_L4_M8_22: +.Lcgemm_kernel_L4_M8_22: KERNEL8x4_M1_M2_x16 subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M8_22 + bgt .Lcgemm_kernel_L4_M8_22 .align 5 -cgemm_kernel_L4_M8_22a: +.Lcgemm_kernel_L4_M8_22a: KERNEL8x4_M1_M2_x8 KERNEL8x4_M1_M2_x4 @@ -1490,13 +1490,13 @@ cgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b cgemm_kernel_L4_M8_44 + b .Lcgemm_kernel_L4_M8_44 .align 5 -cgemm_kernel_L4_M8_32: +.Lcgemm_kernel_L4_M8_32: tst counterL, #1 - ble cgemm_kernel_L4_M8_40 + ble .Lcgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_M2 @@ -1506,116 +1506,116 @@ cgemm_kernel_L4_M8_32: KERNEL8x4_M1 KERNEL8x4_E - b cgemm_kernel_L4_M8_44 + b .Lcgemm_kernel_L4_M8_44 -cgemm_kernel_L4_M8_40: +.Lcgemm_kernel_L4_M8_40: INIT8x4 -cgemm_kernel_L4_M8_44: +.Lcgemm_kernel_L4_M8_44: ands counterL , origK, #31 - ble cgemm_kernel_L4_M8_100 + ble .Lcgemm_kernel_L4_M8_100 .align 5 -cgemm_kernel_L4_M8_46: +.Lcgemm_kernel_L4_M8_46: KERNEL8x4_SUB subs counterL, counterL, #1 - bne cgemm_kernel_L4_M8_46 + bne .Lcgemm_kernel_L4_M8_46 -cgemm_kernel_L4_M8_100: +.Lcgemm_kernel_L4_M8_100: prfm PLDL1KEEP, [pA] prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] SAVE8x4 -cgemm_kernel_L4_M8_END: +.Lcgemm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne cgemm_kernel_L4_M8_20 + bne .Lcgemm_kernel_L4_M8_20 -cgemm_kernel_L4_M4_BEGIN: +.Lcgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END tst counterI, #4 - ble cgemm_kernel_L4_M2_BEGIN + ble .Lcgemm_kernel_L4_M2_BEGIN -cgemm_kernel_L4_M4_20: +.Lcgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt cgemm_kernel_L4_M4_32 + blt .Lcgemm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble cgemm_kernel_L4_M4_22a + ble .Lcgemm_kernel_L4_M4_22a .align 5 -cgemm_kernel_L4_M4_22: +.Lcgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M4_22 + bgt .Lcgemm_kernel_L4_M4_22 -cgemm_kernel_L4_M4_22a: +.Lcgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b cgemm_kernel_L4_M4_44 -cgemm_kernel_L4_M4_32: + b .Lcgemm_kernel_L4_M4_44 +.Lcgemm_kernel_L4_M4_32: tst counterL, #1 - ble cgemm_kernel_L4_M4_40 + ble .Lcgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b cgemm_kernel_L4_M4_44 -cgemm_kernel_L4_M4_40: + b .Lcgemm_kernel_L4_M4_44 +.Lcgemm_kernel_L4_M4_40: INIT4x4 -cgemm_kernel_L4_M4_44: +.Lcgemm_kernel_L4_M4_44: ands counterL , origK, #1 - ble cgemm_kernel_L4_M4_100 + ble .Lcgemm_kernel_L4_M4_100 -cgemm_kernel_L4_M4_46: +.Lcgemm_kernel_L4_M4_46: KERNEL4x4_SUB -cgemm_kernel_L4_M4_100: +.Lcgemm_kernel_L4_M4_100: SAVE4x4 -cgemm_kernel_L4_M4_END: +.Lcgemm_kernel_L4_M4_END: -cgemm_kernel_L4_M2_BEGIN: +.Lcgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L4_M1_BEGIN + ble .Lcgemm_kernel_L4_M1_BEGIN -cgemm_kernel_L4_M2_20: +.Lcgemm_kernel_L4_M2_20: INIT2x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L4_M2_40 + ble .Lcgemm_kernel_L4_M2_40 -cgemm_kernel_L4_M2_22: +.Lcgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1628,43 +1628,43 @@ cgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M2_22 + bgt .Lcgemm_kernel_L4_M2_22 -cgemm_kernel_L4_M2_40: +.Lcgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L4_M2_100 + ble .Lcgemm_kernel_L4_M2_100 -cgemm_kernel_L4_M2_42: +.Lcgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M2_42 + bgt .Lcgemm_kernel_L4_M2_42 -cgemm_kernel_L4_M2_100: +.Lcgemm_kernel_L4_M2_100: SAVE2x4 -cgemm_kernel_L4_M2_END: +.Lcgemm_kernel_L4_M2_END: -cgemm_kernel_L4_M1_BEGIN: +.Lcgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L4_END + ble .Lcgemm_kernel_L4_END -cgemm_kernel_L4_M1_20: +.Lcgemm_kernel_L4_M1_20: INIT1x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L4_M1_40 + ble .Lcgemm_kernel_L4_M1_40 -cgemm_kernel_L4_M1_22: +.Lcgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1676,45 +1676,45 @@ cgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M1_22 + bgt .Lcgemm_kernel_L4_M1_22 -cgemm_kernel_L4_M1_40: +.Lcgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L4_M1_100 + ble .Lcgemm_kernel_L4_M1_100 -cgemm_kernel_L4_M1_42: +.Lcgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L4_M1_42 + bgt .Lcgemm_kernel_L4_M1_42 -cgemm_kernel_L4_M1_100: +.Lcgemm_kernel_L4_M1_100: SAVE1x4 -cgemm_kernel_L4_END: +.Lcgemm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 subs counterJ, counterJ , #1 // j-- - bgt cgemm_kernel_L4_BEGIN + bgt .Lcgemm_kernel_L4_BEGIN /******************************************************************************/ -cgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lcgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble cgemm_kernel_L999 // error, N was less than 4? + ble .Lcgemm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble cgemm_kernel_L1_BEGIN + ble .Lcgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1723,14 +1723,14 @@ cgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -cgemm_kernel_L2_M8_BEGIN: +.Lcgemm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble cgemm_kernel_L2_M4_BEGIN + ble .Lcgemm_kernel_L2_M4_BEGIN -cgemm_kernel_L2_M8_20: +.Lcgemm_kernel_L2_M8_20: INIT8x2 @@ -1738,10 +1738,10 @@ cgemm_kernel_L2_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M8_40 + ble .Lcgemm_kernel_L2_M8_40 .align 5 -cgemm_kernel_L2_M8_22: +.Lcgemm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1753,50 +1753,50 @@ cgemm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M8_22 + bgt .Lcgemm_kernel_L2_M8_22 -cgemm_kernel_L2_M8_40: +.Lcgemm_kernel_L2_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M8_100 + ble .Lcgemm_kernel_L2_M8_100 -cgemm_kernel_L2_M8_42: +.Lcgemm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M8_42 + bgt .Lcgemm_kernel_L2_M8_42 -cgemm_kernel_L2_M8_100: +.Lcgemm_kernel_L2_M8_100: SAVE8x2 -cgemm_kernel_L2_M8_END: +.Lcgemm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt cgemm_kernel_L2_M8_20 + bgt .Lcgemm_kernel_L2_M8_20 -cgemm_kernel_L2_M4_BEGIN: +.Lcgemm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END tst counterI, #4 // counterI = counterI / 2 - ble cgemm_kernel_L2_M2_BEGIN + ble .Lcgemm_kernel_L2_M2_BEGIN -cgemm_kernel_L2_M4_20: +.Lcgemm_kernel_L2_M4_20: INIT4x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M4_40 + ble .Lcgemm_kernel_L2_M4_40 .align 5 -cgemm_kernel_L2_M4_22: +.Lcgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1808,46 +1808,46 @@ cgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M4_22 + bgt .Lcgemm_kernel_L2_M4_22 -cgemm_kernel_L2_M4_40: +.Lcgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M4_100 + ble .Lcgemm_kernel_L2_M4_100 -cgemm_kernel_L2_M4_42: +.Lcgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M4_42 + bgt .Lcgemm_kernel_L2_M4_42 -cgemm_kernel_L2_M4_100: +.Lcgemm_kernel_L2_M4_100: SAVE4x2 -cgemm_kernel_L2_M4_END: +.Lcgemm_kernel_L2_M4_END: -cgemm_kernel_L2_M2_BEGIN: +.Lcgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L2_M1_BEGIN + ble .Lcgemm_kernel_L2_M1_BEGIN -cgemm_kernel_L2_M2_20: +.Lcgemm_kernel_L2_M2_20: INIT2x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble cgemm_kernel_L2_M2_40 + ble .Lcgemm_kernel_L2_M2_40 -cgemm_kernel_L2_M2_22: +.Lcgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1860,43 +1860,43 @@ cgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M2_22 + bgt .Lcgemm_kernel_L2_M2_22 -cgemm_kernel_L2_M2_40: +.Lcgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M2_100 + ble .Lcgemm_kernel_L2_M2_100 -cgemm_kernel_L2_M2_42: +.Lcgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M2_42 + bgt .Lcgemm_kernel_L2_M2_42 -cgemm_kernel_L2_M2_100: +.Lcgemm_kernel_L2_M2_100: SAVE2x2 -cgemm_kernel_L2_M2_END: +.Lcgemm_kernel_L2_M2_END: -cgemm_kernel_L2_M1_BEGIN: +.Lcgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L2_END + ble .Lcgemm_kernel_L2_END -cgemm_kernel_L2_M1_20: +.Lcgemm_kernel_L2_M1_20: INIT1x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble cgemm_kernel_L2_M1_40 + ble .Lcgemm_kernel_L2_M1_40 -cgemm_kernel_L2_M1_22: +.Lcgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1908,36 +1908,36 @@ cgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M1_22 + bgt .Lcgemm_kernel_L2_M1_22 -cgemm_kernel_L2_M1_40: +.Lcgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L2_M1_100 + ble .Lcgemm_kernel_L2_M1_100 -cgemm_kernel_L2_M1_42: +.Lcgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L2_M1_42 + bgt .Lcgemm_kernel_L2_M1_42 -cgemm_kernel_L2_M1_100: +.Lcgemm_kernel_L2_M1_100: SAVE1x2 -cgemm_kernel_L2_END: +.Lcgemm_kernel_L2_END: add origPB, origPB, origK, lsl #4 // B = B + K * 2 * 8 /******************************************************************************/ -cgemm_kernel_L1_BEGIN: +.Lcgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble cgemm_kernel_L999 // done + ble .Lcgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1946,24 +1946,24 @@ cgemm_kernel_L1_BEGIN: mov pA, origPA // pA = A -cgemm_kernel_L1_M8_BEGIN: +.Lcgemm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble cgemm_kernel_L1_M4_BEGIN + ble .Lcgemm_kernel_L1_M4_BEGIN -cgemm_kernel_L1_M8_20: +.Lcgemm_kernel_L1_M8_20: INIT8x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M8_40 + ble .Lcgemm_kernel_L1_M8_40 .align 5 -cgemm_kernel_L1_M8_22: +.Lcgemm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -1975,51 +1975,51 @@ cgemm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M8_22 + bgt .Lcgemm_kernel_L1_M8_22 -cgemm_kernel_L1_M8_40: +.Lcgemm_kernel_L1_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M8_100 + ble .Lcgemm_kernel_L1_M8_100 -cgemm_kernel_L1_M8_42: +.Lcgemm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M8_42 + bgt .Lcgemm_kernel_L1_M8_42 -cgemm_kernel_L1_M8_100: +.Lcgemm_kernel_L1_M8_100: SAVE8x1 -cgemm_kernel_L1_M8_END: +.Lcgemm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt cgemm_kernel_L1_M8_20 + bgt .Lcgemm_kernel_L1_M8_20 -cgemm_kernel_L1_M4_BEGIN: +.Lcgemm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END tst counterI, #4 // counterI = counterI / 2 - ble cgemm_kernel_L1_M2_BEGIN + ble .Lcgemm_kernel_L1_M2_BEGIN -cgemm_kernel_L1_M4_20: +.Lcgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M4_40 + ble .Lcgemm_kernel_L1_M4_40 .align 5 -cgemm_kernel_L1_M4_22: +.Lcgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -2031,47 +2031,47 @@ cgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M4_22 + bgt .Lcgemm_kernel_L1_M4_22 -cgemm_kernel_L1_M4_40: +.Lcgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M4_100 + ble .Lcgemm_kernel_L1_M4_100 -cgemm_kernel_L1_M4_42: +.Lcgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M4_42 + bgt .Lcgemm_kernel_L1_M4_42 -cgemm_kernel_L1_M4_100: +.Lcgemm_kernel_L1_M4_100: SAVE4x1 -cgemm_kernel_L1_M4_END: +.Lcgemm_kernel_L1_M4_END: -cgemm_kernel_L1_M2_BEGIN: +.Lcgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble cgemm_kernel_L1_M1_BEGIN + ble .Lcgemm_kernel_L1_M1_BEGIN -cgemm_kernel_L1_M2_20: +.Lcgemm_kernel_L1_M2_20: INIT2x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M2_40 + ble .Lcgemm_kernel_L1_M2_40 -cgemm_kernel_L1_M2_22: +.Lcgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -2084,43 +2084,43 @@ cgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M2_22 + bgt .Lcgemm_kernel_L1_M2_22 -cgemm_kernel_L1_M2_40: +.Lcgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M2_100 + ble .Lcgemm_kernel_L1_M2_100 -cgemm_kernel_L1_M2_42: +.Lcgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M2_42 + bgt .Lcgemm_kernel_L1_M2_42 -cgemm_kernel_L1_M2_100: +.Lcgemm_kernel_L1_M2_100: SAVE2x1 -cgemm_kernel_L1_M2_END: +.Lcgemm_kernel_L1_M2_END: -cgemm_kernel_L1_M1_BEGIN: +.Lcgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble cgemm_kernel_L1_END + ble .Lcgemm_kernel_L1_END -cgemm_kernel_L1_M1_20: +.Lcgemm_kernel_L1_M1_20: INIT1x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble cgemm_kernel_L1_M1_40 + ble .Lcgemm_kernel_L1_M1_40 -cgemm_kernel_L1_M1_22: +.Lcgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2132,30 +2132,30 @@ cgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M1_22 + bgt .Lcgemm_kernel_L1_M1_22 -cgemm_kernel_L1_M1_40: +.Lcgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble cgemm_kernel_L1_M1_100 + ble .Lcgemm_kernel_L1_M1_100 -cgemm_kernel_L1_M1_42: +.Lcgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt cgemm_kernel_L1_M1_42 + bgt .Lcgemm_kernel_L1_M1_42 -cgemm_kernel_L1_M1_100: +.Lcgemm_kernel_L1_M1_100: SAVE1x1 -cgemm_kernel_L1_END: +.Lcgemm_kernel_L1_END: -cgemm_kernel_L999: +.Lcgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/copy.S b/kernel/arm64/copy.S index 70eab96fb..b8c6bfcd4 100644 --- a/kernel/arm64/copy.S +++ b/kernel/arm64/copy.S @@ -159,50 +159,50 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble copy_kernel_L999 + ble .Lcopy_kernel_L999 cmp INC_X, #1 - bne copy_kernel_S_BEGIN + bne .Lcopy_kernel_S_BEGIN cmp INC_Y, #1 - bne copy_kernel_S_BEGIN + bne .Lcopy_kernel_S_BEGIN -copy_kernel_F_BEGIN: +.Lcopy_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq copy_kernel_F1 + beq .Lcopy_kernel_F1 -copy_kernel_F4: +.Lcopy_kernel_F4: KERNEL_F4 subs I, I, #1 - bne copy_kernel_F4 + bne .Lcopy_kernel_F4 -copy_kernel_F1: +.Lcopy_kernel_F1: ands I, N, #3 - ble copy_kernel_L999 + ble .Lcopy_kernel_L999 -copy_kernel_F10: +.Lcopy_kernel_F10: KERNEL_F1 subs I, I, #1 - bne copy_kernel_F10 + bne .Lcopy_kernel_F10 mov w0, wzr ret -copy_kernel_S_BEGIN: +.Lcopy_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble copy_kernel_S1 + ble .Lcopy_kernel_S1 -copy_kernel_S4: +.Lcopy_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -210,21 +210,21 @@ copy_kernel_S4: KERNEL_S1 subs I, I, #1 - bne copy_kernel_S4 + bne .Lcopy_kernel_S4 -copy_kernel_S1: +.Lcopy_kernel_S1: ands I, N, #3 - ble copy_kernel_L999 + ble .Lcopy_kernel_L999 -copy_kernel_S10: +.Lcopy_kernel_S10: KERNEL_S1 subs I, I, #1 - bne copy_kernel_S10 + bne .Lcopy_kernel_S10 -copy_kernel_L999: +.Lcopy_kernel_L999: mov w0, wzr ret diff --git a/kernel/arm64/ctrmm_kernel_4x4.S b/kernel/arm64/ctrmm_kernel_4x4.S index 3de27257a..79d33e93c 100644 --- a/kernel/arm64/ctrmm_kernel_4x4.S +++ b/kernel/arm64/ctrmm_kernel_4x4.S @@ -785,11 +785,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble ctrmm_kernel_L2_BEGIN + ble .Lctrmm_kernel_L2_BEGIN /******************************************************************************/ -ctrmm_kernel_L4_BEGIN: +.Lctrmm_kernel_L4_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #2 @@ -798,14 +798,14 @@ ctrmm_kernel_L4_BEGIN: #endif mov pA, origPA // pA = start of A array -ctrmm_kernel_L4_M4_BEGIN: +.Lctrmm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble ctrmm_kernel_L4_M2_BEGIN + ble .Lctrmm_kernel_L4_M2_BEGIN -ctrmm_kernel_L4_M4_20: +.Lctrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -826,55 +826,55 @@ ctrmm_kernel_L4_M4_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt ctrmm_kernel_L4_M4_32 + blt .Lctrmm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble ctrmm_kernel_L4_M4_22a + ble .Lctrmm_kernel_L4_M4_22a .align 5 -ctrmm_kernel_L4_M4_22: +.Lctrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M4_22 + bgt .Lctrmm_kernel_L4_M4_22 -ctrmm_kernel_L4_M4_22a: +.Lctrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b ctrmm_kernel_L4_M4_44 + b .Lctrmm_kernel_L4_M4_44 -ctrmm_kernel_L4_M4_32: +.Lctrmm_kernel_L4_M4_32: tst counterL, #1 - ble ctrmm_kernel_L4_M4_40 + ble .Lctrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b ctrmm_kernel_L4_M4_44 + b .Lctrmm_kernel_L4_M4_44 -ctrmm_kernel_L4_M4_40: +.Lctrmm_kernel_L4_M4_40: INIT4x4 -ctrmm_kernel_L4_M4_44: +.Lctrmm_kernel_L4_M4_44: ands counterL , tempK, #1 - ble ctrmm_kernel_L4_M4_100 + ble .Lctrmm_kernel_L4_M4_100 -ctrmm_kernel_L4_M4_46: +.Lctrmm_kernel_L4_M4_46: KERNEL4x4_SUB -ctrmm_kernel_L4_M4_100: +.Lctrmm_kernel_L4_M4_100: SAVE4x4 @@ -893,20 +893,20 @@ ctrmm_kernel_L4_M4_100: add tempOffset, tempOffset, #4 #endif -ctrmm_kernel_L4_M4_END: +.Lctrmm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne ctrmm_kernel_L4_M4_20 + bne .Lctrmm_kernel_L4_M4_20 -ctrmm_kernel_L4_M2_BEGIN: +.Lctrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ctrmm_kernel_L4_END + ble .Lctrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble ctrmm_kernel_L4_M1_BEGIN + ble .Lctrmm_kernel_L4_M1_BEGIN -ctrmm_kernel_L4_M2_20: +.Lctrmm_kernel_L4_M2_20: INIT2x4 @@ -930,9 +930,9 @@ ctrmm_kernel_L4_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L4_M2_40 + ble .Lctrmm_kernel_L4_M2_40 -ctrmm_kernel_L4_M2_22: +.Lctrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -945,22 +945,22 @@ ctrmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M2_22 + bgt .Lctrmm_kernel_L4_M2_22 -ctrmm_kernel_L4_M2_40: +.Lctrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L4_M2_100 + ble .Lctrmm_kernel_L4_M2_100 -ctrmm_kernel_L4_M2_42: +.Lctrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M2_42 + bgt .Lctrmm_kernel_L4_M2_42 -ctrmm_kernel_L4_M2_100: +.Lctrmm_kernel_L4_M2_100: SAVE2x4 @@ -980,15 +980,15 @@ ctrmm_kernel_L4_M2_100: add tempOffset, tempOffset, #2 #endif -ctrmm_kernel_L4_M2_END: +.Lctrmm_kernel_L4_M2_END: -ctrmm_kernel_L4_M1_BEGIN: +.Lctrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ctrmm_kernel_L4_END + ble .Lctrmm_kernel_L4_END -ctrmm_kernel_L4_M1_20: +.Lctrmm_kernel_L4_M1_20: INIT1x4 @@ -1012,9 +1012,9 @@ ctrmm_kernel_L4_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L4_M1_40 + ble .Lctrmm_kernel_L4_M1_40 -ctrmm_kernel_L4_M1_22: +.Lctrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1026,22 +1026,22 @@ ctrmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M1_22 + bgt .Lctrmm_kernel_L4_M1_22 -ctrmm_kernel_L4_M1_40: +.Lctrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L4_M1_100 + ble .Lctrmm_kernel_L4_M1_100 -ctrmm_kernel_L4_M1_42: +.Lctrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M1_42 + bgt .Lctrmm_kernel_L4_M1_42 -ctrmm_kernel_L4_M1_100: +.Lctrmm_kernel_L4_M1_100: SAVE1x4 @@ -1061,7 +1061,7 @@ ctrmm_kernel_L4_M1_100: add tempOffset, tempOffset, #1 #endif -ctrmm_kernel_L4_END: +.Lctrmm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 @@ -1071,19 +1071,19 @@ ctrmm_kernel_L4_END: #endif subs counterJ, counterJ , #1 // j-- - bgt ctrmm_kernel_L4_BEGIN + bgt .Lctrmm_kernel_L4_BEGIN /******************************************************************************/ -ctrmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lctrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble ctrmm_kernel_L999 // error, N was less than 4? + ble .Lctrmm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble ctrmm_kernel_L1_BEGIN + ble .Lctrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1095,14 +1095,14 @@ ctrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -ctrmm_kernel_L2_M4_BEGIN: +.Lctrmm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble ctrmm_kernel_L2_M2_BEGIN + ble .Lctrmm_kernel_L2_M2_BEGIN -ctrmm_kernel_L2_M4_20: +.Lctrmm_kernel_L2_M4_20: INIT4x2 @@ -1126,10 +1126,10 @@ ctrmm_kernel_L2_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble ctrmm_kernel_L2_M4_40 + ble .Lctrmm_kernel_L2_M4_40 .align 5 -ctrmm_kernel_L2_M4_22: +.Lctrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1141,22 +1141,22 @@ ctrmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M4_22 + bgt .Lctrmm_kernel_L2_M4_22 -ctrmm_kernel_L2_M4_40: +.Lctrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L2_M4_100 + ble .Lctrmm_kernel_L2_M4_100 -ctrmm_kernel_L2_M4_42: +.Lctrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M4_42 + bgt .Lctrmm_kernel_L2_M4_42 -ctrmm_kernel_L2_M4_100: +.Lctrmm_kernel_L2_M4_100: SAVE4x2 @@ -1176,22 +1176,22 @@ ctrmm_kernel_L2_M4_100: add tempOffset, tempOffset, #4 #endif -ctrmm_kernel_L2_M4_END: +.Lctrmm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt ctrmm_kernel_L2_M4_20 + bgt .Lctrmm_kernel_L2_M4_20 -ctrmm_kernel_L2_M2_BEGIN: +.Lctrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ctrmm_kernel_L2_END + ble .Lctrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble ctrmm_kernel_L2_M1_BEGIN + ble .Lctrmm_kernel_L2_M1_BEGIN -ctrmm_kernel_L2_M2_20: +.Lctrmm_kernel_L2_M2_20: INIT2x2 @@ -1215,9 +1215,9 @@ ctrmm_kernel_L2_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble ctrmm_kernel_L2_M2_40 + ble .Lctrmm_kernel_L2_M2_40 -ctrmm_kernel_L2_M2_22: +.Lctrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1230,22 +1230,22 @@ ctrmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M2_22 + bgt .Lctrmm_kernel_L2_M2_22 -ctrmm_kernel_L2_M2_40: +.Lctrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L2_M2_100 + ble .Lctrmm_kernel_L2_M2_100 -ctrmm_kernel_L2_M2_42: +.Lctrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M2_42 + bgt .Lctrmm_kernel_L2_M2_42 -ctrmm_kernel_L2_M2_100: +.Lctrmm_kernel_L2_M2_100: SAVE2x2 @@ -1265,15 +1265,15 @@ ctrmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -ctrmm_kernel_L2_M2_END: +.Lctrmm_kernel_L2_M2_END: -ctrmm_kernel_L2_M1_BEGIN: +.Lctrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ctrmm_kernel_L2_END + ble .Lctrmm_kernel_L2_END -ctrmm_kernel_L2_M1_20: +.Lctrmm_kernel_L2_M1_20: INIT1x2 @@ -1297,9 +1297,9 @@ ctrmm_kernel_L2_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble ctrmm_kernel_L2_M1_40 + ble .Lctrmm_kernel_L2_M1_40 -ctrmm_kernel_L2_M1_22: +.Lctrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1311,22 +1311,22 @@ ctrmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M1_22 + bgt .Lctrmm_kernel_L2_M1_22 -ctrmm_kernel_L2_M1_40: +.Lctrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L2_M1_100 + ble .Lctrmm_kernel_L2_M1_100 -ctrmm_kernel_L2_M1_42: +.Lctrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M1_42 + bgt .Lctrmm_kernel_L2_M1_42 -ctrmm_kernel_L2_M1_100: +.Lctrmm_kernel_L2_M1_100: SAVE1x2 @@ -1346,7 +1346,7 @@ ctrmm_kernel_L2_M1_100: add tempOffset, tempOffset, #1 #endif -ctrmm_kernel_L2_END: +.Lctrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -1354,11 +1354,11 @@ ctrmm_kernel_L2_END: /******************************************************************************/ -ctrmm_kernel_L1_BEGIN: +.Lctrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble ctrmm_kernel_L999 // done + ble .Lctrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1370,14 +1370,14 @@ ctrmm_kernel_L1_BEGIN: mov pA, origPA // pA = A -ctrmm_kernel_L1_M4_BEGIN: +.Lctrmm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble ctrmm_kernel_L1_M2_BEGIN + ble .Lctrmm_kernel_L1_M2_BEGIN -ctrmm_kernel_L1_M4_20: +.Lctrmm_kernel_L1_M4_20: INIT4x1 @@ -1401,10 +1401,10 @@ ctrmm_kernel_L1_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L1_M4_40 + ble .Lctrmm_kernel_L1_M4_40 .align 5 -ctrmm_kernel_L1_M4_22: +.Lctrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1416,22 +1416,22 @@ ctrmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M4_22 + bgt .Lctrmm_kernel_L1_M4_22 -ctrmm_kernel_L1_M4_40: +.Lctrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L1_M4_100 + ble .Lctrmm_kernel_L1_M4_100 -ctrmm_kernel_L1_M4_42: +.Lctrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M4_42 + bgt .Lctrmm_kernel_L1_M4_42 -ctrmm_kernel_L1_M4_100: +.Lctrmm_kernel_L1_M4_100: SAVE4x1 @@ -1451,22 +1451,22 @@ ctrmm_kernel_L1_M4_100: add tempOffset, tempOffset, #4 #endif -ctrmm_kernel_L1_M4_END: +.Lctrmm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt ctrmm_kernel_L1_M4_20 + bgt .Lctrmm_kernel_L1_M4_20 -ctrmm_kernel_L1_M2_BEGIN: +.Lctrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ctrmm_kernel_L1_END + ble .Lctrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble ctrmm_kernel_L1_M1_BEGIN + ble .Lctrmm_kernel_L1_M1_BEGIN -ctrmm_kernel_L1_M2_20: +.Lctrmm_kernel_L1_M2_20: INIT2x1 @@ -1490,9 +1490,9 @@ ctrmm_kernel_L1_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L1_M2_40 + ble .Lctrmm_kernel_L1_M2_40 -ctrmm_kernel_L1_M2_22: +.Lctrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1505,22 +1505,22 @@ ctrmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M2_22 + bgt .Lctrmm_kernel_L1_M2_22 -ctrmm_kernel_L1_M2_40: +.Lctrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L1_M2_100 + ble .Lctrmm_kernel_L1_M2_100 -ctrmm_kernel_L1_M2_42: +.Lctrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M2_42 + bgt .Lctrmm_kernel_L1_M2_42 -ctrmm_kernel_L1_M2_100: +.Lctrmm_kernel_L1_M2_100: SAVE2x1 @@ -1540,15 +1540,15 @@ ctrmm_kernel_L1_M2_100: add tempOffset, tempOffset, #2 #endif -ctrmm_kernel_L1_M2_END: +.Lctrmm_kernel_L1_M2_END: -ctrmm_kernel_L1_M1_BEGIN: +.Lctrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ctrmm_kernel_L1_END + ble .Lctrmm_kernel_L1_END -ctrmm_kernel_L1_M1_20: +.Lctrmm_kernel_L1_M1_20: INIT1x1 @@ -1572,9 +1572,9 @@ ctrmm_kernel_L1_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L1_M1_40 + ble .Lctrmm_kernel_L1_M1_40 -ctrmm_kernel_L1_M1_22: +.Lctrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1586,30 +1586,30 @@ ctrmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M1_22 + bgt .Lctrmm_kernel_L1_M1_22 -ctrmm_kernel_L1_M1_40: +.Lctrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L1_M1_100 + ble .Lctrmm_kernel_L1_M1_100 -ctrmm_kernel_L1_M1_42: +.Lctrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M1_42 + bgt .Lctrmm_kernel_L1_M1_42 -ctrmm_kernel_L1_M1_100: +.Lctrmm_kernel_L1_M1_100: SAVE1x1 -ctrmm_kernel_L1_END: +.Lctrmm_kernel_L1_END: -ctrmm_kernel_L999: +.Lctrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/ctrmm_kernel_8x4.S b/kernel/arm64/ctrmm_kernel_8x4.S index 680fb56c3..5c0827397 100644 --- a/kernel/arm64/ctrmm_kernel_8x4.S +++ b/kernel/arm64/ctrmm_kernel_8x4.S @@ -1405,11 +1405,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble ctrmm_kernel_L2_BEGIN + ble .Lctrmm_kernel_L2_BEGIN /******************************************************************************/ -ctrmm_kernel_L4_BEGIN: +.Lctrmm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1423,14 +1423,14 @@ ctrmm_kernel_L4_BEGIN: #endif mov pA, origPA // pA = start of A array -ctrmm_kernel_L4_M8_BEGIN: +.Lctrmm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble ctrmm_kernel_L4_M4_BEGIN + ble .Lctrmm_kernel_L4_M4_BEGIN -ctrmm_kernel_L4_M8_20: +.Lctrmm_kernel_L4_M8_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1452,7 +1452,7 @@ ctrmm_kernel_L4_M8_20: asr counterL , tempK, #3 cmp counterL , #2 - blt ctrmm_kernel_L4_M8_32 + blt .Lctrmm_kernel_L4_M8_32 KERNEL8x4_I KERNEL8x4_M2 @@ -1464,10 +1464,10 @@ ctrmm_kernel_L4_M8_20: KERNEL8x4_M2 subs counterL, counterL, #2 // subtract 2 - ble ctrmm_kernel_L4_M8_22a + ble .Lctrmm_kernel_L4_M8_22a .align 5 -ctrmm_kernel_L4_M8_22: +.Lctrmm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 @@ -1479,10 +1479,10 @@ ctrmm_kernel_L4_M8_22: KERNEL8x4_M2 subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M8_22 + bgt .Lctrmm_kernel_L4_M8_22 .align 5 -ctrmm_kernel_L4_M8_22a: +.Lctrmm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_M2 @@ -1493,13 +1493,13 @@ ctrmm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b ctrmm_kernel_L4_M8_44 + b .Lctrmm_kernel_L4_M8_44 .align 5 -ctrmm_kernel_L4_M8_32: +.Lctrmm_kernel_L4_M8_32: tst counterL, #1 - ble ctrmm_kernel_L4_M8_40 + ble .Lctrmm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_M2 @@ -1510,26 +1510,26 @@ ctrmm_kernel_L4_M8_32: KERNEL8x4_M1 KERNEL8x4_E - b ctrmm_kernel_L4_M8_44 + b .Lctrmm_kernel_L4_M8_44 -ctrmm_kernel_L4_M8_40: +.Lctrmm_kernel_L4_M8_40: INIT8x4 -ctrmm_kernel_L4_M8_44: +.Lctrmm_kernel_L4_M8_44: ands counterL , tempK, #7 - ble ctrmm_kernel_L4_M8_100 + ble .Lctrmm_kernel_L4_M8_100 .align 5 -ctrmm_kernel_L4_M8_46: +.Lctrmm_kernel_L4_M8_46: KERNEL8x4_SUB subs counterL, counterL, #1 - bne ctrmm_kernel_L4_M8_46 + bne .Lctrmm_kernel_L4_M8_46 -ctrmm_kernel_L4_M8_100: +.Lctrmm_kernel_L4_M8_100: SAVE8x4 @@ -1552,21 +1552,21 @@ ctrmm_kernel_L4_M8_100: prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] -ctrmm_kernel_L4_M8_END: +.Lctrmm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne ctrmm_kernel_L4_M8_20 + bne .Lctrmm_kernel_L4_M8_20 -ctrmm_kernel_L4_M4_BEGIN: +.Lctrmm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble ctrmm_kernel_L4_END + ble .Lctrmm_kernel_L4_END tst counterI, #4 - ble ctrmm_kernel_L4_M2_BEGIN + ble .Lctrmm_kernel_L4_M2_BEGIN -ctrmm_kernel_L4_M4_20: +.Lctrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1587,46 +1587,46 @@ ctrmm_kernel_L4_M4_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt ctrmm_kernel_L4_M4_32 + blt .Lctrmm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble ctrmm_kernel_L4_M4_22a + ble .Lctrmm_kernel_L4_M4_22a .align 5 -ctrmm_kernel_L4_M4_22: +.Lctrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M4_22 + bgt .Lctrmm_kernel_L4_M4_22 -ctrmm_kernel_L4_M4_22a: +.Lctrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b ctrmm_kernel_L4_M4_44 -ctrmm_kernel_L4_M4_32: + b .Lctrmm_kernel_L4_M4_44 +.Lctrmm_kernel_L4_M4_32: tst counterL, #1 - ble ctrmm_kernel_L4_M4_40 + ble .Lctrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b ctrmm_kernel_L4_M4_44 -ctrmm_kernel_L4_M4_40: + b .Lctrmm_kernel_L4_M4_44 +.Lctrmm_kernel_L4_M4_40: INIT4x4 -ctrmm_kernel_L4_M4_44: +.Lctrmm_kernel_L4_M4_44: ands counterL , tempK, #1 - ble ctrmm_kernel_L4_M4_100 + ble .Lctrmm_kernel_L4_M4_100 -ctrmm_kernel_L4_M4_46: +.Lctrmm_kernel_L4_M4_46: KERNEL4x4_SUB -ctrmm_kernel_L4_M4_100: +.Lctrmm_kernel_L4_M4_100: SAVE4x4 @@ -1645,18 +1645,18 @@ ctrmm_kernel_L4_M4_100: add tempOffset, tempOffset, #4 #endif -ctrmm_kernel_L4_M4_END: +.Lctrmm_kernel_L4_M4_END: -ctrmm_kernel_L4_M2_BEGIN: +.Lctrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ctrmm_kernel_L4_END + ble .Lctrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble ctrmm_kernel_L4_M1_BEGIN + ble .Lctrmm_kernel_L4_M1_BEGIN -ctrmm_kernel_L4_M2_20: +.Lctrmm_kernel_L4_M2_20: INIT2x4 @@ -1679,9 +1679,9 @@ ctrmm_kernel_L4_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L4_M2_40 + ble .Lctrmm_kernel_L4_M2_40 -ctrmm_kernel_L4_M2_22: +.Lctrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1694,22 +1694,22 @@ ctrmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M2_22 + bgt .Lctrmm_kernel_L4_M2_22 -ctrmm_kernel_L4_M2_40: +.Lctrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L4_M2_100 + ble .Lctrmm_kernel_L4_M2_100 -ctrmm_kernel_L4_M2_42: +.Lctrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M2_42 + bgt .Lctrmm_kernel_L4_M2_42 -ctrmm_kernel_L4_M2_100: +.Lctrmm_kernel_L4_M2_100: SAVE2x4 @@ -1729,15 +1729,15 @@ ctrmm_kernel_L4_M2_100: add tempOffset, tempOffset, #2 #endif -ctrmm_kernel_L4_M2_END: +.Lctrmm_kernel_L4_M2_END: -ctrmm_kernel_L4_M1_BEGIN: +.Lctrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ctrmm_kernel_L4_END + ble .Lctrmm_kernel_L4_END -ctrmm_kernel_L4_M1_20: +.Lctrmm_kernel_L4_M1_20: INIT1x4 @@ -1761,9 +1761,9 @@ ctrmm_kernel_L4_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L4_M1_40 + ble .Lctrmm_kernel_L4_M1_40 -ctrmm_kernel_L4_M1_22: +.Lctrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1775,22 +1775,22 @@ ctrmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M1_22 + bgt .Lctrmm_kernel_L4_M1_22 -ctrmm_kernel_L4_M1_40: +.Lctrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L4_M1_100 + ble .Lctrmm_kernel_L4_M1_100 -ctrmm_kernel_L4_M1_42: +.Lctrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L4_M1_42 + bgt .Lctrmm_kernel_L4_M1_42 -ctrmm_kernel_L4_M1_100: +.Lctrmm_kernel_L4_M1_100: SAVE1x4 @@ -1810,7 +1810,7 @@ ctrmm_kernel_L4_M1_100: add tempOffset, tempOffset, #1 #endif -ctrmm_kernel_L4_END: +.Lctrmm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 @@ -1820,19 +1820,19 @@ ctrmm_kernel_L4_END: #endif subs counterJ, counterJ , #1 // j-- - bgt ctrmm_kernel_L4_BEGIN + bgt .Lctrmm_kernel_L4_BEGIN /******************************************************************************/ -ctrmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lctrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble ctrmm_kernel_L999 // error, N was less than 4? + ble .Lctrmm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble ctrmm_kernel_L1_BEGIN + ble .Lctrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1843,14 +1843,14 @@ ctrmm_kernel_L2_BEGIN: // less than 2 left in N direction #endif mov pA, origPA // pA = A -ctrmm_kernel_L2_M8_BEGIN: +.Lctrmm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble ctrmm_kernel_L2_M4_BEGIN + ble .Lctrmm_kernel_L2_M4_BEGIN -ctrmm_kernel_L2_M8_20: +.Lctrmm_kernel_L2_M8_20: INIT8x2 @@ -1874,10 +1874,10 @@ ctrmm_kernel_L2_M8_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble ctrmm_kernel_L2_M8_40 + ble .Lctrmm_kernel_L2_M8_40 .align 5 -ctrmm_kernel_L2_M8_22: +.Lctrmm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1889,22 +1889,22 @@ ctrmm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M8_22 + bgt .Lctrmm_kernel_L2_M8_22 -ctrmm_kernel_L2_M8_40: +.Lctrmm_kernel_L2_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L2_M8_100 + ble .Lctrmm_kernel_L2_M8_100 -ctrmm_kernel_L2_M8_42: +.Lctrmm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M8_42 + bgt .Lctrmm_kernel_L2_M8_42 -ctrmm_kernel_L2_M8_100: +.Lctrmm_kernel_L2_M8_100: SAVE8x2 @@ -1924,21 +1924,21 @@ ctrmm_kernel_L2_M8_100: add tempOffset, tempOffset, #8 #endif -ctrmm_kernel_L2_M8_END: +.Lctrmm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt ctrmm_kernel_L2_M8_20 + bgt .Lctrmm_kernel_L2_M8_20 -ctrmm_kernel_L2_M4_BEGIN: +.Lctrmm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble ctrmm_kernel_L2_END + ble .Lctrmm_kernel_L2_END tst counterI, #4 // counterI = counterI / 2 - ble ctrmm_kernel_L2_M2_BEGIN + ble .Lctrmm_kernel_L2_M2_BEGIN -ctrmm_kernel_L2_M4_20: +.Lctrmm_kernel_L2_M4_20: INIT4x2 @@ -1962,10 +1962,10 @@ ctrmm_kernel_L2_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble ctrmm_kernel_L2_M4_40 + ble .Lctrmm_kernel_L2_M4_40 .align 5 -ctrmm_kernel_L2_M4_22: +.Lctrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1977,22 +1977,22 @@ ctrmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M4_22 + bgt .Lctrmm_kernel_L2_M4_22 -ctrmm_kernel_L2_M4_40: +.Lctrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L2_M4_100 + ble .Lctrmm_kernel_L2_M4_100 -ctrmm_kernel_L2_M4_42: +.Lctrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M4_42 + bgt .Lctrmm_kernel_L2_M4_42 -ctrmm_kernel_L2_M4_100: +.Lctrmm_kernel_L2_M4_100: SAVE4x2 @@ -2012,19 +2012,19 @@ ctrmm_kernel_L2_M4_100: add tempOffset, tempOffset, #4 #endif -ctrmm_kernel_L2_M4_END: +.Lctrmm_kernel_L2_M4_END: -ctrmm_kernel_L2_M2_BEGIN: +.Lctrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ctrmm_kernel_L2_END + ble .Lctrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble ctrmm_kernel_L2_M1_BEGIN + ble .Lctrmm_kernel_L2_M1_BEGIN -ctrmm_kernel_L2_M2_20: +.Lctrmm_kernel_L2_M2_20: INIT2x2 @@ -2048,9 +2048,9 @@ ctrmm_kernel_L2_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble ctrmm_kernel_L2_M2_40 + ble .Lctrmm_kernel_L2_M2_40 -ctrmm_kernel_L2_M2_22: +.Lctrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -2063,22 +2063,22 @@ ctrmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M2_22 + bgt .Lctrmm_kernel_L2_M2_22 -ctrmm_kernel_L2_M2_40: +.Lctrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L2_M2_100 + ble .Lctrmm_kernel_L2_M2_100 -ctrmm_kernel_L2_M2_42: +.Lctrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M2_42 + bgt .Lctrmm_kernel_L2_M2_42 -ctrmm_kernel_L2_M2_100: +.Lctrmm_kernel_L2_M2_100: SAVE2x2 @@ -2098,15 +2098,15 @@ ctrmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -ctrmm_kernel_L2_M2_END: +.Lctrmm_kernel_L2_M2_END: -ctrmm_kernel_L2_M1_BEGIN: +.Lctrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ctrmm_kernel_L2_END + ble .Lctrmm_kernel_L2_END -ctrmm_kernel_L2_M1_20: +.Lctrmm_kernel_L2_M1_20: INIT1x2 @@ -2130,9 +2130,9 @@ ctrmm_kernel_L2_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble ctrmm_kernel_L2_M1_40 + ble .Lctrmm_kernel_L2_M1_40 -ctrmm_kernel_L2_M1_22: +.Lctrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -2144,22 +2144,22 @@ ctrmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M1_22 + bgt .Lctrmm_kernel_L2_M1_22 -ctrmm_kernel_L2_M1_40: +.Lctrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L2_M1_100 + ble .Lctrmm_kernel_L2_M1_100 -ctrmm_kernel_L2_M1_42: +.Lctrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L2_M1_42 + bgt .Lctrmm_kernel_L2_M1_42 -ctrmm_kernel_L2_M1_100: +.Lctrmm_kernel_L2_M1_100: SAVE1x2 @@ -2179,7 +2179,7 @@ ctrmm_kernel_L2_M1_100: add tempOffset, tempOffset, #1 #endif -ctrmm_kernel_L2_END: +.Lctrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -2187,11 +2187,11 @@ ctrmm_kernel_L2_END: /******************************************************************************/ -ctrmm_kernel_L1_BEGIN: +.Lctrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble ctrmm_kernel_L999 // done + ble .Lctrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C add pC , pC , LDC // Update pC to point to next @@ -2201,14 +2201,14 @@ ctrmm_kernel_L1_BEGIN: #endif mov pA, origPA // pA = A -ctrmm_kernel_L1_M8_BEGIN: +.Lctrmm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble ctrmm_kernel_L1_M4_BEGIN + ble .Lctrmm_kernel_L1_M4_BEGIN -ctrmm_kernel_L1_M8_20: +.Lctrmm_kernel_L1_M8_20: INIT8x1 @@ -2232,10 +2232,10 @@ ctrmm_kernel_L1_M8_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L1_M8_40 + ble .Lctrmm_kernel_L1_M8_40 .align 5 -ctrmm_kernel_L1_M8_22: +.Lctrmm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -2247,22 +2247,22 @@ ctrmm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M8_22 + bgt .Lctrmm_kernel_L1_M8_22 -ctrmm_kernel_L1_M8_40: +.Lctrmm_kernel_L1_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L1_M8_100 + ble .Lctrmm_kernel_L1_M8_100 -ctrmm_kernel_L1_M8_42: +.Lctrmm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M8_42 + bgt .Lctrmm_kernel_L1_M8_42 -ctrmm_kernel_L1_M8_100: +.Lctrmm_kernel_L1_M8_100: SAVE8x1 @@ -2282,21 +2282,21 @@ ctrmm_kernel_L1_M8_100: add tempOffset, tempOffset, #8 #endif -ctrmm_kernel_L1_M8_END: +.Lctrmm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt ctrmm_kernel_L1_M8_20 + bgt .Lctrmm_kernel_L1_M8_20 -ctrmm_kernel_L1_M4_BEGIN: +.Lctrmm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble ctrmm_kernel_L1_END + ble .Lctrmm_kernel_L1_END tst counterI, #4 // counterI = counterI / 2 - ble ctrmm_kernel_L1_M2_BEGIN + ble .Lctrmm_kernel_L1_M2_BEGIN -ctrmm_kernel_L1_M4_20: +.Lctrmm_kernel_L1_M4_20: INIT4x1 @@ -2319,10 +2319,10 @@ ctrmm_kernel_L1_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L1_M4_40 + ble .Lctrmm_kernel_L1_M4_40 .align 5 -ctrmm_kernel_L1_M4_22: +.Lctrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -2334,22 +2334,22 @@ ctrmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M4_22 + bgt .Lctrmm_kernel_L1_M4_22 -ctrmm_kernel_L1_M4_40: +.Lctrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L1_M4_100 + ble .Lctrmm_kernel_L1_M4_100 -ctrmm_kernel_L1_M4_42: +.Lctrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M4_42 + bgt .Lctrmm_kernel_L1_M4_42 -ctrmm_kernel_L1_M4_100: +.Lctrmm_kernel_L1_M4_100: SAVE4x1 @@ -2369,18 +2369,18 @@ ctrmm_kernel_L1_M4_100: add tempOffset, tempOffset, #4 #endif -ctrmm_kernel_L1_M4_END: +.Lctrmm_kernel_L1_M4_END: -ctrmm_kernel_L1_M2_BEGIN: +.Lctrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ctrmm_kernel_L1_END + ble .Lctrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble ctrmm_kernel_L1_M1_BEGIN + ble .Lctrmm_kernel_L1_M1_BEGIN -ctrmm_kernel_L1_M2_20: +.Lctrmm_kernel_L1_M2_20: INIT2x1 @@ -2404,9 +2404,9 @@ ctrmm_kernel_L1_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L1_M2_40 + ble .Lctrmm_kernel_L1_M2_40 -ctrmm_kernel_L1_M2_22: +.Lctrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -2419,22 +2419,22 @@ ctrmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M2_22 + bgt .Lctrmm_kernel_L1_M2_22 -ctrmm_kernel_L1_M2_40: +.Lctrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L1_M2_100 + ble .Lctrmm_kernel_L1_M2_100 -ctrmm_kernel_L1_M2_42: +.Lctrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M2_42 + bgt .Lctrmm_kernel_L1_M2_42 -ctrmm_kernel_L1_M2_100: +.Lctrmm_kernel_L1_M2_100: SAVE2x1 @@ -2454,15 +2454,15 @@ ctrmm_kernel_L1_M2_100: add tempOffset, tempOffset, #2 #endif -ctrmm_kernel_L1_M2_END: +.Lctrmm_kernel_L1_M2_END: -ctrmm_kernel_L1_M1_BEGIN: +.Lctrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ctrmm_kernel_L1_END + ble .Lctrmm_kernel_L1_END -ctrmm_kernel_L1_M1_20: +.Lctrmm_kernel_L1_M1_20: INIT1x1 @@ -2486,9 +2486,9 @@ ctrmm_kernel_L1_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ctrmm_kernel_L1_M1_40 + ble .Lctrmm_kernel_L1_M1_40 -ctrmm_kernel_L1_M1_22: +.Lctrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2500,30 +2500,30 @@ ctrmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M1_22 + bgt .Lctrmm_kernel_L1_M1_22 -ctrmm_kernel_L1_M1_40: +.Lctrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ctrmm_kernel_L1_M1_100 + ble .Lctrmm_kernel_L1_M1_100 -ctrmm_kernel_L1_M1_42: +.Lctrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt ctrmm_kernel_L1_M1_42 + bgt .Lctrmm_kernel_L1_M1_42 -ctrmm_kernel_L1_M1_100: +.Lctrmm_kernel_L1_M1_100: SAVE1x1 -ctrmm_kernel_L1_END: +.Lctrmm_kernel_L1_END: -ctrmm_kernel_L999: +.Lctrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/daxpy_thunderx2t99.S b/kernel/arm64/daxpy_thunderx2t99.S index 5eb2ec0c3..b8d0af5c2 100644 --- a/kernel/arm64/daxpy_thunderx2t99.S +++ b/kernel/arm64/daxpy_thunderx2t99.S @@ -122,53 +122,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble axpy_kernel_L999 + ble .Ldaxpy_kernel_L999 fcmp DA, #0.0 - beq axpy_kernel_L999 + beq .Ldaxpy_kernel_L999 cmp INC_X, #1 - bne axpy_kernel_S_BEGIN + bne .Ldaxpy_kernel_S_BEGIN cmp INC_Y, #1 - bne axpy_kernel_S_BEGIN + bne .Ldaxpy_kernel_S_BEGIN -axpy_kernel_F_BEGIN: +.Ldaxpy_kernel_F_BEGIN: asr I, N, #5 cmp I, xzr - beq axpy_kernel_F1 + beq .Ldaxpy_kernel_F1 .align 5 -axpy_kernel_F32: +.Ldaxpy_kernel_F32: KERNEL_F32 subs I, I, #1 - bne axpy_kernel_F32 + bne .Ldaxpy_kernel_F32 -axpy_kernel_F1: +.Ldaxpy_kernel_F1: ands I, N, #31 - ble axpy_kernel_L999 + ble .Ldaxpy_kernel_L999 -axpy_kernel_F10: +.Ldaxpy_kernel_F10: KERNEL_F1 subs I, I, #1 - bne axpy_kernel_F10 + bne .Ldaxpy_kernel_F10 - b axpy_kernel_L999 + b .Ldaxpy_kernel_L999 -axpy_kernel_S_BEGIN: +.Ldaxpy_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble axpy_kernel_S1 + ble .Ldaxpy_kernel_S1 -axpy_kernel_S4: +.Ldaxpy_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -176,21 +176,21 @@ axpy_kernel_S4: KERNEL_S1 subs I, I, #1 - bne axpy_kernel_S4 + bne .Ldaxpy_kernel_S4 -axpy_kernel_S1: +.Ldaxpy_kernel_S1: ands I, N, #3 - ble axpy_kernel_L999 + ble .Ldaxpy_kernel_L999 -axpy_kernel_S10: +.Ldaxpy_kernel_S10: KERNEL_S1 subs I, I, #1 - bne axpy_kernel_S10 + bne .Ldaxpy_kernel_S10 -axpy_kernel_L999: +.Ldaxpy_kernel_L999: mov w0, wzr ret diff --git a/kernel/arm64/dgemm_kernel_4x4.S b/kernel/arm64/dgemm_kernel_4x4.S index 44b0f7ff2..349167062 100644 --- a/kernel/arm64/dgemm_kernel_4x4.S +++ b/kernel/arm64/dgemm_kernel_4x4.S @@ -775,9 +775,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble dgemm_kernel_L2_BEGIN + ble .Ldgemm_kernel_L2_BEGIN -dgemm_kernel_L4_BEGIN: +.Ldgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -791,20 +791,20 @@ dgemm_kernel_L4_BEGIN: //------------------------------------------------------------------------------ -dgemm_kernel_L4_M8_BEGIN: +.Ldgemm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dgemm_kernel_L4_M4_BEGIN + ble .Ldgemm_kernel_L4_M4_BEGIN .align 5 -dgemm_kernel_L4_M8_20: +.Ldgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #2 // L = K / 4 cmp counterL , #2 - blt dgemm_kernel_L4_M8_32 + blt .Ldgemm_kernel_L4_M8_32 KERNEL8x4_I KERNEL8x4_M2 @@ -812,60 +812,60 @@ dgemm_kernel_L4_M8_20: KERNEL8x4_M2 subs counterL, counterL, #2 // subtract 2 - ble dgemm_kernel_L4_M8_22a + ble .Ldgemm_kernel_L4_M8_22a .align 5 -dgemm_kernel_L4_M8_22: +.Ldgemm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 KERNEL8x4_M1 KERNEL8x4_M2 subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M8_22 + bgt .Ldgemm_kernel_L4_M8_22 .align 5 -dgemm_kernel_L4_M8_22a: +.Ldgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_M2 KERNEL8x4_M1 KERNEL8x4_E - b dgemm_kernel_L4_M8_44 + b .Ldgemm_kernel_L4_M8_44 .align 5 -dgemm_kernel_L4_M8_32: +.Ldgemm_kernel_L4_M8_32: tst counterL, #1 - ble dgemm_kernel_L4_M8_40 + ble .Ldgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_M2 KERNEL8x4_M1 KERNEL8x4_E - b dgemm_kernel_L4_M8_44 + b .Ldgemm_kernel_L4_M8_44 -dgemm_kernel_L4_M8_40: +.Ldgemm_kernel_L4_M8_40: INIT8x4 -dgemm_kernel_L4_M8_44: +.Ldgemm_kernel_L4_M8_44: ands counterL , origK, #3 - ble dgemm_kernel_L4_M8_100 + ble .Ldgemm_kernel_L4_M8_100 .align 5 -dgemm_kernel_L4_M8_46: +.Ldgemm_kernel_L4_M8_46: KERNEL8x4_SUB subs counterL, counterL, #1 - bne dgemm_kernel_L4_M8_46 + bne .Ldgemm_kernel_L4_M8_46 -dgemm_kernel_L4_M8_100: +.Ldgemm_kernel_L4_M8_100: lsl temp, origK, #5 prfm PLDL1KEEP, [pA, temp] prfm PLDL1KEEP, [ppA, temp] @@ -873,31 +873,31 @@ dgemm_kernel_L4_M8_100: SAVE8x4 -dgemm_kernel_L4_M8_END: +.Ldgemm_kernel_L4_M8_END: lsl temp, origK, #5 // k * 4 * 8 add pA, pA, temp add ppA, ppA, temp subs counterI, counterI, #1 - bne dgemm_kernel_L4_M8_20 + bne .Ldgemm_kernel_L4_M8_20 -dgemm_kernel_L4_M4_BEGIN: +.Ldgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END tst counterI, #4 - ble dgemm_kernel_L4_M2_BEGIN + ble .Ldgemm_kernel_L4_M2_BEGIN -dgemm_kernel_L4_M4_20: +.Ldgemm_kernel_L4_M4_20: INIT4x4 mov pB, origPB asr counterL, origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dgemm_kernel_L4_M4_40 + ble .Ldgemm_kernel_L4_M4_40 -dgemm_kernel_L4_M4_22: +.Ldgemm_kernel_L4_M4_22: KERNEL4x4_SUB KERNEL4x4_SUB @@ -910,47 +910,47 @@ dgemm_kernel_L4_M4_22: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M4_22 + bgt .Ldgemm_kernel_L4_M4_22 -dgemm_kernel_L4_M4_40: +.Ldgemm_kernel_L4_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M4_100 + ble .Ldgemm_kernel_L4_M4_100 -dgemm_kernel_L4_M4_42: +.Ldgemm_kernel_L4_M4_42: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M4_42 + bgt .Ldgemm_kernel_L4_M4_42 -dgemm_kernel_L4_M4_100: +.Ldgemm_kernel_L4_M4_100: SAVE4x4 -dgemm_kernel_L4_M4_END: +.Ldgemm_kernel_L4_M4_END: -dgemm_kernel_L4_M2_BEGIN: +.Ldgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L4_M1_BEGIN + ble .Ldgemm_kernel_L4_M1_BEGIN -dgemm_kernel_L4_M2_20: +.Ldgemm_kernel_L4_M2_20: INIT2x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M2_40 + ble .Ldgemm_kernel_L4_M2_40 -dgemm_kernel_L4_M2_22: +.Ldgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -963,43 +963,43 @@ dgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_22 + bgt .Ldgemm_kernel_L4_M2_22 -dgemm_kernel_L4_M2_40: +.Ldgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M2_100 + ble .Ldgemm_kernel_L4_M2_100 -dgemm_kernel_L4_M2_42: +.Ldgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_42 + bgt .Ldgemm_kernel_L4_M2_42 -dgemm_kernel_L4_M2_100: +.Ldgemm_kernel_L4_M2_100: SAVE2x4 -dgemm_kernel_L4_M2_END: +.Ldgemm_kernel_L4_M2_END: -dgemm_kernel_L4_M1_BEGIN: +.Ldgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END -dgemm_kernel_L4_M1_20: +.Ldgemm_kernel_L4_M1_20: INIT1x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M1_40 + ble .Ldgemm_kernel_L4_M1_40 -dgemm_kernel_L4_M1_22: +.Ldgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1011,45 +1011,45 @@ dgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_22 + bgt .Ldgemm_kernel_L4_M1_22 -dgemm_kernel_L4_M1_40: +.Ldgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M1_100 + ble .Ldgemm_kernel_L4_M1_100 -dgemm_kernel_L4_M1_42: +.Ldgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_42 + bgt .Ldgemm_kernel_L4_M1_42 -dgemm_kernel_L4_M1_100: +.Ldgemm_kernel_L4_M1_100: SAVE1x4 -dgemm_kernel_L4_END: +.Ldgemm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 subs counterJ, counterJ , #1 // j-- - bgt dgemm_kernel_L4_BEGIN + bgt .Ldgemm_kernel_L4_BEGIN /******************************************************************************/ -dgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Ldgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble dgemm_kernel_L999 // error, N was less than 4? + ble .Ldgemm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble dgemm_kernel_L1_BEGIN + ble .Ldgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1059,24 +1059,24 @@ dgemm_kernel_L2_BEGIN: // less than 2 left in N direction -dgemm_kernel_L2_M4_BEGIN: +.Ldgemm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble dgemm_kernel_L2_M2_BEGIN + ble .Ldgemm_kernel_L2_M2_BEGIN -dgemm_kernel_L2_M4_20: +.Ldgemm_kernel_L2_M4_20: INIT4x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M4_40 + ble .Ldgemm_kernel_L2_M4_40 .align 5 -dgemm_kernel_L2_M4_22: +.Ldgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1088,50 +1088,50 @@ dgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_22 + bgt .Ldgemm_kernel_L2_M4_22 -dgemm_kernel_L2_M4_40: +.Ldgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M4_100 + ble .Ldgemm_kernel_L2_M4_100 -dgemm_kernel_L2_M4_42: +.Ldgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_42 + bgt .Ldgemm_kernel_L2_M4_42 -dgemm_kernel_L2_M4_100: +.Ldgemm_kernel_L2_M4_100: SAVE4x2 -dgemm_kernel_L2_M4_END: +.Ldgemm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L2_M4_20 + bgt .Ldgemm_kernel_L2_M4_20 -dgemm_kernel_L2_M2_BEGIN: +.Ldgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L2_M1_BEGIN + ble .Ldgemm_kernel_L2_M1_BEGIN -dgemm_kernel_L2_M2_20: +.Ldgemm_kernel_L2_M2_20: INIT2x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M2_40 + ble .Ldgemm_kernel_L2_M2_40 -dgemm_kernel_L2_M2_22: +.Ldgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1144,43 +1144,43 @@ dgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_22 + bgt .Ldgemm_kernel_L2_M2_22 -dgemm_kernel_L2_M2_40: +.Ldgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M2_100 + ble .Ldgemm_kernel_L2_M2_100 -dgemm_kernel_L2_M2_42: +.Ldgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_42 + bgt .Ldgemm_kernel_L2_M2_42 -dgemm_kernel_L2_M2_100: +.Ldgemm_kernel_L2_M2_100: SAVE2x2 -dgemm_kernel_L2_M2_END: +.Ldgemm_kernel_L2_M2_END: -dgemm_kernel_L2_M1_BEGIN: +.Ldgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END -dgemm_kernel_L2_M1_20: +.Ldgemm_kernel_L2_M1_20: INIT1x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dgemm_kernel_L2_M1_40 + ble .Ldgemm_kernel_L2_M1_40 -dgemm_kernel_L2_M1_22: +.Ldgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1192,36 +1192,36 @@ dgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_22 + bgt .Ldgemm_kernel_L2_M1_22 -dgemm_kernel_L2_M1_40: +.Ldgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M1_100 + ble .Ldgemm_kernel_L2_M1_100 -dgemm_kernel_L2_M1_42: +.Ldgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_42 + bgt .Ldgemm_kernel_L2_M1_42 -dgemm_kernel_L2_M1_100: +.Ldgemm_kernel_L2_M1_100: SAVE1x2 -dgemm_kernel_L2_END: +.Ldgemm_kernel_L2_END: add origPB, origPB, origK, lsl #4 // B = B + K * 2 * 8 /******************************************************************************/ -dgemm_kernel_L1_BEGIN: +.Ldgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble dgemm_kernel_L999 // done + ble .Ldgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1231,24 +1231,24 @@ dgemm_kernel_L1_BEGIN: -dgemm_kernel_L1_M4_BEGIN: +.Ldgemm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dgemm_kernel_L1_M2_BEGIN + ble .Ldgemm_kernel_L1_M2_BEGIN -dgemm_kernel_L1_M4_20: +.Ldgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M4_40 + ble .Ldgemm_kernel_L1_M4_40 .align 5 -dgemm_kernel_L1_M4_22: +.Ldgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1260,50 +1260,50 @@ dgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_22 + bgt .Ldgemm_kernel_L1_M4_22 -dgemm_kernel_L1_M4_40: +.Ldgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M4_100 + ble .Ldgemm_kernel_L1_M4_100 -dgemm_kernel_L1_M4_42: +.Ldgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_42 + bgt .Ldgemm_kernel_L1_M4_42 -dgemm_kernel_L1_M4_100: +.Ldgemm_kernel_L1_M4_100: SAVE4x1 -dgemm_kernel_L1_M4_END: +.Ldgemm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L1_M4_20 + bgt .Ldgemm_kernel_L1_M4_20 -dgemm_kernel_L1_M2_BEGIN: +.Ldgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L1_M1_BEGIN + ble .Ldgemm_kernel_L1_M1_BEGIN -dgemm_kernel_L1_M2_20: +.Ldgemm_kernel_L1_M2_20: INIT2x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M2_40 + ble .Ldgemm_kernel_L1_M2_40 -dgemm_kernel_L1_M2_22: +.Ldgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1316,43 +1316,43 @@ dgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_22 + bgt .Ldgemm_kernel_L1_M2_22 -dgemm_kernel_L1_M2_40: +.Ldgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M2_100 + ble .Ldgemm_kernel_L1_M2_100 -dgemm_kernel_L1_M2_42: +.Ldgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_42 + bgt .Ldgemm_kernel_L1_M2_42 -dgemm_kernel_L1_M2_100: +.Ldgemm_kernel_L1_M2_100: SAVE2x1 -dgemm_kernel_L1_M2_END: +.Ldgemm_kernel_L1_M2_END: -dgemm_kernel_L1_M1_BEGIN: +.Ldgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END -dgemm_kernel_L1_M1_20: +.Ldgemm_kernel_L1_M1_20: INIT1x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M1_40 + ble .Ldgemm_kernel_L1_M1_40 -dgemm_kernel_L1_M1_22: +.Ldgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1364,30 +1364,30 @@ dgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_22 + bgt .Ldgemm_kernel_L1_M1_22 -dgemm_kernel_L1_M1_40: +.Ldgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M1_100 + ble .Ldgemm_kernel_L1_M1_100 -dgemm_kernel_L1_M1_42: +.Ldgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_42 + bgt .Ldgemm_kernel_L1_M1_42 -dgemm_kernel_L1_M1_100: +.Ldgemm_kernel_L1_M1_100: SAVE1x1 -dgemm_kernel_L1_END: +.Ldgemm_kernel_L1_END: -dgemm_kernel_L999: +.Ldgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/dgemm_kernel_4x8.S b/kernel/arm64/dgemm_kernel_4x8.S index b04dbb5d5..ced26b49c 100644 --- a/kernel/arm64/dgemm_kernel_4x8.S +++ b/kernel/arm64/dgemm_kernel_4x8.S @@ -938,98 +938,98 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #3 // J = J / 8 cmp counterJ, #0 - ble dgemm_kernel_L4_BEGIN + ble .Ldgemm_kernel_L4_BEGIN /******************************************************************************/ -dgemm_kernel_L8_BEGIN: +.Ldgemm_kernel_L8_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #3 mov pA, origPA // pA = start of A array -dgemm_kernel_L8_M4_BEGIN: +.Ldgemm_kernel_L8_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dgemm_kernel_L8_M2_BEGIN + ble .Ldgemm_kernel_L8_M2_BEGIN -dgemm_kernel_L8_M4_20: +.Ldgemm_kernel_L8_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt dgemm_kernel_L8_M4_32 + blt .Ldgemm_kernel_L8_M4_32 KERNEL4x8_I // do one in the K KERNEL4x8_M2 // do another in the K subs counterL, counterL, #2 - ble dgemm_kernel_L8_M4_22a + ble .Ldgemm_kernel_L8_M4_22a .align 5 -dgemm_kernel_L8_M4_22: +.Ldgemm_kernel_L8_M4_22: KERNEL4x8_M1 KERNEL4x8_M2 subs counterL, counterL, #1 - bgt dgemm_kernel_L8_M4_22 + bgt .Ldgemm_kernel_L8_M4_22 -dgemm_kernel_L8_M4_22a: +.Ldgemm_kernel_L8_M4_22a: KERNEL4x8_M1 KERNEL4x8_E - b dgemm_kernel_L8_M4_44 + b .Ldgemm_kernel_L8_M4_44 -dgemm_kernel_L8_M4_32: +.Ldgemm_kernel_L8_M4_32: tst counterL, #1 - ble dgemm_kernel_L8_M4_40 + ble .Ldgemm_kernel_L8_M4_40 KERNEL4x8_I KERNEL4x8_E - b dgemm_kernel_L8_M4_44 + b .Ldgemm_kernel_L8_M4_44 -dgemm_kernel_L8_M4_40: +.Ldgemm_kernel_L8_M4_40: INIT4x8 -dgemm_kernel_L8_M4_44: +.Ldgemm_kernel_L8_M4_44: ands counterL , origK, #1 - ble dgemm_kernel_L8_M4_100 + ble .Ldgemm_kernel_L8_M4_100 -dgemm_kernel_L8_M4_46: +.Ldgemm_kernel_L8_M4_46: KERNEL4x8_SUB -dgemm_kernel_L8_M4_100: +.Ldgemm_kernel_L8_M4_100: SAVE4x8 -dgemm_kernel_L8_M4_END: +.Ldgemm_kernel_L8_M4_END: subs counterI, counterI, #1 - bne dgemm_kernel_L8_M4_20 + bne .Ldgemm_kernel_L8_M4_20 -dgemm_kernel_L8_M2_BEGIN: +.Ldgemm_kernel_L8_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L8_END + ble .Ldgemm_kernel_L8_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L8_M1_BEGIN + ble .Ldgemm_kernel_L8_M1_BEGIN -dgemm_kernel_L8_M2_20: +.Ldgemm_kernel_L8_M2_20: INIT2x8 @@ -1037,9 +1037,9 @@ dgemm_kernel_L8_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L8_M2_40 + ble .Ldgemm_kernel_L8_M2_40 -dgemm_kernel_L8_M2_22: +.Ldgemm_kernel_L8_M2_22: KERNEL2x8_SUB KERNEL2x8_SUB @@ -1052,34 +1052,34 @@ dgemm_kernel_L8_M2_22: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L8_M2_22 + bgt .Ldgemm_kernel_L8_M2_22 -dgemm_kernel_L8_M2_40: +.Ldgemm_kernel_L8_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L8_M2_100 + ble .Ldgemm_kernel_L8_M2_100 -dgemm_kernel_L8_M2_42: +.Ldgemm_kernel_L8_M2_42: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L8_M2_42 + bgt .Ldgemm_kernel_L8_M2_42 -dgemm_kernel_L8_M2_100: +.Ldgemm_kernel_L8_M2_100: SAVE2x8 -dgemm_kernel_L8_M2_END: +.Ldgemm_kernel_L8_M2_END: -dgemm_kernel_L8_M1_BEGIN: +.Ldgemm_kernel_L8_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L8_END + ble .Ldgemm_kernel_L8_END -dgemm_kernel_L8_M1_20: +.Ldgemm_kernel_L8_M1_20: INIT1x8 @@ -1087,9 +1087,9 @@ dgemm_kernel_L8_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L8_M1_40 + ble .Ldgemm_kernel_L8_M1_40 -dgemm_kernel_L8_M1_22: +.Ldgemm_kernel_L8_M1_22: KERNEL1x8_SUB KERNEL1x8_SUB KERNEL1x8_SUB @@ -1101,131 +1101,131 @@ dgemm_kernel_L8_M1_22: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L8_M1_22 + bgt .Ldgemm_kernel_L8_M1_22 -dgemm_kernel_L8_M1_40: +.Ldgemm_kernel_L8_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L8_M1_100 + ble .Ldgemm_kernel_L8_M1_100 -dgemm_kernel_L8_M1_42: +.Ldgemm_kernel_L8_M1_42: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L8_M1_42 + bgt .Ldgemm_kernel_L8_M1_42 -dgemm_kernel_L8_M1_100: +.Ldgemm_kernel_L8_M1_100: SAVE1x8 -dgemm_kernel_L8_END: +.Ldgemm_kernel_L8_END: lsl temp, origK, #6 add origPB, origPB, temp // B = B + K * 8 * 8 subs counterJ, counterJ , #1 // j-- - bgt dgemm_kernel_L8_BEGIN + bgt .Ldgemm_kernel_L8_BEGIN /******************************************************************************/ -dgemm_kernel_L4_BEGIN: +.Ldgemm_kernel_L4_BEGIN: mov counterJ , origN tst counterJ , #7 - ble dgemm_kernel_L999 + ble .Ldgemm_kernel_L999 tst counterJ , #4 - ble dgemm_kernel_L2_BEGIN + ble .Ldgemm_kernel_L2_BEGIN mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #2 mov pA, origPA // pA = start of A array -dgemm_kernel_L4_M4_BEGIN: +.Ldgemm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dgemm_kernel_L4_M2_BEGIN + ble .Ldgemm_kernel_L4_M2_BEGIN -dgemm_kernel_L4_M4_20: +.Ldgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt dgemm_kernel_L4_M4_32 + blt .Ldgemm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble dgemm_kernel_L4_M4_22a + ble .Ldgemm_kernel_L4_M4_22a .align 5 -dgemm_kernel_L4_M4_22: +.Ldgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M4_22 + bgt .Ldgemm_kernel_L4_M4_22 -dgemm_kernel_L4_M4_22a: +.Ldgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b dgemm_kernel_L4_M4_44 + b .Ldgemm_kernel_L4_M4_44 -dgemm_kernel_L4_M4_32: +.Ldgemm_kernel_L4_M4_32: tst counterL, #1 - ble dgemm_kernel_L4_M4_40 + ble .Ldgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b dgemm_kernel_L4_M4_44 + b .Ldgemm_kernel_L4_M4_44 -dgemm_kernel_L4_M4_40: +.Ldgemm_kernel_L4_M4_40: INIT4x4 -dgemm_kernel_L4_M4_44: +.Ldgemm_kernel_L4_M4_44: ands counterL , origK, #1 - ble dgemm_kernel_L4_M4_100 + ble .Ldgemm_kernel_L4_M4_100 -dgemm_kernel_L4_M4_46: +.Ldgemm_kernel_L4_M4_46: KERNEL4x4_SUB -dgemm_kernel_L4_M4_100: +.Ldgemm_kernel_L4_M4_100: SAVE4x4 -dgemm_kernel_L4_M4_END: +.Ldgemm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne dgemm_kernel_L4_M4_20 + bne .Ldgemm_kernel_L4_M4_20 -dgemm_kernel_L4_M2_BEGIN: +.Ldgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L4_M1_BEGIN + ble .Ldgemm_kernel_L4_M1_BEGIN -dgemm_kernel_L4_M2_20: +.Ldgemm_kernel_L4_M2_20: INIT2x4 @@ -1233,9 +1233,9 @@ dgemm_kernel_L4_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M2_40 + ble .Ldgemm_kernel_L4_M2_40 -dgemm_kernel_L4_M2_22: +.Ldgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1248,34 +1248,34 @@ dgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_22 + bgt .Ldgemm_kernel_L4_M2_22 -dgemm_kernel_L4_M2_40: +.Ldgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M2_100 + ble .Ldgemm_kernel_L4_M2_100 -dgemm_kernel_L4_M2_42: +.Ldgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_42 + bgt .Ldgemm_kernel_L4_M2_42 -dgemm_kernel_L4_M2_100: +.Ldgemm_kernel_L4_M2_100: SAVE2x4 -dgemm_kernel_L4_M2_END: +.Ldgemm_kernel_L4_M2_END: -dgemm_kernel_L4_M1_BEGIN: +.Ldgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END -dgemm_kernel_L4_M1_20: +.Ldgemm_kernel_L4_M1_20: INIT1x4 @@ -1283,9 +1283,9 @@ dgemm_kernel_L4_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M1_40 + ble .Ldgemm_kernel_L4_M1_40 -dgemm_kernel_L4_M1_22: +.Ldgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1297,40 +1297,40 @@ dgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_22 + bgt .Ldgemm_kernel_L4_M1_22 -dgemm_kernel_L4_M1_40: +.Ldgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M1_100 + ble .Ldgemm_kernel_L4_M1_100 -dgemm_kernel_L4_M1_42: +.Ldgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_42 + bgt .Ldgemm_kernel_L4_M1_42 -dgemm_kernel_L4_M1_100: +.Ldgemm_kernel_L4_M1_100: SAVE1x4 -dgemm_kernel_L4_END: +.Ldgemm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 /******************************************************************************/ -dgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Ldgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble dgemm_kernel_L999 // error, N was less than 4? + ble .Ldgemm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble dgemm_kernel_L1_BEGIN + ble .Ldgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1339,14 +1339,14 @@ dgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -dgemm_kernel_L2_M4_BEGIN: +.Ldgemm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble dgemm_kernel_L2_M2_BEGIN + ble .Ldgemm_kernel_L2_M2_BEGIN -dgemm_kernel_L2_M4_20: +.Ldgemm_kernel_L2_M4_20: INIT4x2 @@ -1354,10 +1354,10 @@ dgemm_kernel_L2_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M4_40 + ble .Ldgemm_kernel_L2_M4_40 .align 5 -dgemm_kernel_L2_M4_22: +.Ldgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1369,41 +1369,41 @@ dgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_22 + bgt .Ldgemm_kernel_L2_M4_22 -dgemm_kernel_L2_M4_40: +.Ldgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M4_100 + ble .Ldgemm_kernel_L2_M4_100 -dgemm_kernel_L2_M4_42: +.Ldgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_42 + bgt .Ldgemm_kernel_L2_M4_42 -dgemm_kernel_L2_M4_100: +.Ldgemm_kernel_L2_M4_100: SAVE4x2 -dgemm_kernel_L2_M4_END: +.Ldgemm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L2_M4_20 + bgt .Ldgemm_kernel_L2_M4_20 -dgemm_kernel_L2_M2_BEGIN: +.Ldgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L2_M1_BEGIN + ble .Ldgemm_kernel_L2_M1_BEGIN -dgemm_kernel_L2_M2_20: +.Ldgemm_kernel_L2_M2_20: INIT2x2 @@ -1411,9 +1411,9 @@ dgemm_kernel_L2_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M2_40 + ble .Ldgemm_kernel_L2_M2_40 -dgemm_kernel_L2_M2_22: +.Ldgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1426,34 +1426,34 @@ dgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_22 + bgt .Ldgemm_kernel_L2_M2_22 -dgemm_kernel_L2_M2_40: +.Ldgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M2_100 + ble .Ldgemm_kernel_L2_M2_100 -dgemm_kernel_L2_M2_42: +.Ldgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_42 + bgt .Ldgemm_kernel_L2_M2_42 -dgemm_kernel_L2_M2_100: +.Ldgemm_kernel_L2_M2_100: SAVE2x2 -dgemm_kernel_L2_M2_END: +.Ldgemm_kernel_L2_M2_END: -dgemm_kernel_L2_M1_BEGIN: +.Ldgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END -dgemm_kernel_L2_M1_20: +.Ldgemm_kernel_L2_M1_20: INIT1x2 @@ -1461,9 +1461,9 @@ dgemm_kernel_L2_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dgemm_kernel_L2_M1_40 + ble .Ldgemm_kernel_L2_M1_40 -dgemm_kernel_L2_M1_22: +.Ldgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1475,35 +1475,35 @@ dgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_22 + bgt .Ldgemm_kernel_L2_M1_22 -dgemm_kernel_L2_M1_40: +.Ldgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M1_100 + ble .Ldgemm_kernel_L2_M1_100 -dgemm_kernel_L2_M1_42: +.Ldgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_42 + bgt .Ldgemm_kernel_L2_M1_42 -dgemm_kernel_L2_M1_100: +.Ldgemm_kernel_L2_M1_100: SAVE1x2 -dgemm_kernel_L2_END: +.Ldgemm_kernel_L2_END: add origPB, origPB, origK, lsl #4 // B = B + K * 2 * 8 /******************************************************************************/ -dgemm_kernel_L1_BEGIN: +.Ldgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble dgemm_kernel_L999 // done + ble .Ldgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1511,24 +1511,24 @@ dgemm_kernel_L1_BEGIN: mov pA, origPA // pA = A -dgemm_kernel_L1_M4_BEGIN: +.Ldgemm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dgemm_kernel_L1_M2_BEGIN + ble .Ldgemm_kernel_L1_M2_BEGIN -dgemm_kernel_L1_M4_20: +.Ldgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M4_40 + ble .Ldgemm_kernel_L1_M4_40 .align 5 -dgemm_kernel_L1_M4_22: +.Ldgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1540,41 +1540,41 @@ dgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_22 + bgt .Ldgemm_kernel_L1_M4_22 -dgemm_kernel_L1_M4_40: +.Ldgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M4_100 + ble .Ldgemm_kernel_L1_M4_100 -dgemm_kernel_L1_M4_42: +.Ldgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_42 + bgt .Ldgemm_kernel_L1_M4_42 -dgemm_kernel_L1_M4_100: +.Ldgemm_kernel_L1_M4_100: SAVE4x1 -dgemm_kernel_L1_M4_END: +.Ldgemm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L1_M4_20 + bgt .Ldgemm_kernel_L1_M4_20 -dgemm_kernel_L1_M2_BEGIN: +.Ldgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L1_M1_BEGIN + ble .Ldgemm_kernel_L1_M1_BEGIN -dgemm_kernel_L1_M2_20: +.Ldgemm_kernel_L1_M2_20: INIT2x1 @@ -1582,9 +1582,9 @@ dgemm_kernel_L1_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M2_40 + ble .Ldgemm_kernel_L1_M2_40 -dgemm_kernel_L1_M2_22: +.Ldgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1597,34 +1597,34 @@ dgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_22 + bgt .Ldgemm_kernel_L1_M2_22 -dgemm_kernel_L1_M2_40: +.Ldgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M2_100 + ble .Ldgemm_kernel_L1_M2_100 -dgemm_kernel_L1_M2_42: +.Ldgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_42 + bgt .Ldgemm_kernel_L1_M2_42 -dgemm_kernel_L1_M2_100: +.Ldgemm_kernel_L1_M2_100: SAVE2x1 -dgemm_kernel_L1_M2_END: +.Ldgemm_kernel_L1_M2_END: -dgemm_kernel_L1_M1_BEGIN: +.Ldgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END -dgemm_kernel_L1_M1_20: +.Ldgemm_kernel_L1_M1_20: INIT1x1 @@ -1632,9 +1632,9 @@ dgemm_kernel_L1_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M1_40 + ble .Ldgemm_kernel_L1_M1_40 -dgemm_kernel_L1_M1_22: +.Ldgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1646,30 +1646,30 @@ dgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_22 + bgt .Ldgemm_kernel_L1_M1_22 -dgemm_kernel_L1_M1_40: +.Ldgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M1_100 + ble .Ldgemm_kernel_L1_M1_100 -dgemm_kernel_L1_M1_42: +.Ldgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_42 + bgt .Ldgemm_kernel_L1_M1_42 -dgemm_kernel_L1_M1_100: +.Ldgemm_kernel_L1_M1_100: SAVE1x1 -dgemm_kernel_L1_END: +.Ldgemm_kernel_L1_END: -dgemm_kernel_L999: +.Ldgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/dgemm_kernel_8x4.S b/kernel/arm64/dgemm_kernel_8x4.S index 3fd74fc3b..af3aa0217 100644 --- a/kernel/arm64/dgemm_kernel_8x4.S +++ b/kernel/arm64/dgemm_kernel_8x4.S @@ -885,12 +885,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble dgemm_kernel_L2_BEGIN + ble .Ldgemm_kernel_L2_BEGIN /******************************************************************************/ .align 5 -dgemm_kernel_L4_BEGIN: +.Ldgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -900,21 +900,21 @@ dgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -dgemm_kernel_L4_M8_BEGIN: +.Ldgemm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dgemm_kernel_L4_M4_BEGIN + ble .Ldgemm_kernel_L4_M4_BEGIN .align 5 -dgemm_kernel_L4_M8_20: +.Ldgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #3 // L = K / 8 cmp counterL , #2 // is there at least 4 to do? - blt dgemm_kernel_L4_M8_32 + blt .Ldgemm_kernel_L4_M8_32 KERNEL8x4_I KERNEL8x4_M2 @@ -926,10 +926,10 @@ dgemm_kernel_L4_M8_20: KERNEL8x4_M2 subs counterL, counterL, #2 // subtract 2 - ble dgemm_kernel_L4_M8_22a + ble .Ldgemm_kernel_L4_M8_22a .align 5 -dgemm_kernel_L4_M8_22: +.Ldgemm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 @@ -941,10 +941,10 @@ dgemm_kernel_L4_M8_22: KERNEL8x4_M2 subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M8_22 + bgt .Ldgemm_kernel_L4_M8_22 .align 5 -dgemm_kernel_L4_M8_22a: +.Ldgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_M2 @@ -955,13 +955,13 @@ dgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b dgemm_kernel_L4_M8_44 + b .Ldgemm_kernel_L4_M8_44 .align 5 -dgemm_kernel_L4_M8_32: +.Ldgemm_kernel_L4_M8_32: tst counterL, #1 - ble dgemm_kernel_L4_M8_40 + ble .Ldgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_M2 @@ -972,46 +972,46 @@ dgemm_kernel_L4_M8_32: KERNEL8x4_M1 KERNEL8x4_E - b dgemm_kernel_L4_M8_44 + b .Ldgemm_kernel_L4_M8_44 -dgemm_kernel_L4_M8_40: +.Ldgemm_kernel_L4_M8_40: INIT8x4 -dgemm_kernel_L4_M8_44: +.Ldgemm_kernel_L4_M8_44: ands counterL , origK, #7 - ble dgemm_kernel_L4_M8_100 + ble .Ldgemm_kernel_L4_M8_100 .align 5 -dgemm_kernel_L4_M8_46: +.Ldgemm_kernel_L4_M8_46: KERNEL8x4_SUB subs counterL, counterL, #1 - bne dgemm_kernel_L4_M8_46 + bne .Ldgemm_kernel_L4_M8_46 -dgemm_kernel_L4_M8_100: +.Ldgemm_kernel_L4_M8_100: prfm PLDL1KEEP, [pA] prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] SAVE8x4 -dgemm_kernel_L4_M8_END: +.Ldgemm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne dgemm_kernel_L4_M8_20 + bne .Ldgemm_kernel_L4_M8_20 -dgemm_kernel_L4_M4_BEGIN: +.Ldgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END tst counterI, #4 - ble dgemm_kernel_L4_M2_BEGIN + ble .Ldgemm_kernel_L4_M2_BEGIN -dgemm_kernel_L4_M4_20: +.Ldgemm_kernel_L4_M4_20: INIT4x4 @@ -1019,10 +1019,10 @@ dgemm_kernel_L4_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M4_40 + ble .Ldgemm_kernel_L4_M4_40 .align 5 -dgemm_kernel_L4_M4_22: +.Ldgemm_kernel_L4_M4_22: KERNEL4x4_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] @@ -1043,38 +1043,38 @@ dgemm_kernel_L4_M4_22: prfm PLDL1KEEP, [pA, #A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M4_22 + bgt .Ldgemm_kernel_L4_M4_22 -dgemm_kernel_L4_M4_40: +.Ldgemm_kernel_L4_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M4_100 + ble .Ldgemm_kernel_L4_M4_100 -dgemm_kernel_L4_M4_42: +.Ldgemm_kernel_L4_M4_42: KERNEL4x4_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] prfm PLDL1KEEP, [pA, #A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M4_42 + bgt .Ldgemm_kernel_L4_M4_42 -dgemm_kernel_L4_M4_100: +.Ldgemm_kernel_L4_M4_100: SAVE4x4 -dgemm_kernel_L4_M4_END: +.Ldgemm_kernel_L4_M4_END: -dgemm_kernel_L4_M2_BEGIN: +.Ldgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L4_M1_BEGIN + ble .Ldgemm_kernel_L4_M1_BEGIN -dgemm_kernel_L4_M2_20: +.Ldgemm_kernel_L4_M2_20: INIT2x4 @@ -1082,10 +1082,10 @@ dgemm_kernel_L4_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M2_40 + ble .Ldgemm_kernel_L4_M2_40 .align 5 -dgemm_kernel_L4_M2_22: +.Ldgemm_kernel_L4_M2_22: KERNEL2x4_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] @@ -1104,37 +1104,37 @@ dgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_22 + bgt .Ldgemm_kernel_L4_M2_22 -dgemm_kernel_L4_M2_40: +.Ldgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M2_100 + ble .Ldgemm_kernel_L4_M2_100 prfm PLDL1KEEP, [pA, #A_PRE_SIZE] prfm PLDL1KEEP, [pA, #A_PRE_SIZE+64] -dgemm_kernel_L4_M2_42: +.Ldgemm_kernel_L4_M2_42: KERNEL2x4_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_42 + bgt .Ldgemm_kernel_L4_M2_42 -dgemm_kernel_L4_M2_100: +.Ldgemm_kernel_L4_M2_100: SAVE2x4 -dgemm_kernel_L4_M2_END: +.Ldgemm_kernel_L4_M2_END: -dgemm_kernel_L4_M1_BEGIN: +.Ldgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END -dgemm_kernel_L4_M1_20: +.Ldgemm_kernel_L4_M1_20: INIT1x4 @@ -1142,10 +1142,10 @@ dgemm_kernel_L4_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M1_40 + ble .Ldgemm_kernel_L4_M1_40 .align 5 -dgemm_kernel_L4_M1_22: +.Ldgemm_kernel_L4_M1_22: KERNEL1x4_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] KERNEL1x4_SUB @@ -1163,46 +1163,46 @@ dgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_22 + bgt .Ldgemm_kernel_L4_M1_22 -dgemm_kernel_L4_M1_40: +.Ldgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M1_100 + ble .Ldgemm_kernel_L4_M1_100 prfm PLDL1KEEP, [pA, #A_PRE_SIZE] -dgemm_kernel_L4_M1_42: +.Ldgemm_kernel_L4_M1_42: KERNEL1x4_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_42 + bgt .Ldgemm_kernel_L4_M1_42 -dgemm_kernel_L4_M1_100: +.Ldgemm_kernel_L4_M1_100: SAVE1x4 -dgemm_kernel_L4_END: +.Ldgemm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 subs counterJ, counterJ , #1 // j-- - bgt dgemm_kernel_L4_BEGIN + bgt .Ldgemm_kernel_L4_BEGIN /******************************************************************************/ -dgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Ldgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble dgemm_kernel_L999 // error, N was less than 4? + ble .Ldgemm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble dgemm_kernel_L1_BEGIN + ble .Ldgemm_kernel_L1_BEGIN mov pCRow0, pC add pCRow1, pCRow0, LDC @@ -1211,15 +1211,15 @@ dgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -dgemm_kernel_L2_M8_BEGIN: +.Ldgemm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dgemm_kernel_L2_M4_BEGIN + ble .Ldgemm_kernel_L2_M4_BEGIN .align 5 -dgemm_kernel_L2_M8_20: +.Ldgemm_kernel_L2_M8_20: INIT8x2 @@ -1227,10 +1227,10 @@ dgemm_kernel_L2_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M8_40 + ble .Ldgemm_kernel_L2_M8_40 .align 5 -dgemm_kernel_L2_M8_22: +.Ldgemm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] @@ -1244,41 +1244,41 @@ dgemm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M8_22 + bgt .Ldgemm_kernel_L2_M8_22 -dgemm_kernel_L2_M8_40: +.Ldgemm_kernel_L2_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M8_100 + ble .Ldgemm_kernel_L2_M8_100 prfm PLDL1KEEP, [pB, #B_PRE_SIZE] prfm PLDL1KEEP, [pB, #B_PRE_SIZE+64] -dgemm_kernel_L2_M8_42: +.Ldgemm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M8_42 + bgt .Ldgemm_kernel_L2_M8_42 -dgemm_kernel_L2_M8_100: +.Ldgemm_kernel_L2_M8_100: SAVE8x2 -dgemm_kernel_L2_M8_END: +.Ldgemm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L2_M8_20 + bgt .Ldgemm_kernel_L2_M8_20 -dgemm_kernel_L2_M4_BEGIN: +.Ldgemm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END tst counterI, #4 // counterI = counterI / 2 - ble dgemm_kernel_L2_M2_BEGIN + ble .Ldgemm_kernel_L2_M2_BEGIN -dgemm_kernel_L2_M4_20: +.Ldgemm_kernel_L2_M4_20: INIT4x2 @@ -1286,10 +1286,10 @@ dgemm_kernel_L2_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M4_40 + ble .Ldgemm_kernel_L2_M4_40 .align 5 -dgemm_kernel_L2_M4_22: +.Ldgemm_kernel_L2_M4_22: KERNEL4x2_SUB prfm PLDL1KEEP, [pA, #A_PRE_SIZE] KERNEL4x2_SUB @@ -1307,41 +1307,41 @@ dgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_22 + bgt .Ldgemm_kernel_L2_M4_22 -dgemm_kernel_L2_M4_40: +.Ldgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M4_100 + ble .Ldgemm_kernel_L2_M4_100 prfm PLDL1KEEP, [pB, #B_PRE_SIZE] prfm PLDL1KEEP, [pB, #B_PRE_SIZE+64] -dgemm_kernel_L2_M4_42: +.Ldgemm_kernel_L2_M4_42: KERNEL4x2_SUB prfm PLDL1KEEP, [pA, #A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_42 + bgt .Ldgemm_kernel_L2_M4_42 -dgemm_kernel_L2_M4_100: +.Ldgemm_kernel_L2_M4_100: SAVE4x2 -dgemm_kernel_L2_M4_END: +.Ldgemm_kernel_L2_M4_END: -dgemm_kernel_L2_M2_BEGIN: +.Ldgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L2_M1_BEGIN + ble .Ldgemm_kernel_L2_M1_BEGIN -dgemm_kernel_L2_M2_20: +.Ldgemm_kernel_L2_M2_20: INIT2x2 @@ -1349,9 +1349,9 @@ dgemm_kernel_L2_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M2_40 + ble .Ldgemm_kernel_L2_M2_40 -dgemm_kernel_L2_M2_22: +.Ldgemm_kernel_L2_M2_22: KERNEL2x2_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] @@ -1368,37 +1368,37 @@ dgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_22 + bgt .Ldgemm_kernel_L2_M2_22 prfm PLDL1KEEP, [pA, #A_PRE_SIZE] prfm PLDL1KEEP, [pA, #A_PRE_SIZE+64] prfm PLDL1KEEP, [pB, #B_PRE_SIZE] prfm PLDL1KEEP, [pB, #B_PRE_SIZE+64] -dgemm_kernel_L2_M2_40: +.Ldgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M2_100 + ble .Ldgemm_kernel_L2_M2_100 -dgemm_kernel_L2_M2_42: +.Ldgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_42 + bgt .Ldgemm_kernel_L2_M2_42 -dgemm_kernel_L2_M2_100: +.Ldgemm_kernel_L2_M2_100: SAVE2x2 -dgemm_kernel_L2_M2_END: +.Ldgemm_kernel_L2_M2_END: -dgemm_kernel_L2_M1_BEGIN: +.Ldgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END -dgemm_kernel_L2_M1_20: +.Ldgemm_kernel_L2_M1_20: INIT1x2 @@ -1406,9 +1406,9 @@ dgemm_kernel_L2_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dgemm_kernel_L2_M1_40 + ble .Ldgemm_kernel_L2_M1_40 -dgemm_kernel_L2_M1_22: +.Ldgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB prfm PLDL1KEEP, [pB, #B_PRE_SIZE] @@ -1424,62 +1424,62 @@ dgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_22 + bgt .Ldgemm_kernel_L2_M1_22 prfm PLDL1KEEP, [pA, #A_PRE_SIZE] prfm PLDL1KEEP, [pB, #B_PRE_SIZE] prfm PLDL1KEEP, [pB, #B_PRE_SIZE+64] -dgemm_kernel_L2_M1_40: +.Ldgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M1_100 + ble .Ldgemm_kernel_L2_M1_100 -dgemm_kernel_L2_M1_42: +.Ldgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_42 + bgt .Ldgemm_kernel_L2_M1_42 -dgemm_kernel_L2_M1_100: +.Ldgemm_kernel_L2_M1_100: SAVE1x2 -dgemm_kernel_L2_END: +.Ldgemm_kernel_L2_END: add origPB, origPB, origK, lsl #4 // B = B + K * 2 * 8 /******************************************************************************/ -dgemm_kernel_L1_BEGIN: +.Ldgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble dgemm_kernel_L999 // done + ble .Ldgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C add pC , pC , LDC // Update pC to point to next mov pA, origPA // pA = A -dgemm_kernel_L1_M8_BEGIN: +.Ldgemm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dgemm_kernel_L1_M4_BEGIN + ble .Ldgemm_kernel_L1_M4_BEGIN .align 5 -dgemm_kernel_L1_M8_20: +.Ldgemm_kernel_L1_M8_20: INIT8x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M8_40 + ble .Ldgemm_kernel_L1_M8_40 .align 5 -dgemm_kernel_L1_M8_22: +.Ldgemm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -1493,51 +1493,51 @@ dgemm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M8_22 + bgt .Ldgemm_kernel_L1_M8_22 -dgemm_kernel_L1_M8_40: +.Ldgemm_kernel_L1_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M8_100 + ble .Ldgemm_kernel_L1_M8_100 prfm PLDL1KEEP, [pB, #B_PRE_SIZE] -dgemm_kernel_L1_M8_42: +.Ldgemm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M8_42 + bgt .Ldgemm_kernel_L1_M8_42 -dgemm_kernel_L1_M8_100: +.Ldgemm_kernel_L1_M8_100: SAVE8x1 -dgemm_kernel_L1_M8_END: +.Ldgemm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L1_M8_20 + bgt .Ldgemm_kernel_L1_M8_20 -dgemm_kernel_L1_M4_BEGIN: +.Ldgemm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END tst counterI, #4 // counterI = counterI / 2 - ble dgemm_kernel_L1_M2_BEGIN + ble .Ldgemm_kernel_L1_M2_BEGIN -dgemm_kernel_L1_M4_20: +.Ldgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M4_40 + ble .Ldgemm_kernel_L1_M4_40 .align 5 -dgemm_kernel_L1_M4_22: +.Ldgemm_kernel_L1_M4_22: KERNEL4x1_SUB prfm PLDL1KEEP, [pA, #A_PRE_SIZE] KERNEL4x1_SUB @@ -1555,39 +1555,39 @@ dgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_22 + bgt .Ldgemm_kernel_L1_M4_22 -dgemm_kernel_L1_M4_40: +.Ldgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M4_100 + ble .Ldgemm_kernel_L1_M4_100 prfm PLDL1KEEP, [pB, #B_PRE_SIZE] -dgemm_kernel_L1_M4_42: +.Ldgemm_kernel_L1_M4_42: KERNEL4x1_SUB prfm PLDL1KEEP, [pA, #A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_42 + bgt .Ldgemm_kernel_L1_M4_42 -dgemm_kernel_L1_M4_100: +.Ldgemm_kernel_L1_M4_100: SAVE4x1 -dgemm_kernel_L1_M4_END: +.Ldgemm_kernel_L1_M4_END: -dgemm_kernel_L1_M2_BEGIN: +.Ldgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L1_M1_BEGIN + ble .Ldgemm_kernel_L1_M1_BEGIN -dgemm_kernel_L1_M2_20: +.Ldgemm_kernel_L1_M2_20: INIT2x1 @@ -1595,9 +1595,9 @@ dgemm_kernel_L1_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M2_40 + ble .Ldgemm_kernel_L1_M2_40 -dgemm_kernel_L1_M2_22: +.Ldgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1614,36 +1614,36 @@ dgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_22 + bgt .Ldgemm_kernel_L1_M2_22 prfm PLDL1KEEP, [pA, #A_PRE_SIZE] prfm PLDL1KEEP, [pA, #A_PRE_SIZE+64] prfm PLDL1KEEP, [pB, #B_PRE_SIZE] -dgemm_kernel_L1_M2_40: +.Ldgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M2_100 + ble .Ldgemm_kernel_L1_M2_100 -dgemm_kernel_L1_M2_42: +.Ldgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_42 + bgt .Ldgemm_kernel_L1_M2_42 -dgemm_kernel_L1_M2_100: +.Ldgemm_kernel_L1_M2_100: SAVE2x1 -dgemm_kernel_L1_M2_END: +.Ldgemm_kernel_L1_M2_END: -dgemm_kernel_L1_M1_BEGIN: +.Ldgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END -dgemm_kernel_L1_M1_20: +.Ldgemm_kernel_L1_M1_20: INIT1x1 @@ -1651,10 +1651,10 @@ dgemm_kernel_L1_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M1_40 + ble .Ldgemm_kernel_L1_M1_40 -dgemm_kernel_L1_M1_22: +.Ldgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB prfm PLDL1KEEP, [pA, #A_PRE_SIZE] @@ -1668,32 +1668,32 @@ dgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_22 + bgt .Ldgemm_kernel_L1_M1_22 -dgemm_kernel_L1_M1_40: +.Ldgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M1_100 + ble .Ldgemm_kernel_L1_M1_100 prfm PLDL1KEEP, [pA, #A_PRE_SIZE] prfm PLDL1KEEP, [pB, #B_PRE_SIZE] -dgemm_kernel_L1_M1_42: +.Ldgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_42 + bgt .Ldgemm_kernel_L1_M1_42 -dgemm_kernel_L1_M1_100: +.Ldgemm_kernel_L1_M1_100: SAVE1x1 -dgemm_kernel_L1_END: +.Ldgemm_kernel_L1_END: -dgemm_kernel_L999: +.Ldgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/dgemm_kernel_8x4_thunderx2t99.S b/kernel/arm64/dgemm_kernel_8x4_thunderx2t99.S index 86865d825..598db6e0c 100644 --- a/kernel/arm64/dgemm_kernel_8x4_thunderx2t99.S +++ b/kernel/arm64/dgemm_kernel_8x4_thunderx2t99.S @@ -962,12 +962,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble dgemm_kernel_L2_BEGIN + ble .Ldgemm_kernel_L2_BEGIN /******************************************************************************/ .align 5 -dgemm_kernel_L4_BEGIN: +.Ldgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -977,21 +977,21 @@ dgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -dgemm_kernel_L4_M8_BEGIN: +.Ldgemm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dgemm_kernel_L4_M4_BEGIN + ble .Ldgemm_kernel_L4_M4_BEGIN .align 5 -dgemm_kernel_L4_M8_20: +.Ldgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #7 // L = K / 128 cmp counterL , #2 // is there at least 4 to do? - blt dgemm_kernel_L4_M8_32 + blt .Ldgemm_kernel_L4_M8_32 KERNEL8x4_I KERNEL8x4_M2 @@ -1003,18 +1003,18 @@ dgemm_kernel_L4_M8_20: KERNEL8x4_M1_M2_x1 subs counterL, counterL, #2 // subtract 2 - ble dgemm_kernel_L4_M8_22a + ble .Ldgemm_kernel_L4_M8_22a .align 5 -dgemm_kernel_L4_M8_22: +.Ldgemm_kernel_L4_M8_22: KERNEL8x4_M1_M2_x64 subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M8_22 + bgt .Ldgemm_kernel_L4_M8_22 .align 5 -dgemm_kernel_L4_M8_22a: +.Ldgemm_kernel_L4_M8_22a: KERNEL8x4_M1_M2_x32 KERNEL8x4_M1_M2_x16 @@ -1025,13 +1025,13 @@ dgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b dgemm_kernel_L4_M8_44 + b .Ldgemm_kernel_L4_M8_44 .align 5 -dgemm_kernel_L4_M8_32: +.Ldgemm_kernel_L4_M8_32: tst counterL, #1 - ble dgemm_kernel_L4_M8_40 + ble .Ldgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_M2 @@ -1043,26 +1043,26 @@ dgemm_kernel_L4_M8_32: KERNEL8x4_M1 KERNEL8x4_E - b dgemm_kernel_L4_M8_44 + b .Ldgemm_kernel_L4_M8_44 -dgemm_kernel_L4_M8_40: +.Ldgemm_kernel_L4_M8_40: INIT8x4 -dgemm_kernel_L4_M8_44: +.Ldgemm_kernel_L4_M8_44: ands counterL , origK, #127 - ble dgemm_kernel_L4_M8_100 + ble .Ldgemm_kernel_L4_M8_100 .align 5 -dgemm_kernel_L4_M8_46: +.Ldgemm_kernel_L4_M8_46: KERNEL8x4_SUB subs counterL, counterL, #1 - bne dgemm_kernel_L4_M8_46 + bne .Ldgemm_kernel_L4_M8_46 -dgemm_kernel_L4_M8_100: +.Ldgemm_kernel_L4_M8_100: prfm PLDL2KEEP, [pCRow0, C_PRE_SIZE] prfm PLDL2KEEP, [pCRow1, C_PRE_SIZE] prfm PLDL2KEEP, [pCRow2, C_PRE_SIZE] @@ -1073,20 +1073,20 @@ dgemm_kernel_L4_M8_100: SAVE8x4 -dgemm_kernel_L4_M8_END: +.Ldgemm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne dgemm_kernel_L4_M8_20 + bne .Ldgemm_kernel_L4_M8_20 -dgemm_kernel_L4_M4_BEGIN: +.Ldgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END tst counterI, #4 - ble dgemm_kernel_L4_M2_BEGIN + ble .Ldgemm_kernel_L4_M2_BEGIN -dgemm_kernel_L4_M4_20: +.Ldgemm_kernel_L4_M4_20: INIT4x4 @@ -1094,10 +1094,10 @@ dgemm_kernel_L4_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M4_40 + ble .Ldgemm_kernel_L4_M4_40 .align 5 -dgemm_kernel_L4_M4_22: +.Ldgemm_kernel_L4_M4_22: KERNEL4x4_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] @@ -1118,38 +1118,38 @@ dgemm_kernel_L4_M4_22: prfm PLDL1KEEP, [pA, A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M4_22 + bgt .Ldgemm_kernel_L4_M4_22 -dgemm_kernel_L4_M4_40: +.Ldgemm_kernel_L4_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M4_100 + ble .Ldgemm_kernel_L4_M4_100 -dgemm_kernel_L4_M4_42: +.Ldgemm_kernel_L4_M4_42: KERNEL4x4_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] prfm PLDL1KEEP, [pA, A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M4_42 + bgt .Ldgemm_kernel_L4_M4_42 -dgemm_kernel_L4_M4_100: +.Ldgemm_kernel_L4_M4_100: SAVE4x4 -dgemm_kernel_L4_M4_END: +.Ldgemm_kernel_L4_M4_END: -dgemm_kernel_L4_M2_BEGIN: +.Ldgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L4_M1_BEGIN + ble .Ldgemm_kernel_L4_M1_BEGIN -dgemm_kernel_L4_M2_20: +.Ldgemm_kernel_L4_M2_20: INIT2x4 @@ -1157,10 +1157,10 @@ dgemm_kernel_L4_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M2_40 + ble .Ldgemm_kernel_L4_M2_40 .align 5 -dgemm_kernel_L4_M2_22: +.Ldgemm_kernel_L4_M2_22: KERNEL2x4_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] @@ -1179,37 +1179,37 @@ dgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_22 + bgt .Ldgemm_kernel_L4_M2_22 -dgemm_kernel_L4_M2_40: +.Ldgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M2_100 + ble .Ldgemm_kernel_L4_M2_100 prfm PLDL1KEEP, [pA, A_PRE_SIZE] prfm PLDL1KEEP, [pA, A_PRE_SIZE_64] -dgemm_kernel_L4_M2_42: +.Ldgemm_kernel_L4_M2_42: KERNEL2x4_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M2_42 + bgt .Ldgemm_kernel_L4_M2_42 -dgemm_kernel_L4_M2_100: +.Ldgemm_kernel_L4_M2_100: SAVE2x4 -dgemm_kernel_L4_M2_END: +.Ldgemm_kernel_L4_M2_END: -dgemm_kernel_L4_M1_BEGIN: +.Ldgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L4_END + ble .Ldgemm_kernel_L4_END -dgemm_kernel_L4_M1_20: +.Ldgemm_kernel_L4_M1_20: INIT1x4 @@ -1217,10 +1217,10 @@ dgemm_kernel_L4_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L4_M1_40 + ble .Ldgemm_kernel_L4_M1_40 .align 5 -dgemm_kernel_L4_M1_22: +.Ldgemm_kernel_L4_M1_22: KERNEL1x4_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] KERNEL1x4_SUB @@ -1238,46 +1238,46 @@ dgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_22 + bgt .Ldgemm_kernel_L4_M1_22 -dgemm_kernel_L4_M1_40: +.Ldgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L4_M1_100 + ble .Ldgemm_kernel_L4_M1_100 prfm PLDL1KEEP, [pA, A_PRE_SIZE] -dgemm_kernel_L4_M1_42: +.Ldgemm_kernel_L4_M1_42: KERNEL1x4_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L4_M1_42 + bgt .Ldgemm_kernel_L4_M1_42 -dgemm_kernel_L4_M1_100: +.Ldgemm_kernel_L4_M1_100: SAVE1x4 -dgemm_kernel_L4_END: +.Ldgemm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 subs counterJ, counterJ , #1 // j-- - bgt dgemm_kernel_L4_BEGIN + bgt .Ldgemm_kernel_L4_BEGIN /******************************************************************************/ -dgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Ldgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble dgemm_kernel_L999 // error, N was less than 4? + ble .Ldgemm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble dgemm_kernel_L1_BEGIN + ble .Ldgemm_kernel_L1_BEGIN mov pCRow0, pC add pCRow1, pCRow0, LDC @@ -1286,15 +1286,15 @@ dgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -dgemm_kernel_L2_M8_BEGIN: +.Ldgemm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dgemm_kernel_L2_M4_BEGIN + ble .Ldgemm_kernel_L2_M4_BEGIN .align 5 -dgemm_kernel_L2_M8_20: +.Ldgemm_kernel_L2_M8_20: INIT8x2 @@ -1302,10 +1302,10 @@ dgemm_kernel_L2_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M8_40 + ble .Ldgemm_kernel_L2_M8_40 .align 5 -dgemm_kernel_L2_M8_22: +.Ldgemm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] @@ -1319,41 +1319,41 @@ dgemm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M8_22 + bgt .Ldgemm_kernel_L2_M8_22 -dgemm_kernel_L2_M8_40: +.Ldgemm_kernel_L2_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M8_100 + ble .Ldgemm_kernel_L2_M8_100 prfm PLDL1KEEP, [pB, B_PRE_SIZE] prfm PLDL1KEEP, [pB, B_PRE_SIZE_64] -dgemm_kernel_L2_M8_42: +.Ldgemm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M8_42 + bgt .Ldgemm_kernel_L2_M8_42 -dgemm_kernel_L2_M8_100: +.Ldgemm_kernel_L2_M8_100: SAVE8x2 -dgemm_kernel_L2_M8_END: +.Ldgemm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L2_M8_20 + bgt .Ldgemm_kernel_L2_M8_20 -dgemm_kernel_L2_M4_BEGIN: +.Ldgemm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END tst counterI, #4 // counterI = counterI / 2 - ble dgemm_kernel_L2_M2_BEGIN + ble .Ldgemm_kernel_L2_M2_BEGIN -dgemm_kernel_L2_M4_20: +.Ldgemm_kernel_L2_M4_20: INIT4x2 @@ -1361,10 +1361,10 @@ dgemm_kernel_L2_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M4_40 + ble .Ldgemm_kernel_L2_M4_40 .align 5 -dgemm_kernel_L2_M4_22: +.Ldgemm_kernel_L2_M4_22: KERNEL4x2_SUB prfm PLDL1KEEP, [pA, A_PRE_SIZE] KERNEL4x2_SUB @@ -1382,41 +1382,41 @@ dgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_22 + bgt .Ldgemm_kernel_L2_M4_22 -dgemm_kernel_L2_M4_40: +.Ldgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M4_100 + ble .Ldgemm_kernel_L2_M4_100 prfm PLDL1KEEP, [pB, B_PRE_SIZE] prfm PLDL1KEEP, [pB, B_PRE_SIZE_64] -dgemm_kernel_L2_M4_42: +.Ldgemm_kernel_L2_M4_42: KERNEL4x2_SUB prfm PLDL1KEEP, [pA, A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M4_42 + bgt .Ldgemm_kernel_L2_M4_42 -dgemm_kernel_L2_M4_100: +.Ldgemm_kernel_L2_M4_100: SAVE4x2 -dgemm_kernel_L2_M4_END: +.Ldgemm_kernel_L2_M4_END: -dgemm_kernel_L2_M2_BEGIN: +.Ldgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L2_M1_BEGIN + ble .Ldgemm_kernel_L2_M1_BEGIN -dgemm_kernel_L2_M2_20: +.Ldgemm_kernel_L2_M2_20: INIT2x2 @@ -1424,9 +1424,9 @@ dgemm_kernel_L2_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dgemm_kernel_L2_M2_40 + ble .Ldgemm_kernel_L2_M2_40 -dgemm_kernel_L2_M2_22: +.Ldgemm_kernel_L2_M2_22: KERNEL2x2_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] @@ -1443,37 +1443,37 @@ dgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_22 + bgt .Ldgemm_kernel_L2_M2_22 prfm PLDL1KEEP, [pA, A_PRE_SIZE] prfm PLDL1KEEP, [pA, A_PRE_SIZE_64] prfm PLDL1KEEP, [pB, B_PRE_SIZE] prfm PLDL1KEEP, [pB, B_PRE_SIZE_64] -dgemm_kernel_L2_M2_40: +.Ldgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M2_100 + ble .Ldgemm_kernel_L2_M2_100 -dgemm_kernel_L2_M2_42: +.Ldgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M2_42 + bgt .Ldgemm_kernel_L2_M2_42 -dgemm_kernel_L2_M2_100: +.Ldgemm_kernel_L2_M2_100: SAVE2x2 -dgemm_kernel_L2_M2_END: +.Ldgemm_kernel_L2_M2_END: -dgemm_kernel_L2_M1_BEGIN: +.Ldgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L2_END + ble .Ldgemm_kernel_L2_END -dgemm_kernel_L2_M1_20: +.Ldgemm_kernel_L2_M1_20: INIT1x2 @@ -1481,9 +1481,9 @@ dgemm_kernel_L2_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dgemm_kernel_L2_M1_40 + ble .Ldgemm_kernel_L2_M1_40 -dgemm_kernel_L2_M1_22: +.Ldgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB prfm PLDL1KEEP, [pB, B_PRE_SIZE] @@ -1499,62 +1499,62 @@ dgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_22 + bgt .Ldgemm_kernel_L2_M1_22 prfm PLDL1KEEP, [pA, A_PRE_SIZE] prfm PLDL1KEEP, [pB, B_PRE_SIZE] prfm PLDL1KEEP, [pB, B_PRE_SIZE_64] -dgemm_kernel_L2_M1_40: +.Ldgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L2_M1_100 + ble .Ldgemm_kernel_L2_M1_100 -dgemm_kernel_L2_M1_42: +.Ldgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L2_M1_42 + bgt .Ldgemm_kernel_L2_M1_42 -dgemm_kernel_L2_M1_100: +.Ldgemm_kernel_L2_M1_100: SAVE1x2 -dgemm_kernel_L2_END: +.Ldgemm_kernel_L2_END: add origPB, origPB, origK, lsl #4 // B = B + K * 2 * 8 /******************************************************************************/ -dgemm_kernel_L1_BEGIN: +.Ldgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble dgemm_kernel_L999 // done + ble .Ldgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C add pC , pC , LDC // Update pC to point to next mov pA, origPA // pA = A -dgemm_kernel_L1_M8_BEGIN: +.Ldgemm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dgemm_kernel_L1_M4_BEGIN + ble .Ldgemm_kernel_L1_M4_BEGIN .align 5 -dgemm_kernel_L1_M8_20: +.Ldgemm_kernel_L1_M8_20: INIT8x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M8_40 + ble .Ldgemm_kernel_L1_M8_40 .align 5 -dgemm_kernel_L1_M8_22: +.Ldgemm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -1568,51 +1568,51 @@ dgemm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M8_22 + bgt .Ldgemm_kernel_L1_M8_22 -dgemm_kernel_L1_M8_40: +.Ldgemm_kernel_L1_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M8_100 + ble .Ldgemm_kernel_L1_M8_100 prfm PLDL1KEEP, [pB, B_PRE_SIZE] -dgemm_kernel_L1_M8_42: +.Ldgemm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M8_42 + bgt .Ldgemm_kernel_L1_M8_42 -dgemm_kernel_L1_M8_100: +.Ldgemm_kernel_L1_M8_100: SAVE8x1 -dgemm_kernel_L1_M8_END: +.Ldgemm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt dgemm_kernel_L1_M8_20 + bgt .Ldgemm_kernel_L1_M8_20 -dgemm_kernel_L1_M4_BEGIN: +.Ldgemm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END tst counterI, #4 // counterI = counterI / 2 - ble dgemm_kernel_L1_M2_BEGIN + ble .Ldgemm_kernel_L1_M2_BEGIN -dgemm_kernel_L1_M4_20: +.Ldgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M4_40 + ble .Ldgemm_kernel_L1_M4_40 .align 5 -dgemm_kernel_L1_M4_22: +.Ldgemm_kernel_L1_M4_22: KERNEL4x1_SUB prfm PLDL1KEEP, [pA, A_PRE_SIZE] KERNEL4x1_SUB @@ -1630,39 +1630,39 @@ dgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_22 + bgt .Ldgemm_kernel_L1_M4_22 -dgemm_kernel_L1_M4_40: +.Ldgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M4_100 + ble .Ldgemm_kernel_L1_M4_100 prfm PLDL1KEEP, [pB, B_PRE_SIZE] -dgemm_kernel_L1_M4_42: +.Ldgemm_kernel_L1_M4_42: KERNEL4x1_SUB prfm PLDL1KEEP, [pA, A_PRE_SIZE] subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M4_42 + bgt .Ldgemm_kernel_L1_M4_42 -dgemm_kernel_L1_M4_100: +.Ldgemm_kernel_L1_M4_100: SAVE4x1 -dgemm_kernel_L1_M4_END: +.Ldgemm_kernel_L1_M4_END: -dgemm_kernel_L1_M2_BEGIN: +.Ldgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble dgemm_kernel_L1_M1_BEGIN + ble .Ldgemm_kernel_L1_M1_BEGIN -dgemm_kernel_L1_M2_20: +.Ldgemm_kernel_L1_M2_20: INIT2x1 @@ -1670,9 +1670,9 @@ dgemm_kernel_L1_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M2_40 + ble .Ldgemm_kernel_L1_M2_40 -dgemm_kernel_L1_M2_22: +.Ldgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1689,36 +1689,36 @@ dgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_22 + bgt .Ldgemm_kernel_L1_M2_22 prfm PLDL1KEEP, [pA, A_PRE_SIZE] prfm PLDL1KEEP, [pA, A_PRE_SIZE_64] prfm PLDL1KEEP, [pB, B_PRE_SIZE] -dgemm_kernel_L1_M2_40: +.Ldgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M2_100 + ble .Ldgemm_kernel_L1_M2_100 -dgemm_kernel_L1_M2_42: +.Ldgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M2_42 + bgt .Ldgemm_kernel_L1_M2_42 -dgemm_kernel_L1_M2_100: +.Ldgemm_kernel_L1_M2_100: SAVE2x1 -dgemm_kernel_L1_M2_END: +.Ldgemm_kernel_L1_M2_END: -dgemm_kernel_L1_M1_BEGIN: +.Ldgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dgemm_kernel_L1_END + ble .Ldgemm_kernel_L1_END -dgemm_kernel_L1_M1_20: +.Ldgemm_kernel_L1_M1_20: INIT1x1 @@ -1726,10 +1726,10 @@ dgemm_kernel_L1_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dgemm_kernel_L1_M1_40 + ble .Ldgemm_kernel_L1_M1_40 -dgemm_kernel_L1_M1_22: +.Ldgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB prfm PLDL1KEEP, [pA, A_PRE_SIZE] @@ -1743,32 +1743,32 @@ dgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_22 + bgt .Ldgemm_kernel_L1_M1_22 -dgemm_kernel_L1_M1_40: +.Ldgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble dgemm_kernel_L1_M1_100 + ble .Ldgemm_kernel_L1_M1_100 prfm PLDL1KEEP, [pA, A_PRE_SIZE] prfm PLDL1KEEP, [pB, B_PRE_SIZE] -dgemm_kernel_L1_M1_42: +.Ldgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dgemm_kernel_L1_M1_42 + bgt .Ldgemm_kernel_L1_M1_42 -dgemm_kernel_L1_M1_100: +.Ldgemm_kernel_L1_M1_100: SAVE1x1 -dgemm_kernel_L1_END: +.Ldgemm_kernel_L1_END: -dgemm_kernel_L999: +.Ldgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/dgemm_ncopy_4.S b/kernel/arm64/dgemm_ncopy_4.S index c98a73277..29d274d93 100644 --- a/kernel/arm64/dgemm_ncopy_4.S +++ b/kernel/arm64/dgemm_ncopy_4.S @@ -192,14 +192,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lsl LDA, LDA, #3 // LDA = LDA * SIZE -dgemm_ncopy_L4_BEGIN: +.Ldgemm_ncopy_L4_BEGIN: asr J, N, #2 // J = N / 4 cmp J, #0 - ble dgemm_ncopy_L2_BEGIN + ble .Ldgemm_ncopy_L2_BEGIN .align 5 -dgemm_ncopy_L4_M4_BEGIN: +.Ldgemm_ncopy_L4_M4_BEGIN: mov A01, A00 add A02, A01, LDA @@ -209,128 +209,128 @@ dgemm_ncopy_L4_M4_BEGIN: asr I, M, #2 // I = M / 4 cmp I, #0 - ble dgemm_ncopy_L4_M4_40 + ble .Ldgemm_ncopy_L4_M4_40 .align 5 -dgemm_ncopy_L4_M4_20: +.Ldgemm_ncopy_L4_M4_20: COPY4x4 subs I , I , #1 - bne dgemm_ncopy_L4_M4_20 + bne .Ldgemm_ncopy_L4_M4_20 -dgemm_ncopy_L4_M4_40: +.Ldgemm_ncopy_L4_M4_40: and I, M , #3 cmp I, #0 - ble dgemm_ncopy_L4_M4_END + ble .Ldgemm_ncopy_L4_M4_END .align 5 -dgemm_ncopy_L4_M4_60: +.Ldgemm_ncopy_L4_M4_60: COPY1x4 subs I , I , #1 - bne dgemm_ncopy_L4_M4_60 + bne .Ldgemm_ncopy_L4_M4_60 -dgemm_ncopy_L4_M4_END: +.Ldgemm_ncopy_L4_M4_END: subs J , J, #1 // j-- - bne dgemm_ncopy_L4_M4_BEGIN + bne .Ldgemm_ncopy_L4_M4_BEGIN /*********************************************************************************************/ -dgemm_ncopy_L2_BEGIN: +.Ldgemm_ncopy_L2_BEGIN: tst N, #3 - ble dgemm_ncopy_L999 + ble .Ldgemm_ncopy_L999 tst N, #2 - ble dgemm_ncopy_L1_BEGIN + ble .Ldgemm_ncopy_L1_BEGIN -dgemm_ncopy_L2_M4_BEGIN: +.Ldgemm_ncopy_L2_M4_BEGIN: mov A01, A00 add A02, A01, LDA add A00, A02, LDA asr I, M, #2 // I = M / 4 cmp I, #0 - ble dgemm_ncopy_L2_M4_40 + ble .Ldgemm_ncopy_L2_M4_40 .align 5 -dgemm_ncopy_L2_M4_20: +.Ldgemm_ncopy_L2_M4_20: COPY4x2 subs I , I , #1 - bne dgemm_ncopy_L2_M4_20 + bne .Ldgemm_ncopy_L2_M4_20 -dgemm_ncopy_L2_M4_40: +.Ldgemm_ncopy_L2_M4_40: and I, M , #3 cmp I, #0 - ble dgemm_ncopy_L2_M4_END + ble .Ldgemm_ncopy_L2_M4_END .align 5 -dgemm_ncopy_L2_M4_60: +.Ldgemm_ncopy_L2_M4_60: COPY1x2 subs I , I , #1 - bne dgemm_ncopy_L2_M4_60 + bne .Ldgemm_ncopy_L2_M4_60 -dgemm_ncopy_L2_M4_END: +.Ldgemm_ncopy_L2_M4_END: /*********************************************************************************************/ -dgemm_ncopy_L1_BEGIN: +.Ldgemm_ncopy_L1_BEGIN: tst N, #1 - ble dgemm_ncopy_L999 + ble .Ldgemm_ncopy_L999 -dgemm_ncopy_L1_M4_BEGIN: +.Ldgemm_ncopy_L1_M4_BEGIN: mov A01, A00 asr I, M, #2 // I = M / 4 cmp I, #0 - ble dgemm_ncopy_L1_M4_40 + ble .Ldgemm_ncopy_L1_M4_40 .align 5 -dgemm_ncopy_L1_M4_20: +.Ldgemm_ncopy_L1_M4_20: COPY4x1 subs I , I , #1 - bne dgemm_ncopy_L1_M4_20 + bne .Ldgemm_ncopy_L1_M4_20 -dgemm_ncopy_L1_M4_40: +.Ldgemm_ncopy_L1_M4_40: and I, M , #3 cmp I, #0 - ble dgemm_ncopy_L1_M4_END + ble .Ldgemm_ncopy_L1_M4_END .align 5 -dgemm_ncopy_L1_M4_60: +.Ldgemm_ncopy_L1_M4_60: COPY1x1 subs I , I , #1 - bne dgemm_ncopy_L1_M4_60 + bne .Ldgemm_ncopy_L1_M4_60 -dgemm_ncopy_L1_M4_END: +.Ldgemm_ncopy_L1_M4_END: -dgemm_ncopy_L999: +.Ldgemm_ncopy_L999: mov x0, #0 RESTORE_REGS diff --git a/kernel/arm64/dgemm_ncopy_8.S b/kernel/arm64/dgemm_ncopy_8.S index 1f237b42c..366424830 100644 --- a/kernel/arm64/dgemm_ncopy_8.S +++ b/kernel/arm64/dgemm_ncopy_8.S @@ -353,13 +353,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lsl LDA, LDA, #3 // LDA = LDA * SIZE -dgemm_ncopy_L8_BEGIN: +.Ldgemm_ncopy_L8_BEGIN: asr J, N, #3 // J = N / 8 cmp J, #0 - ble dgemm_ncopy_L4_BEGIN + ble .Ldgemm_ncopy_L4_BEGIN -dgemm_ncopy_L8_M8_BEGIN: +.Ldgemm_ncopy_L8_M8_BEGIN: mov A01, A00 add A02, A01, LDA @@ -374,46 +374,46 @@ dgemm_ncopy_L8_M8_BEGIN: asr I, M, #3 // I = M / 8 cmp I, #0 - ble dgemm_ncopy_L8_M8_40 + ble .Ldgemm_ncopy_L8_M8_40 -dgemm_ncopy_L8_M8_20: +.Ldgemm_ncopy_L8_M8_20: COPY8x8 subs I , I , #1 - bne dgemm_ncopy_L8_M8_20 + bne .Ldgemm_ncopy_L8_M8_20 -dgemm_ncopy_L8_M8_40: +.Ldgemm_ncopy_L8_M8_40: and I, M , #7 cmp I, #0 - ble dgemm_ncopy_L8_M8_END + ble .Ldgemm_ncopy_L8_M8_END -dgemm_ncopy_L8_M8_60: +.Ldgemm_ncopy_L8_M8_60: COPY1x8 subs I , I , #1 - bne dgemm_ncopy_L8_M8_60 + bne .Ldgemm_ncopy_L8_M8_60 -dgemm_ncopy_L8_M8_END: +.Ldgemm_ncopy_L8_M8_END: subs J , J, #1 // j-- - bne dgemm_ncopy_L8_M8_BEGIN + bne .Ldgemm_ncopy_L8_M8_BEGIN /*********************************************************************************************/ -dgemm_ncopy_L4_BEGIN: +.Ldgemm_ncopy_L4_BEGIN: tst N, #7 - ble dgemm_ncopy_L999 + ble .Ldgemm_ncopy_L999 tst N, #4 - ble dgemm_ncopy_L2_BEGIN + ble .Ldgemm_ncopy_L2_BEGIN -dgemm_ncopy_L4_M8_BEGIN: +.Ldgemm_ncopy_L4_M8_BEGIN: mov A01, A00 add A02, A01, LDA @@ -423,118 +423,118 @@ dgemm_ncopy_L4_M8_BEGIN: asr I, M, #3 // I = M / 8 cmp I, #0 - ble dgemm_ncopy_L4_M8_40 + ble .Ldgemm_ncopy_L4_M8_40 -dgemm_ncopy_L4_M8_20: +.Ldgemm_ncopy_L4_M8_20: COPY8x4 subs I , I , #1 - bne dgemm_ncopy_L4_M8_20 + bne .Ldgemm_ncopy_L4_M8_20 -dgemm_ncopy_L4_M8_40: +.Ldgemm_ncopy_L4_M8_40: and I, M , #7 cmp I, #0 - ble dgemm_ncopy_L4_M8_END + ble .Ldgemm_ncopy_L4_M8_END -dgemm_ncopy_L4_M8_60: +.Ldgemm_ncopy_L4_M8_60: COPY1x4 subs I , I , #1 - bne dgemm_ncopy_L4_M8_60 + bne .Ldgemm_ncopy_L4_M8_60 -dgemm_ncopy_L4_M8_END: +.Ldgemm_ncopy_L4_M8_END: /*********************************************************************************************/ -dgemm_ncopy_L2_BEGIN: +.Ldgemm_ncopy_L2_BEGIN: tst N, #3 - ble dgemm_ncopy_L999 + ble .Ldgemm_ncopy_L999 tst N, #2 - ble dgemm_ncopy_L1_BEGIN + ble .Ldgemm_ncopy_L1_BEGIN -dgemm_ncopy_L2_M8_BEGIN: +.Ldgemm_ncopy_L2_M8_BEGIN: mov A01, A00 add A02, A01, LDA add A00, A02, LDA asr I, M, #3 // I = M / 8 cmp I, #0 - ble dgemm_ncopy_L2_M8_40 + ble .Ldgemm_ncopy_L2_M8_40 -dgemm_ncopy_L2_M8_20: +.Ldgemm_ncopy_L2_M8_20: COPY8x2 subs I , I , #1 - bne dgemm_ncopy_L2_M8_20 + bne .Ldgemm_ncopy_L2_M8_20 -dgemm_ncopy_L2_M8_40: +.Ldgemm_ncopy_L2_M8_40: and I, M , #7 cmp I, #0 - ble dgemm_ncopy_L2_M8_END + ble .Ldgemm_ncopy_L2_M8_END -dgemm_ncopy_L2_M8_60: +.Ldgemm_ncopy_L2_M8_60: COPY1x2 subs I , I , #1 - bne dgemm_ncopy_L2_M8_60 + bne .Ldgemm_ncopy_L2_M8_60 -dgemm_ncopy_L2_M8_END: +.Ldgemm_ncopy_L2_M8_END: /*********************************************************************************************/ -dgemm_ncopy_L1_BEGIN: +.Ldgemm_ncopy_L1_BEGIN: tst N, #1 - ble dgemm_ncopy_L999 + ble .Ldgemm_ncopy_L999 -dgemm_ncopy_L1_M8_BEGIN: +.Ldgemm_ncopy_L1_M8_BEGIN: mov A01, A00 asr I, M, #3 // I = M / 8 cmp I, #0 - ble dgemm_ncopy_L1_M8_40 + ble .Ldgemm_ncopy_L1_M8_40 -dgemm_ncopy_L1_M8_20: +.Ldgemm_ncopy_L1_M8_20: COPY8x1 subs I , I , #1 - bne dgemm_ncopy_L1_M8_20 + bne .Ldgemm_ncopy_L1_M8_20 -dgemm_ncopy_L1_M8_40: +.Ldgemm_ncopy_L1_M8_40: and I, M , #7 cmp I, #0 - ble dgemm_ncopy_L1_M8_END + ble .Ldgemm_ncopy_L1_M8_END -dgemm_ncopy_L1_M8_60: +.Ldgemm_ncopy_L1_M8_60: COPY1x1 subs I , I , #1 - bne dgemm_ncopy_L1_M8_60 + bne .Ldgemm_ncopy_L1_M8_60 -dgemm_ncopy_L1_M8_END: +.Ldgemm_ncopy_L1_M8_END: -dgemm_ncopy_L999: +.Ldgemm_ncopy_L999: mov x0, #0 RESTORE_REGS diff --git a/kernel/arm64/dgemm_tcopy_4.S b/kernel/arm64/dgemm_tcopy_4.S index 5b2ed43f1..7c9135287 100644 --- a/kernel/arm64/dgemm_tcopy_4.S +++ b/kernel/arm64/dgemm_tcopy_4.S @@ -247,13 +247,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lsl M4, M, #5 // M4 = M * 4 * SIZE -dgemm_tcopy_L4_BEGIN: +.Ldgemm_tcopy_L4_BEGIN: asr J, M, #2 // J = M / 4 cmp J, #0 - ble dgemm_tcopy_L2_BEGIN + ble .Ldgemm_tcopy_L2_BEGIN .align 5 -dgemm_tcopy_L4_M4_BEGIN: +.Ldgemm_tcopy_L4_M4_BEGIN: mov A01, A add A02, A01, LDA @@ -266,51 +266,51 @@ dgemm_tcopy_L4_M4_BEGIN: asr I, N, #2 // I = N / 4 cmp I, #0 - ble dgemm_tcopy_L4_M4_40 + ble .Ldgemm_tcopy_L4_M4_40 .align 5 -dgemm_tcopy_L4_M4_20: +.Ldgemm_tcopy_L4_M4_20: COPY4x4 subs I , I , #1 - bne dgemm_tcopy_L4_M4_20 + bne .Ldgemm_tcopy_L4_M4_20 -dgemm_tcopy_L4_M4_40: +.Ldgemm_tcopy_L4_M4_40: tst N , #2 - ble dgemm_tcopy_L4_M4_60 + ble .Ldgemm_tcopy_L4_M4_60 COPY2x4 -dgemm_tcopy_L4_M4_60: +.Ldgemm_tcopy_L4_M4_60: tst N, #1 - ble dgemm_tcopy_L4_M4_END + ble .Ldgemm_tcopy_L4_M4_END COPY1x4 -dgemm_tcopy_L4_M4_END: +.Ldgemm_tcopy_L4_M4_END: subs J , J, #1 // j-- - bne dgemm_tcopy_L4_M4_BEGIN + bne .Ldgemm_tcopy_L4_M4_BEGIN /*********************************************************************************************/ -dgemm_tcopy_L2_BEGIN: +.Ldgemm_tcopy_L2_BEGIN: tst M, #3 - ble dgemm_tcopy_L999 + ble .Ldgemm_tcopy_L999 tst M, #2 - ble dgemm_tcopy_L1_BEGIN + ble .Ldgemm_tcopy_L1_BEGIN -dgemm_tcopy_L2_M4_BEGIN: +.Ldgemm_tcopy_L2_M4_BEGIN: mov A01, A add A02, A01, LDA add A, A02, LDA @@ -320,80 +320,80 @@ dgemm_tcopy_L2_M4_BEGIN: asr I, N, #2 // I = N / 4 cmp I, #0 - ble dgemm_tcopy_L2_M4_40 + ble .Ldgemm_tcopy_L2_M4_40 .align 5 -dgemm_tcopy_L2_M4_20: +.Ldgemm_tcopy_L2_M4_20: COPY4x2 subs I , I , #1 - bne dgemm_tcopy_L2_M4_20 + bne .Ldgemm_tcopy_L2_M4_20 -dgemm_tcopy_L2_M4_40: +.Ldgemm_tcopy_L2_M4_40: tst N , #2 - ble dgemm_tcopy_L2_M4_60 + ble .Ldgemm_tcopy_L2_M4_60 COPY2x2 -dgemm_tcopy_L2_M4_60: +.Ldgemm_tcopy_L2_M4_60: tst N , #1 - ble dgemm_tcopy_L2_M4_END + ble .Ldgemm_tcopy_L2_M4_END COPY1x2 -dgemm_tcopy_L2_M4_END: +.Ldgemm_tcopy_L2_M4_END: /*********************************************************************************************/ -dgemm_tcopy_L1_BEGIN: +.Ldgemm_tcopy_L1_BEGIN: tst M, #1 - ble dgemm_tcopy_L999 + ble .Ldgemm_tcopy_L999 -dgemm_tcopy_L1_M4_BEGIN: +.Ldgemm_tcopy_L1_M4_BEGIN: mov A01, A // A01 = A mov B01, B asr I, N, #2 // I = M / 4 cmp I, #0 - ble dgemm_tcopy_L1_M4_40 + ble .Ldgemm_tcopy_L1_M4_40 .align 5 -dgemm_tcopy_L1_M4_20: +.Ldgemm_tcopy_L1_M4_20: COPY4x1 subs I , I , #1 - bne dgemm_tcopy_L1_M4_20 + bne .Ldgemm_tcopy_L1_M4_20 -dgemm_tcopy_L1_M4_40: +.Ldgemm_tcopy_L1_M4_40: tst N , #2 - ble dgemm_tcopy_L1_M4_60 + ble .Ldgemm_tcopy_L1_M4_60 COPY2x1 -dgemm_tcopy_L1_M4_60: +.Ldgemm_tcopy_L1_M4_60: tst N , #1 - ble dgemm_tcopy_L1_M4_END + ble .Ldgemm_tcopy_L1_M4_END COPY1x1 -dgemm_tcopy_L1_M4_END: +.Ldgemm_tcopy_L1_M4_END: -dgemm_tcopy_L999: +.Ldgemm_tcopy_L999: mov x0, #0 // set return value RESTORE_REGS ret diff --git a/kernel/arm64/dgemm_tcopy_8.S b/kernel/arm64/dgemm_tcopy_8.S index 1c57e30e0..9ab51ff57 100644 --- a/kernel/arm64/dgemm_tcopy_8.S +++ b/kernel/arm64/dgemm_tcopy_8.S @@ -454,13 +454,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lsl M8, M, #6 // M8 = M * 8 * SIZE -dgemm_tcopy_L8_BEGIN: +.Ldgemm_tcopy_L8_BEGIN: asr J, M, #3 // J = M / 4 cmp J, #0 - ble dgemm_tcopy_L4_BEGIN + ble .Ldgemm_tcopy_L4_BEGIN .align 5 -dgemm_tcopy_L8_M8_BEGIN: +.Ldgemm_tcopy_L8_M8_BEGIN: mov A01, A add A02, A01, LDA @@ -477,53 +477,53 @@ dgemm_tcopy_L8_M8_BEGIN: asr I, N, #3 // I = N / 8 cmp I, #0 - ble dgemm_tcopy_L8_M8_40 + ble .Ldgemm_tcopy_L8_M8_40 .align 5 -dgemm_tcopy_L8_M8_20: +.Ldgemm_tcopy_L8_M8_20: COPY8x8 subs I , I , #1 - bne dgemm_tcopy_L8_M8_20 + bne .Ldgemm_tcopy_L8_M8_20 -dgemm_tcopy_L8_M8_40: +.Ldgemm_tcopy_L8_M8_40: tst N , #4 - ble dgemm_tcopy_L8_M8_60 + ble .Ldgemm_tcopy_L8_M8_60 COPY4x8 -dgemm_tcopy_L8_M8_60: +.Ldgemm_tcopy_L8_M8_60: tst N , #2 - ble dgemm_tcopy_L8_M8_80 + ble .Ldgemm_tcopy_L8_M8_80 COPY2x8 -dgemm_tcopy_L8_M8_80: +.Ldgemm_tcopy_L8_M8_80: tst N, #1 - ble dgemm_tcopy_L8_M8_END + ble .Ldgemm_tcopy_L8_M8_END COPY1x8 -dgemm_tcopy_L8_M8_END: +.Ldgemm_tcopy_L8_M8_END: subs J , J, #1 // j-- - bne dgemm_tcopy_L8_M8_BEGIN + bne .Ldgemm_tcopy_L8_M8_BEGIN /*********************************************************************************************/ -dgemm_tcopy_L4_BEGIN: +.Ldgemm_tcopy_L4_BEGIN: tst M, #7 - ble dgemm_tcopy_L999 + ble .Ldgemm_tcopy_L999 tst M, #4 - ble dgemm_tcopy_L2_BEGIN + ble .Ldgemm_tcopy_L2_BEGIN -dgemm_tcopy_L4_M8_BEGIN: +.Ldgemm_tcopy_L4_M8_BEGIN: mov A01, A add A02, A01, LDA @@ -536,51 +536,51 @@ dgemm_tcopy_L4_M8_BEGIN: asr I, N, #3 // I = N / 8 cmp I, #0 - ble dgemm_tcopy_L4_M8_40 + ble .Ldgemm_tcopy_L4_M8_40 .align 5 -dgemm_tcopy_L4_M8_20: +.Ldgemm_tcopy_L4_M8_20: COPY8x4 subs I , I , #1 - bne dgemm_tcopy_L4_M8_20 + bne .Ldgemm_tcopy_L4_M8_20 -dgemm_tcopy_L4_M8_40: +.Ldgemm_tcopy_L4_M8_40: tst N , #4 - ble dgemm_tcopy_L4_M8_60 + ble .Ldgemm_tcopy_L4_M8_60 COPY4x4 -dgemm_tcopy_L4_M8_60: +.Ldgemm_tcopy_L4_M8_60: tst N , #2 - ble dgemm_tcopy_L4_M8_80 + ble .Ldgemm_tcopy_L4_M8_80 COPY2x4 -dgemm_tcopy_L4_M8_80: +.Ldgemm_tcopy_L4_M8_80: tst N, #1 - ble dgemm_tcopy_L4_M8_END + ble .Ldgemm_tcopy_L4_M8_END COPY1x4 -dgemm_tcopy_L4_M8_END: +.Ldgemm_tcopy_L4_M8_END: /*********************************************************************************************/ -dgemm_tcopy_L2_BEGIN: +.Ldgemm_tcopy_L2_BEGIN: tst M, #3 - ble dgemm_tcopy_L999 + ble .Ldgemm_tcopy_L999 tst M, #2 - ble dgemm_tcopy_L1_BEGIN + ble .Ldgemm_tcopy_L1_BEGIN -dgemm_tcopy_L2_M8_BEGIN: +.Ldgemm_tcopy_L2_M8_BEGIN: mov A01, A add A02, A01, LDA add A, A02, LDA @@ -590,90 +590,90 @@ dgemm_tcopy_L2_M8_BEGIN: asr I, N, #3 // I = N / 8 cmp I, #0 - ble dgemm_tcopy_L2_M8_40 + ble .Ldgemm_tcopy_L2_M8_40 .align 5 -dgemm_tcopy_L2_M8_20: +.Ldgemm_tcopy_L2_M8_20: COPY8x2 subs I , I , #1 - bne dgemm_tcopy_L2_M8_20 + bne .Ldgemm_tcopy_L2_M8_20 -dgemm_tcopy_L2_M8_40: +.Ldgemm_tcopy_L2_M8_40: tst N , #4 - ble dgemm_tcopy_L2_M8_60 + ble .Ldgemm_tcopy_L2_M8_60 COPY4x2 -dgemm_tcopy_L2_M8_60: +.Ldgemm_tcopy_L2_M8_60: tst N , #2 - ble dgemm_tcopy_L2_M8_80 + ble .Ldgemm_tcopy_L2_M8_80 COPY2x2 -dgemm_tcopy_L2_M8_80: +.Ldgemm_tcopy_L2_M8_80: tst N , #1 - ble dgemm_tcopy_L2_M8_END + ble .Ldgemm_tcopy_L2_M8_END COPY1x2 -dgemm_tcopy_L2_M8_END: +.Ldgemm_tcopy_L2_M8_END: /*********************************************************************************************/ -dgemm_tcopy_L1_BEGIN: +.Ldgemm_tcopy_L1_BEGIN: tst M, #1 - ble dgemm_tcopy_L999 + ble .Ldgemm_tcopy_L999 -dgemm_tcopy_L1_M8_BEGIN: +.Ldgemm_tcopy_L1_M8_BEGIN: mov A01, A // A01 = A mov B01, B asr I, N, #3 // I = M / 8 cmp I, #0 - ble dgemm_tcopy_L1_M8_40 + ble .Ldgemm_tcopy_L1_M8_40 .align 5 -dgemm_tcopy_L1_M8_20: +.Ldgemm_tcopy_L1_M8_20: COPY8x1 subs I , I , #1 - bne dgemm_tcopy_L1_M8_20 + bne .Ldgemm_tcopy_L1_M8_20 -dgemm_tcopy_L1_M8_40: +.Ldgemm_tcopy_L1_M8_40: tst N , #4 - ble dgemm_tcopy_L1_M8_60 + ble .Ldgemm_tcopy_L1_M8_60 COPY4x1 -dgemm_tcopy_L1_M8_60: +.Ldgemm_tcopy_L1_M8_60: tst N , #2 - ble dgemm_tcopy_L1_M8_80 + ble .Ldgemm_tcopy_L1_M8_80 COPY2x1 -dgemm_tcopy_L1_M8_80: +.Ldgemm_tcopy_L1_M8_80: tst N , #1 - ble dgemm_tcopy_L1_M8_END + ble .Ldgemm_tcopy_L1_M8_END COPY1x1 -dgemm_tcopy_L1_M8_END: +.Ldgemm_tcopy_L1_M8_END: -dgemm_tcopy_L999: +.Ldgemm_tcopy_L999: mov x0, #0 // set return value RESTORE_REGS ret diff --git a/kernel/arm64/dot.S b/kernel/arm64/dot.S index 35d47790c..a1a5bf20b 100644 --- a/kernel/arm64/dot.S +++ b/kernel/arm64/dot.S @@ -154,51 +154,51 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif cmp N, xzr - ble dot_kernel_L999 + ble .Ldot_kernel_L999 cmp INC_X, #1 - bne dot_kernel_S_BEGIN + bne .Ldot_kernel_S_BEGIN cmp INC_Y, #1 - bne dot_kernel_S_BEGIN + bne .Ldot_kernel_S_BEGIN -dot_kernel_F_BEGIN: +.Ldot_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq dot_kernel_F1 + beq .Ldot_kernel_F1 -dot_kernel_F4: +.Ldot_kernel_F4: KERNEL_F4 subs I, I, #1 - bne dot_kernel_F4 + bne .Ldot_kernel_F4 KERNEL_F4_FINALIZE -dot_kernel_F1: +.Ldot_kernel_F1: ands I, N, #3 - ble dot_kernel_L999 + ble .Ldot_kernel_L999 -dot_kernel_F10: +.Ldot_kernel_F10: KERNEL_F1 subs I, I, #1 - bne dot_kernel_F10 + bne .Ldot_kernel_F10 ret -dot_kernel_S_BEGIN: +.Ldot_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble dot_kernel_S1 + ble .Ldot_kernel_S1 -dot_kernel_S4: +.Ldot_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -206,21 +206,21 @@ dot_kernel_S4: KERNEL_S1 subs I, I, #1 - bne dot_kernel_S4 + bne .Ldot_kernel_S4 -dot_kernel_S1: +.Ldot_kernel_S1: ands I, N, #3 - ble dot_kernel_L999 + ble .Ldot_kernel_L999 -dot_kernel_S10: +.Ldot_kernel_S10: KERNEL_S1 subs I, I, #1 - bne dot_kernel_S10 + bne .Ldot_kernel_S10 -dot_kernel_L999: +.Ldot_kernel_L999: ret diff --git a/kernel/arm64/dtrmm_kernel_4x4.S b/kernel/arm64/dtrmm_kernel_4x4.S index 34fb8c233..b528aeb18 100644 --- a/kernel/arm64/dtrmm_kernel_4x4.S +++ b/kernel/arm64/dtrmm_kernel_4x4.S @@ -549,11 +549,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble dtrmm_kernel_L2_BEGIN + ble .Ldtrmm_kernel_L2_BEGIN /******************************************************************************/ -dtrmm_kernel_L4_BEGIN: +.Ldtrmm_kernel_L4_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #2 @@ -563,14 +563,14 @@ dtrmm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -dtrmm_kernel_L4_M4_BEGIN: +.Ldtrmm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dtrmm_kernel_L4_M2_BEGIN + ble .Ldtrmm_kernel_L4_M2_BEGIN -dtrmm_kernel_L4_M4_20: +.Ldtrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -591,57 +591,57 @@ dtrmm_kernel_L4_M4_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt dtrmm_kernel_L4_M4_32 + blt .Ldtrmm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble dtrmm_kernel_L4_M4_22a + ble .Ldtrmm_kernel_L4_M4_22a .align 5 -dtrmm_kernel_L4_M4_22: +.Ldtrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M4_22 + bgt .Ldtrmm_kernel_L4_M4_22 -dtrmm_kernel_L4_M4_22a: +.Ldtrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b dtrmm_kernel_L4_M4_44 + b .Ldtrmm_kernel_L4_M4_44 -dtrmm_kernel_L4_M4_32: +.Ldtrmm_kernel_L4_M4_32: tst counterL, #1 - ble dtrmm_kernel_L4_M4_40 + ble .Ldtrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b dtrmm_kernel_L4_M4_44 + b .Ldtrmm_kernel_L4_M4_44 -dtrmm_kernel_L4_M4_40: +.Ldtrmm_kernel_L4_M4_40: INIT4x4 -dtrmm_kernel_L4_M4_44: +.Ldtrmm_kernel_L4_M4_44: ands counterL , tempK, #1 - ble dtrmm_kernel_L4_M4_100 + ble .Ldtrmm_kernel_L4_M4_100 -dtrmm_kernel_L4_M4_46: +.Ldtrmm_kernel_L4_M4_46: KERNEL4x4_SUB -dtrmm_kernel_L4_M4_100: +.Ldtrmm_kernel_L4_M4_100: SAVE4x4 @@ -660,20 +660,20 @@ dtrmm_kernel_L4_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L4_M4_END: +.Ldtrmm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne dtrmm_kernel_L4_M4_20 + bne .Ldtrmm_kernel_L4_M4_20 -dtrmm_kernel_L4_M2_BEGIN: +.Ldtrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L4_END + ble .Ldtrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L4_M1_BEGIN + ble .Ldtrmm_kernel_L4_M1_BEGIN -dtrmm_kernel_L4_M2_20: +.Ldtrmm_kernel_L4_M2_20: INIT2x4 @@ -697,9 +697,9 @@ dtrmm_kernel_L4_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L4_M2_40 + ble .Ldtrmm_kernel_L4_M2_40 -dtrmm_kernel_L4_M2_22: +.Ldtrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -712,22 +712,22 @@ dtrmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M2_22 + bgt .Ldtrmm_kernel_L4_M2_22 -dtrmm_kernel_L4_M2_40: +.Ldtrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L4_M2_100 + ble .Ldtrmm_kernel_L4_M2_100 -dtrmm_kernel_L4_M2_42: +.Ldtrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M2_42 + bgt .Ldtrmm_kernel_L4_M2_42 -dtrmm_kernel_L4_M2_100: +.Ldtrmm_kernel_L4_M2_100: SAVE2x4 @@ -747,15 +747,15 @@ dtrmm_kernel_L4_M2_100: add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L4_M2_END: +.Ldtrmm_kernel_L4_M2_END: -dtrmm_kernel_L4_M1_BEGIN: +.Ldtrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L4_END + ble .Ldtrmm_kernel_L4_END -dtrmm_kernel_L4_M1_20: +.Ldtrmm_kernel_L4_M1_20: INIT1x4 @@ -779,9 +779,9 @@ dtrmm_kernel_L4_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L4_M1_40 + ble .Ldtrmm_kernel_L4_M1_40 -dtrmm_kernel_L4_M1_22: +.Ldtrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -793,22 +793,22 @@ dtrmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M1_22 + bgt .Ldtrmm_kernel_L4_M1_22 -dtrmm_kernel_L4_M1_40: +.Ldtrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L4_M1_100 + ble .Ldtrmm_kernel_L4_M1_100 -dtrmm_kernel_L4_M1_42: +.Ldtrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M1_42 + bgt .Ldtrmm_kernel_L4_M1_42 -dtrmm_kernel_L4_M1_100: +.Ldtrmm_kernel_L4_M1_100: SAVE1x4 @@ -828,7 +828,7 @@ dtrmm_kernel_L4_M1_100: add tempOffset, tempOffset, #1 #endif -dtrmm_kernel_L4_END: +.Ldtrmm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 @@ -838,19 +838,19 @@ dtrmm_kernel_L4_END: #endif subs counterJ, counterJ , #1 // j-- - bgt dtrmm_kernel_L4_BEGIN + bgt .Ldtrmm_kernel_L4_BEGIN /******************************************************************************/ -dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Ldtrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble dtrmm_kernel_L999 // error, N was less than 4? + ble .Ldtrmm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble dtrmm_kernel_L1_BEGIN + ble .Ldtrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -863,14 +863,14 @@ dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -dtrmm_kernel_L2_M4_BEGIN: +.Ldtrmm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble dtrmm_kernel_L2_M2_BEGIN + ble .Ldtrmm_kernel_L2_M2_BEGIN -dtrmm_kernel_L2_M4_20: +.Ldtrmm_kernel_L2_M4_20: INIT4x2 @@ -894,10 +894,10 @@ dtrmm_kernel_L2_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dtrmm_kernel_L2_M4_40 + ble .Ldtrmm_kernel_L2_M4_40 .align 5 -dtrmm_kernel_L2_M4_22: +.Ldtrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -909,22 +909,22 @@ dtrmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M4_22 + bgt .Ldtrmm_kernel_L2_M4_22 -dtrmm_kernel_L2_M4_40: +.Ldtrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M4_100 + ble .Ldtrmm_kernel_L2_M4_100 -dtrmm_kernel_L2_M4_42: +.Ldtrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M4_42 + bgt .Ldtrmm_kernel_L2_M4_42 -dtrmm_kernel_L2_M4_100: +.Ldtrmm_kernel_L2_M4_100: SAVE4x2 @@ -944,22 +944,22 @@ dtrmm_kernel_L2_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L2_M4_END: +.Ldtrmm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt dtrmm_kernel_L2_M4_20 + bgt .Ldtrmm_kernel_L2_M4_20 -dtrmm_kernel_L2_M2_BEGIN: +.Ldtrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L2_END + ble .Ldtrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L2_M1_BEGIN + ble .Ldtrmm_kernel_L2_M1_BEGIN -dtrmm_kernel_L2_M2_20: +.Ldtrmm_kernel_L2_M2_20: INIT2x2 @@ -983,9 +983,9 @@ dtrmm_kernel_L2_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dtrmm_kernel_L2_M2_40 + ble .Ldtrmm_kernel_L2_M2_40 -dtrmm_kernel_L2_M2_22: +.Ldtrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -998,22 +998,22 @@ dtrmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M2_22 + bgt .Ldtrmm_kernel_L2_M2_22 -dtrmm_kernel_L2_M2_40: +.Ldtrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M2_100 + ble .Ldtrmm_kernel_L2_M2_100 -dtrmm_kernel_L2_M2_42: +.Ldtrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M2_42 + bgt .Ldtrmm_kernel_L2_M2_42 -dtrmm_kernel_L2_M2_100: +.Ldtrmm_kernel_L2_M2_100: SAVE2x2 @@ -1033,15 +1033,15 @@ dtrmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L2_M2_END: +.Ldtrmm_kernel_L2_M2_END: -dtrmm_kernel_L2_M1_BEGIN: +.Ldtrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L2_END + ble .Ldtrmm_kernel_L2_END -dtrmm_kernel_L2_M1_20: +.Ldtrmm_kernel_L2_M1_20: INIT1x2 @@ -1065,9 +1065,9 @@ dtrmm_kernel_L2_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dtrmm_kernel_L2_M1_40 + ble .Ldtrmm_kernel_L2_M1_40 -dtrmm_kernel_L2_M1_22: +.Ldtrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1079,22 +1079,22 @@ dtrmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M1_22 + bgt .Ldtrmm_kernel_L2_M1_22 -dtrmm_kernel_L2_M1_40: +.Ldtrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M1_100 + ble .Ldtrmm_kernel_L2_M1_100 -dtrmm_kernel_L2_M1_42: +.Ldtrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M1_42 + bgt .Ldtrmm_kernel_L2_M1_42 -dtrmm_kernel_L2_M1_100: +.Ldtrmm_kernel_L2_M1_100: SAVE1x2 @@ -1114,7 +1114,7 @@ dtrmm_kernel_L2_M1_100: add tempOffset, tempOffset, #1 #endif -dtrmm_kernel_L2_END: +.Ldtrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -1122,11 +1122,11 @@ dtrmm_kernel_L2_END: /******************************************************************************/ -dtrmm_kernel_L1_BEGIN: +.Ldtrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble dtrmm_kernel_L999 // done + ble .Ldtrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1138,14 +1138,14 @@ dtrmm_kernel_L1_BEGIN: mov pA, origPA // pA = A -dtrmm_kernel_L1_M4_BEGIN: +.Ldtrmm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dtrmm_kernel_L1_M2_BEGIN + ble .Ldtrmm_kernel_L1_M2_BEGIN -dtrmm_kernel_L1_M4_20: +.Ldtrmm_kernel_L1_M4_20: INIT4x1 @@ -1169,10 +1169,10 @@ dtrmm_kernel_L1_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M4_40 + ble .Ldtrmm_kernel_L1_M4_40 .align 5 -dtrmm_kernel_L1_M4_22: +.Ldtrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1184,22 +1184,22 @@ dtrmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M4_22 + bgt .Ldtrmm_kernel_L1_M4_22 -dtrmm_kernel_L1_M4_40: +.Ldtrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M4_100 + ble .Ldtrmm_kernel_L1_M4_100 -dtrmm_kernel_L1_M4_42: +.Ldtrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M4_42 + bgt .Ldtrmm_kernel_L1_M4_42 -dtrmm_kernel_L1_M4_100: +.Ldtrmm_kernel_L1_M4_100: SAVE4x1 @@ -1220,22 +1220,22 @@ dtrmm_kernel_L1_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L1_M4_END: +.Ldtrmm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt dtrmm_kernel_L1_M4_20 + bgt .Ldtrmm_kernel_L1_M4_20 -dtrmm_kernel_L1_M2_BEGIN: +.Ldtrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L1_END + ble .Ldtrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L1_M1_BEGIN + ble .Ldtrmm_kernel_L1_M1_BEGIN -dtrmm_kernel_L1_M2_20: +.Ldtrmm_kernel_L1_M2_20: INIT2x1 @@ -1259,9 +1259,9 @@ dtrmm_kernel_L1_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M2_40 + ble .Ldtrmm_kernel_L1_M2_40 -dtrmm_kernel_L1_M2_22: +.Ldtrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1274,22 +1274,22 @@ dtrmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M2_22 + bgt .Ldtrmm_kernel_L1_M2_22 -dtrmm_kernel_L1_M2_40: +.Ldtrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M2_100 + ble .Ldtrmm_kernel_L1_M2_100 -dtrmm_kernel_L1_M2_42: +.Ldtrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M2_42 + bgt .Ldtrmm_kernel_L1_M2_42 -dtrmm_kernel_L1_M2_100: +.Ldtrmm_kernel_L1_M2_100: SAVE2x1 @@ -1309,15 +1309,15 @@ dtrmm_kernel_L1_M2_100: add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L1_M2_END: +.Ldtrmm_kernel_L1_M2_END: -dtrmm_kernel_L1_M1_BEGIN: +.Ldtrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L1_END + ble .Ldtrmm_kernel_L1_END -dtrmm_kernel_L1_M1_20: +.Ldtrmm_kernel_L1_M1_20: INIT1x1 @@ -1341,9 +1341,9 @@ dtrmm_kernel_L1_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M1_40 + ble .Ldtrmm_kernel_L1_M1_40 -dtrmm_kernel_L1_M1_22: +.Ldtrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1355,30 +1355,30 @@ dtrmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M1_22 + bgt .Ldtrmm_kernel_L1_M1_22 -dtrmm_kernel_L1_M1_40: +.Ldtrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M1_100 + ble .Ldtrmm_kernel_L1_M1_100 -dtrmm_kernel_L1_M1_42: +.Ldtrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M1_42 + bgt .Ldtrmm_kernel_L1_M1_42 -dtrmm_kernel_L1_M1_100: +.Ldtrmm_kernel_L1_M1_100: SAVE1x1 -dtrmm_kernel_L1_END: +.Ldtrmm_kernel_L1_END: -dtrmm_kernel_L999: +.Ldtrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/dtrmm_kernel_4x8.S b/kernel/arm64/dtrmm_kernel_4x8.S index 4aecf28eb..47956dec5 100644 --- a/kernel/arm64/dtrmm_kernel_4x8.S +++ b/kernel/arm64/dtrmm_kernel_4x8.S @@ -900,11 +900,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #3 // J = J / 8 cmp counterJ, #0 - ble dtrmm_kernel_L4_BEGIN + ble .Ldtrmm_kernel_L4_BEGIN /******************************************************************************/ -dtrmm_kernel_L8_BEGIN: +.Ldtrmm_kernel_L8_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #3 @@ -915,14 +915,14 @@ dtrmm_kernel_L8_BEGIN: mov pA, origPA // pA = start of A array -dtrmm_kernel_L8_M4_BEGIN: +.Ldtrmm_kernel_L8_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dtrmm_kernel_L8_M2_BEGIN + ble .Ldtrmm_kernel_L8_M2_BEGIN -dtrmm_kernel_L8_M4_20: +.Ldtrmm_kernel_L8_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -944,57 +944,57 @@ dtrmm_kernel_L8_M4_20: asr counterL, tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt dtrmm_kernel_L8_M4_32 + blt .Ldtrmm_kernel_L8_M4_32 KERNEL4x8_I // do one in the K KERNEL4x8_M2 // do another in the K subs counterL, counterL, #2 - ble dtrmm_kernel_L8_M4_22a + ble .Ldtrmm_kernel_L8_M4_22a .align 5 -dtrmm_kernel_L8_M4_22: +.Ldtrmm_kernel_L8_M4_22: KERNEL4x8_M1 KERNEL4x8_M2 subs counterL, counterL, #1 - bgt dtrmm_kernel_L8_M4_22 + bgt .Ldtrmm_kernel_L8_M4_22 -dtrmm_kernel_L8_M4_22a: +.Ldtrmm_kernel_L8_M4_22a: KERNEL4x8_M1 KERNEL4x8_E - b dtrmm_kernel_L8_M4_44 + b .Ldtrmm_kernel_L8_M4_44 -dtrmm_kernel_L8_M4_32: +.Ldtrmm_kernel_L8_M4_32: tst counterL, #1 - ble dtrmm_kernel_L8_M4_40 + ble .Ldtrmm_kernel_L8_M4_40 KERNEL4x8_I KERNEL4x8_E - b dtrmm_kernel_L8_M4_44 + b .Ldtrmm_kernel_L8_M4_44 -dtrmm_kernel_L8_M4_40: +.Ldtrmm_kernel_L8_M4_40: INIT4x8 -dtrmm_kernel_L8_M4_44: +.Ldtrmm_kernel_L8_M4_44: ands counterL, tempK, #1 - ble dtrmm_kernel_L8_M4_100 + ble .Ldtrmm_kernel_L8_M4_100 -dtrmm_kernel_L8_M4_46: +.Ldtrmm_kernel_L8_M4_46: KERNEL4x8_SUB -dtrmm_kernel_L8_M4_100: +.Ldtrmm_kernel_L8_M4_100: SAVE4x8 @@ -1014,20 +1014,20 @@ dtrmm_kernel_L8_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L8_M4_END: +.Ldtrmm_kernel_L8_M4_END: subs counterI, counterI, #1 - bne dtrmm_kernel_L8_M4_20 + bne .Ldtrmm_kernel_L8_M4_20 -dtrmm_kernel_L8_M2_BEGIN: +.Ldtrmm_kernel_L8_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L8_END + ble .Ldtrmm_kernel_L8_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L8_M1_BEGIN + ble .Ldtrmm_kernel_L8_M1_BEGIN -dtrmm_kernel_L8_M2_20: +.Ldtrmm_kernel_L8_M2_20: INIT2x8 @@ -1051,9 +1051,9 @@ dtrmm_kernel_L8_M2_20: asr counterL, tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L8_M2_40 + ble .Ldtrmm_kernel_L8_M2_40 -dtrmm_kernel_L8_M2_22: +.Ldtrmm_kernel_L8_M2_22: KERNEL2x8_SUB KERNEL2x8_SUB @@ -1066,22 +1066,22 @@ dtrmm_kernel_L8_M2_22: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L8_M2_22 + bgt .Ldtrmm_kernel_L8_M2_22 -dtrmm_kernel_L8_M2_40: +.Ldtrmm_kernel_L8_M2_40: ands counterL, tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L8_M2_100 + ble .Ldtrmm_kernel_L8_M2_100 -dtrmm_kernel_L8_M2_42: +.Ldtrmm_kernel_L8_M2_42: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L8_M2_42 + bgt .Ldtrmm_kernel_L8_M2_42 -dtrmm_kernel_L8_M2_100: +.Ldtrmm_kernel_L8_M2_100: SAVE2x8 @@ -1102,15 +1102,15 @@ dtrmm_kernel_L8_M2_100: add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L8_M2_END: +.Ldtrmm_kernel_L8_M2_END: -dtrmm_kernel_L8_M1_BEGIN: +.Ldtrmm_kernel_L8_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L8_END + ble .Ldtrmm_kernel_L8_END -dtrmm_kernel_L8_M1_20: +.Ldtrmm_kernel_L8_M1_20: INIT1x8 @@ -1134,9 +1134,9 @@ dtrmm_kernel_L8_M1_20: asr counterL, tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L8_M1_40 + ble .Ldtrmm_kernel_L8_M1_40 -dtrmm_kernel_L8_M1_22: +.Ldtrmm_kernel_L8_M1_22: KERNEL1x8_SUB KERNEL1x8_SUB KERNEL1x8_SUB @@ -1148,22 +1148,22 @@ dtrmm_kernel_L8_M1_22: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L8_M1_22 + bgt .Ldtrmm_kernel_L8_M1_22 -dtrmm_kernel_L8_M1_40: +.Ldtrmm_kernel_L8_M1_40: ands counterL, tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L8_M1_100 + ble .Ldtrmm_kernel_L8_M1_100 -dtrmm_kernel_L8_M1_42: +.Ldtrmm_kernel_L8_M1_42: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L8_M1_42 + bgt .Ldtrmm_kernel_L8_M1_42 -dtrmm_kernel_L8_M1_100: +.Ldtrmm_kernel_L8_M1_100: SAVE1x8 @@ -1183,7 +1183,7 @@ dtrmm_kernel_L8_M1_100: add tempOffset, tempOffset, #1 #endif -dtrmm_kernel_L8_END: +.Ldtrmm_kernel_L8_END: lsl temp, origK, #6 add origPB, origPB, temp // B = B + K * 8 * 8 @@ -1193,19 +1193,19 @@ dtrmm_kernel_L8_END: #endif subs counterJ, counterJ , #1 // j-- - bgt dtrmm_kernel_L8_BEGIN + bgt .Ldtrmm_kernel_L8_BEGIN /******************************************************************************/ -dtrmm_kernel_L4_BEGIN: +.Ldtrmm_kernel_L4_BEGIN: mov counterJ , origN tst counterJ , #7 - ble dtrmm_kernel_L999 + ble .Ldtrmm_kernel_L999 tst counterJ , #4 - ble dtrmm_kernel_L2_BEGIN + ble .Ldtrmm_kernel_L2_BEGIN mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #2 @@ -1216,14 +1216,14 @@ dtrmm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -dtrmm_kernel_L4_M4_BEGIN: +.Ldtrmm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dtrmm_kernel_L4_M2_BEGIN + ble .Ldtrmm_kernel_L4_M2_BEGIN -dtrmm_kernel_L4_M4_20: +.Ldtrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1244,57 +1244,57 @@ dtrmm_kernel_L4_M4_20: asr counterL, tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt dtrmm_kernel_L4_M4_32 + blt .Ldtrmm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble dtrmm_kernel_L4_M4_22a + ble .Ldtrmm_kernel_L4_M4_22a .align 5 -dtrmm_kernel_L4_M4_22: +.Ldtrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M4_22 + bgt .Ldtrmm_kernel_L4_M4_22 -dtrmm_kernel_L4_M4_22a: +.Ldtrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b dtrmm_kernel_L4_M4_44 + b .Ldtrmm_kernel_L4_M4_44 -dtrmm_kernel_L4_M4_32: +.Ldtrmm_kernel_L4_M4_32: tst counterL, #1 - ble dtrmm_kernel_L4_M4_40 + ble .Ldtrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b dtrmm_kernel_L4_M4_44 + b .Ldtrmm_kernel_L4_M4_44 -dtrmm_kernel_L4_M4_40: +.Ldtrmm_kernel_L4_M4_40: INIT4x4 -dtrmm_kernel_L4_M4_44: +.Ldtrmm_kernel_L4_M4_44: ands counterL , tempK, #1 - ble dtrmm_kernel_L4_M4_100 + ble .Ldtrmm_kernel_L4_M4_100 -dtrmm_kernel_L4_M4_46: +.Ldtrmm_kernel_L4_M4_46: KERNEL4x4_SUB -dtrmm_kernel_L4_M4_100: +.Ldtrmm_kernel_L4_M4_100: SAVE4x4 #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) @@ -1312,20 +1312,20 @@ dtrmm_kernel_L4_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L4_M4_END: +.Ldtrmm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne dtrmm_kernel_L4_M4_20 + bne .Ldtrmm_kernel_L4_M4_20 -dtrmm_kernel_L4_M2_BEGIN: +.Ldtrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L4_END + ble .Ldtrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L4_M1_BEGIN + ble .Ldtrmm_kernel_L4_M1_BEGIN -dtrmm_kernel_L4_M2_20: +.Ldtrmm_kernel_L4_M2_20: INIT2x4 @@ -1348,9 +1348,9 @@ dtrmm_kernel_L4_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L4_M2_40 + ble .Ldtrmm_kernel_L4_M2_40 -dtrmm_kernel_L4_M2_22: +.Ldtrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1363,22 +1363,22 @@ dtrmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M2_22 + bgt .Ldtrmm_kernel_L4_M2_22 -dtrmm_kernel_L4_M2_40: +.Ldtrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L4_M2_100 + ble .Ldtrmm_kernel_L4_M2_100 -dtrmm_kernel_L4_M2_42: +.Ldtrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M2_42 + bgt .Ldtrmm_kernel_L4_M2_42 -dtrmm_kernel_L4_M2_100: +.Ldtrmm_kernel_L4_M2_100: SAVE2x4 @@ -1397,15 +1397,15 @@ dtrmm_kernel_L4_M2_100: #if defined(LEFT) add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L4_M2_END: +.Ldtrmm_kernel_L4_M2_END: -dtrmm_kernel_L4_M1_BEGIN: +.Ldtrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L4_END + ble .Ldtrmm_kernel_L4_END -dtrmm_kernel_L4_M1_20: +.Ldtrmm_kernel_L4_M1_20: INIT1x4 @@ -1428,9 +1428,9 @@ dtrmm_kernel_L4_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L4_M1_40 + ble .Ldtrmm_kernel_L4_M1_40 -dtrmm_kernel_L4_M1_22: +.Ldtrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1442,22 +1442,22 @@ dtrmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M1_22 + bgt .Ldtrmm_kernel_L4_M1_22 -dtrmm_kernel_L4_M1_40: +.Ldtrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L4_M1_100 + ble .Ldtrmm_kernel_L4_M1_100 -dtrmm_kernel_L4_M1_42: +.Ldtrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M1_42 + bgt .Ldtrmm_kernel_L4_M1_42 -dtrmm_kernel_L4_M1_100: +.Ldtrmm_kernel_L4_M1_100: SAVE1x4 @@ -1476,7 +1476,7 @@ dtrmm_kernel_L4_M1_100: #if defined(LEFT) add tempOffset, tempOffset, #1 #endif -dtrmm_kernel_L4_END: +.Ldtrmm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 @@ -1486,14 +1486,14 @@ dtrmm_kernel_L4_END: /******************************************************************************/ -dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Ldtrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble dtrmm_kernel_L999 // error, N was less than 4? + ble .Ldtrmm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble dtrmm_kernel_L1_BEGIN + ble .Ldtrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1505,14 +1505,14 @@ dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -dtrmm_kernel_L2_M4_BEGIN: +.Ldtrmm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble dtrmm_kernel_L2_M2_BEGIN + ble .Ldtrmm_kernel_L2_M2_BEGIN -dtrmm_kernel_L2_M4_20: +.Ldtrmm_kernel_L2_M4_20: INIT4x2 @@ -1535,10 +1535,10 @@ dtrmm_kernel_L2_M4_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dtrmm_kernel_L2_M4_40 + ble .Ldtrmm_kernel_L2_M4_40 .align 5 -dtrmm_kernel_L2_M4_22: +.Ldtrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1550,22 +1550,22 @@ dtrmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M4_22 + bgt .Ldtrmm_kernel_L2_M4_22 -dtrmm_kernel_L2_M4_40: +.Ldtrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M4_100 + ble .Ldtrmm_kernel_L2_M4_100 -dtrmm_kernel_L2_M4_42: +.Ldtrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M4_42 + bgt .Ldtrmm_kernel_L2_M4_42 -dtrmm_kernel_L2_M4_100: +.Ldtrmm_kernel_L2_M4_100: SAVE4x2 #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) @@ -1584,22 +1584,22 @@ dtrmm_kernel_L2_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L2_M4_END: +.Ldtrmm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt dtrmm_kernel_L2_M4_20 + bgt .Ldtrmm_kernel_L2_M4_20 -dtrmm_kernel_L2_M2_BEGIN: +.Ldtrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L2_END + ble .Ldtrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L2_M1_BEGIN + ble .Ldtrmm_kernel_L2_M1_BEGIN -dtrmm_kernel_L2_M2_20: +.Ldtrmm_kernel_L2_M2_20: INIT2x2 @@ -1622,9 +1622,9 @@ dtrmm_kernel_L2_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dtrmm_kernel_L2_M2_40 + ble .Ldtrmm_kernel_L2_M2_40 -dtrmm_kernel_L2_M2_22: +.Ldtrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1637,22 +1637,22 @@ dtrmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M2_22 + bgt .Ldtrmm_kernel_L2_M2_22 -dtrmm_kernel_L2_M2_40: +.Ldtrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M2_100 + ble .Ldtrmm_kernel_L2_M2_100 -dtrmm_kernel_L2_M2_42: +.Ldtrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M2_42 + bgt .Ldtrmm_kernel_L2_M2_42 -dtrmm_kernel_L2_M2_100: +.Ldtrmm_kernel_L2_M2_100: SAVE2x2 @@ -1671,15 +1671,15 @@ dtrmm_kernel_L2_M2_100: #if defined(LEFT) add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L2_M2_END: +.Ldtrmm_kernel_L2_M2_END: -dtrmm_kernel_L2_M1_BEGIN: +.Ldtrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L2_END + ble .Ldtrmm_kernel_L2_END -dtrmm_kernel_L2_M1_20: +.Ldtrmm_kernel_L2_M1_20: INIT1x2 @@ -1702,9 +1702,9 @@ dtrmm_kernel_L2_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dtrmm_kernel_L2_M1_40 + ble .Ldtrmm_kernel_L2_M1_40 -dtrmm_kernel_L2_M1_22: +.Ldtrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1716,22 +1716,22 @@ dtrmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M1_22 + bgt .Ldtrmm_kernel_L2_M1_22 -dtrmm_kernel_L2_M1_40: +.Ldtrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M1_100 + ble .Ldtrmm_kernel_L2_M1_100 -dtrmm_kernel_L2_M1_42: +.Ldtrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M1_42 + bgt .Ldtrmm_kernel_L2_M1_42 -dtrmm_kernel_L2_M1_100: +.Ldtrmm_kernel_L2_M1_100: SAVE1x2 @@ -1750,7 +1750,7 @@ dtrmm_kernel_L2_M1_100: #if defined(LEFT) add tempOffset, tempOffset, #1 #endif -dtrmm_kernel_L2_END: +.Ldtrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -1758,11 +1758,11 @@ dtrmm_kernel_L2_END: /******************************************************************************/ -dtrmm_kernel_L1_BEGIN: +.Ldtrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble dtrmm_kernel_L999 // done + ble .Ldtrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1773,14 +1773,14 @@ dtrmm_kernel_L1_BEGIN: #endif mov pA, origPA // pA = A -dtrmm_kernel_L1_M4_BEGIN: +.Ldtrmm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble dtrmm_kernel_L1_M2_BEGIN + ble .Ldtrmm_kernel_L1_M2_BEGIN -dtrmm_kernel_L1_M4_20: +.Ldtrmm_kernel_L1_M4_20: INIT4x1 @@ -1802,10 +1802,10 @@ dtrmm_kernel_L1_M4_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M4_40 + ble .Ldtrmm_kernel_L1_M4_40 .align 5 -dtrmm_kernel_L1_M4_22: +.Ldtrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1817,22 +1817,22 @@ dtrmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M4_22 + bgt .Ldtrmm_kernel_L1_M4_22 -dtrmm_kernel_L1_M4_40: +.Ldtrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M4_100 + ble .Ldtrmm_kernel_L1_M4_100 -dtrmm_kernel_L1_M4_42: +.Ldtrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M4_42 + bgt .Ldtrmm_kernel_L1_M4_42 -dtrmm_kernel_L1_M4_100: +.Ldtrmm_kernel_L1_M4_100: SAVE4x1 #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) @@ -1851,22 +1851,22 @@ dtrmm_kernel_L1_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L1_M4_END: +.Ldtrmm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt dtrmm_kernel_L1_M4_20 + bgt .Ldtrmm_kernel_L1_M4_20 -dtrmm_kernel_L1_M2_BEGIN: +.Ldtrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L1_END + ble .Ldtrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L1_M1_BEGIN + ble .Ldtrmm_kernel_L1_M1_BEGIN -dtrmm_kernel_L1_M2_20: +.Ldtrmm_kernel_L1_M2_20: INIT2x1 @@ -1889,9 +1889,9 @@ dtrmm_kernel_L1_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M2_40 + ble .Ldtrmm_kernel_L1_M2_40 -dtrmm_kernel_L1_M2_22: +.Ldtrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1904,22 +1904,22 @@ dtrmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M2_22 + bgt .Ldtrmm_kernel_L1_M2_22 -dtrmm_kernel_L1_M2_40: +.Ldtrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M2_100 + ble .Ldtrmm_kernel_L1_M2_100 -dtrmm_kernel_L1_M2_42: +.Ldtrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M2_42 + bgt .Ldtrmm_kernel_L1_M2_42 -dtrmm_kernel_L1_M2_100: +.Ldtrmm_kernel_L1_M2_100: SAVE2x1 @@ -1938,15 +1938,15 @@ dtrmm_kernel_L1_M2_100: #if defined(LEFT) add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L1_M2_END: +.Ldtrmm_kernel_L1_M2_END: -dtrmm_kernel_L1_M1_BEGIN: +.Ldtrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L1_END + ble .Ldtrmm_kernel_L1_END -dtrmm_kernel_L1_M1_20: +.Ldtrmm_kernel_L1_M1_20: INIT1x1 @@ -1969,9 +1969,9 @@ dtrmm_kernel_L1_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M1_40 + ble .Ldtrmm_kernel_L1_M1_40 -dtrmm_kernel_L1_M1_22: +.Ldtrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1983,30 +1983,30 @@ dtrmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M1_22 + bgt .Ldtrmm_kernel_L1_M1_22 -dtrmm_kernel_L1_M1_40: +.Ldtrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M1_100 + ble .Ldtrmm_kernel_L1_M1_100 -dtrmm_kernel_L1_M1_42: +.Ldtrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M1_42 + bgt .Ldtrmm_kernel_L1_M1_42 -dtrmm_kernel_L1_M1_100: +.Ldtrmm_kernel_L1_M1_100: SAVE1x1 -dtrmm_kernel_L1_END: +.Ldtrmm_kernel_L1_END: -dtrmm_kernel_L999: +.Ldtrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/dtrmm_kernel_8x4.S b/kernel/arm64/dtrmm_kernel_8x4.S index 2b8173715..0ac5a5f24 100644 --- a/kernel/arm64/dtrmm_kernel_8x4.S +++ b/kernel/arm64/dtrmm_kernel_8x4.S @@ -829,11 +829,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble dtrmm_kernel_L2_BEGIN + ble .Ldtrmm_kernel_L2_BEGIN /******************************************************************************/ -dtrmm_kernel_L4_BEGIN: +.Ldtrmm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -847,15 +847,15 @@ dtrmm_kernel_L4_BEGIN: #endif mov pA, origPA // pA = start of A array -dtrmm_kernel_L4_M8_BEGIN: +.Ldtrmm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dtrmm_kernel_L4_M4_BEGIN + ble .Ldtrmm_kernel_L4_M4_BEGIN .align 5 -dtrmm_kernel_L4_M8_20: +.Ldtrmm_kernel_L4_M8_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -877,7 +877,7 @@ dtrmm_kernel_L4_M8_20: asr counterL , tempK, #3 // L = K / 8 cmp counterL , #2 // is there at least 4 to do? - blt dtrmm_kernel_L4_M8_32 + blt .Ldtrmm_kernel_L4_M8_32 KERNEL8x4_I // do one in the K KERNEL8x4_M2 // do another in the K @@ -889,10 +889,10 @@ dtrmm_kernel_L4_M8_20: KERNEL8x4_M2 subs counterL, counterL, #2 // subtract 2 - ble dtrmm_kernel_L4_M8_22a + ble .Ldtrmm_kernel_L4_M8_22a .align 5 -dtrmm_kernel_L4_M8_22: +.Ldtrmm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 @@ -904,10 +904,10 @@ dtrmm_kernel_L4_M8_22: KERNEL8x4_M2 subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M8_22 + bgt .Ldtrmm_kernel_L4_M8_22 .align 5 -dtrmm_kernel_L4_M8_22a: +.Ldtrmm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_M2 @@ -918,13 +918,13 @@ dtrmm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b dtrmm_kernel_L4_M8_44 + b .Ldtrmm_kernel_L4_M8_44 .align 5 -dtrmm_kernel_L4_M8_32: +.Ldtrmm_kernel_L4_M8_32: tst counterL, #1 - ble dtrmm_kernel_L4_M8_40 + ble .Ldtrmm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_M2 @@ -935,26 +935,26 @@ dtrmm_kernel_L4_M8_32: KERNEL8x4_M1 KERNEL8x4_E - b dtrmm_kernel_L4_M8_44 + b .Ldtrmm_kernel_L4_M8_44 -dtrmm_kernel_L4_M8_40: +.Ldtrmm_kernel_L4_M8_40: INIT8x4 -dtrmm_kernel_L4_M8_44: +.Ldtrmm_kernel_L4_M8_44: ands counterL , tempK, #7 - ble dtrmm_kernel_L4_M8_100 + ble .Ldtrmm_kernel_L4_M8_100 .align 5 -dtrmm_kernel_L4_M8_46: +.Ldtrmm_kernel_L4_M8_46: KERNEL8x4_SUB subs counterL, counterL, #1 - bne dtrmm_kernel_L4_M8_46 + bne .Ldtrmm_kernel_L4_M8_46 -dtrmm_kernel_L4_M8_100: +.Ldtrmm_kernel_L4_M8_100: SAVE8x4 @@ -977,20 +977,20 @@ dtrmm_kernel_L4_M8_100: prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] -dtrmm_kernel_L4_M8_END: +.Ldtrmm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne dtrmm_kernel_L4_M8_20 + bne .Ldtrmm_kernel_L4_M8_20 -dtrmm_kernel_L4_M4_BEGIN: +.Ldtrmm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dtrmm_kernel_L4_END + ble .Ldtrmm_kernel_L4_END tst counterI, #4 - ble dtrmm_kernel_L4_M2_BEGIN + ble .Ldtrmm_kernel_L4_M2_BEGIN -dtrmm_kernel_L4_M4_20: +.Ldtrmm_kernel_L4_M4_20: INIT4x4 @@ -1013,9 +1013,9 @@ dtrmm_kernel_L4_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L4_M4_40 + ble .Ldtrmm_kernel_L4_M4_40 -dtrmm_kernel_L4_M4_22: +.Ldtrmm_kernel_L4_M4_22: KERNEL4x4_SUB KERNEL4x4_SUB @@ -1028,22 +1028,22 @@ dtrmm_kernel_L4_M4_22: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M4_22 + bgt .Ldtrmm_kernel_L4_M4_22 -dtrmm_kernel_L4_M4_40: +.Ldtrmm_kernel_L4_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L4_M4_100 + ble .Ldtrmm_kernel_L4_M4_100 -dtrmm_kernel_L4_M4_42: +.Ldtrmm_kernel_L4_M4_42: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M4_42 + bgt .Ldtrmm_kernel_L4_M4_42 -dtrmm_kernel_L4_M4_100: +.Ldtrmm_kernel_L4_M4_100: SAVE4x4 @@ -1062,19 +1062,19 @@ dtrmm_kernel_L4_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L4_M4_END: +.Ldtrmm_kernel_L4_M4_END: -dtrmm_kernel_L4_M2_BEGIN: +.Ldtrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L4_END + ble .Ldtrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L4_M1_BEGIN + ble .Ldtrmm_kernel_L4_M1_BEGIN -dtrmm_kernel_L4_M2_20: +.Ldtrmm_kernel_L4_M2_20: INIT2x4 @@ -1097,9 +1097,9 @@ dtrmm_kernel_L4_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L4_M2_40 + ble .Ldtrmm_kernel_L4_M2_40 -dtrmm_kernel_L4_M2_22: +.Ldtrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1112,22 +1112,22 @@ dtrmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M2_22 + bgt .Ldtrmm_kernel_L4_M2_22 -dtrmm_kernel_L4_M2_40: +.Ldtrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L4_M2_100 + ble .Ldtrmm_kernel_L4_M2_100 -dtrmm_kernel_L4_M2_42: +.Ldtrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M2_42 + bgt .Ldtrmm_kernel_L4_M2_42 -dtrmm_kernel_L4_M2_100: +.Ldtrmm_kernel_L4_M2_100: SAVE2x4 @@ -1147,15 +1147,15 @@ dtrmm_kernel_L4_M2_100: add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L4_M2_END: +.Ldtrmm_kernel_L4_M2_END: -dtrmm_kernel_L4_M1_BEGIN: +.Ldtrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L4_END + ble .Ldtrmm_kernel_L4_END -dtrmm_kernel_L4_M1_20: +.Ldtrmm_kernel_L4_M1_20: INIT1x4 @@ -1179,9 +1179,9 @@ dtrmm_kernel_L4_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L4_M1_40 + ble .Ldtrmm_kernel_L4_M1_40 -dtrmm_kernel_L4_M1_22: +.Ldtrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1193,22 +1193,22 @@ dtrmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M1_22 + bgt .Ldtrmm_kernel_L4_M1_22 -dtrmm_kernel_L4_M1_40: +.Ldtrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L4_M1_100 + ble .Ldtrmm_kernel_L4_M1_100 -dtrmm_kernel_L4_M1_42: +.Ldtrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L4_M1_42 + bgt .Ldtrmm_kernel_L4_M1_42 -dtrmm_kernel_L4_M1_100: +.Ldtrmm_kernel_L4_M1_100: SAVE1x4 @@ -1228,7 +1228,7 @@ dtrmm_kernel_L4_M1_100: add tempOffset, tempOffset, #1 #endif -dtrmm_kernel_L4_END: +.Ldtrmm_kernel_L4_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 4 * 8 @@ -1238,19 +1238,19 @@ dtrmm_kernel_L4_END: #endif subs counterJ, counterJ , #1 // j-- - bgt dtrmm_kernel_L4_BEGIN + bgt .Ldtrmm_kernel_L4_BEGIN /******************************************************************************/ -dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Ldtrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble dtrmm_kernel_L999 // error, N was less than 4? + ble .Ldtrmm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble dtrmm_kernel_L1_BEGIN + ble .Ldtrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1261,14 +1261,14 @@ dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction #endif mov pA, origPA // pA = A -dtrmm_kernel_L2_M8_BEGIN: +.Ldtrmm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dtrmm_kernel_L2_M4_BEGIN + ble .Ldtrmm_kernel_L2_M4_BEGIN -dtrmm_kernel_L2_M8_20: +.Ldtrmm_kernel_L2_M8_20: INIT8x2 @@ -1292,10 +1292,10 @@ dtrmm_kernel_L2_M8_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dtrmm_kernel_L2_M8_40 + ble .Ldtrmm_kernel_L2_M8_40 .align 5 -dtrmm_kernel_L2_M8_22: +.Ldtrmm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1307,22 +1307,22 @@ dtrmm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M8_22 + bgt .Ldtrmm_kernel_L2_M8_22 -dtrmm_kernel_L2_M8_40: +.Ldtrmm_kernel_L2_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M8_100 + ble .Ldtrmm_kernel_L2_M8_100 -dtrmm_kernel_L2_M8_42: +.Ldtrmm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M8_42 + bgt .Ldtrmm_kernel_L2_M8_42 -dtrmm_kernel_L2_M8_100: +.Ldtrmm_kernel_L2_M8_100: SAVE8x2 @@ -1342,21 +1342,21 @@ dtrmm_kernel_L2_M8_100: add tempOffset, tempOffset, #8 #endif -dtrmm_kernel_L2_M8_END: +.Ldtrmm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt dtrmm_kernel_L2_M8_20 + bgt .Ldtrmm_kernel_L2_M8_20 -dtrmm_kernel_L2_M4_BEGIN: +.Ldtrmm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dtrmm_kernel_L2_END + ble .Ldtrmm_kernel_L2_END tst counterI, #4 // counterI = counterI / 2 - ble dtrmm_kernel_L2_M2_BEGIN + ble .Ldtrmm_kernel_L2_M2_BEGIN -dtrmm_kernel_L2_M4_20: +.Ldtrmm_kernel_L2_M4_20: INIT4x2 @@ -1380,10 +1380,10 @@ dtrmm_kernel_L2_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dtrmm_kernel_L2_M4_40 + ble .Ldtrmm_kernel_L2_M4_40 .align 5 -dtrmm_kernel_L2_M4_22: +.Ldtrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1395,22 +1395,22 @@ dtrmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M4_22 + bgt .Ldtrmm_kernel_L2_M4_22 -dtrmm_kernel_L2_M4_40: +.Ldtrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M4_100 + ble .Ldtrmm_kernel_L2_M4_100 -dtrmm_kernel_L2_M4_42: +.Ldtrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M4_42 + bgt .Ldtrmm_kernel_L2_M4_42 -dtrmm_kernel_L2_M4_100: +.Ldtrmm_kernel_L2_M4_100: SAVE4x2 @@ -1430,19 +1430,19 @@ dtrmm_kernel_L2_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L2_M4_END: +.Ldtrmm_kernel_L2_M4_END: -dtrmm_kernel_L2_M2_BEGIN: +.Ldtrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L2_END + ble .Ldtrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L2_M1_BEGIN + ble .Ldtrmm_kernel_L2_M1_BEGIN -dtrmm_kernel_L2_M2_20: +.Ldtrmm_kernel_L2_M2_20: INIT2x2 @@ -1466,9 +1466,9 @@ dtrmm_kernel_L2_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble dtrmm_kernel_L2_M2_40 + ble .Ldtrmm_kernel_L2_M2_40 -dtrmm_kernel_L2_M2_22: +.Ldtrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1481,22 +1481,22 @@ dtrmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M2_22 + bgt .Ldtrmm_kernel_L2_M2_22 -dtrmm_kernel_L2_M2_40: +.Ldtrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M2_100 + ble .Ldtrmm_kernel_L2_M2_100 -dtrmm_kernel_L2_M2_42: +.Ldtrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M2_42 + bgt .Ldtrmm_kernel_L2_M2_42 -dtrmm_kernel_L2_M2_100: +.Ldtrmm_kernel_L2_M2_100: SAVE2x2 @@ -1516,15 +1516,15 @@ dtrmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L2_M2_END: +.Ldtrmm_kernel_L2_M2_END: -dtrmm_kernel_L2_M1_BEGIN: +.Ldtrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L2_END + ble .Ldtrmm_kernel_L2_END -dtrmm_kernel_L2_M1_20: +.Ldtrmm_kernel_L2_M1_20: INIT1x2 @@ -1548,9 +1548,9 @@ dtrmm_kernel_L2_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble dtrmm_kernel_L2_M1_40 + ble .Ldtrmm_kernel_L2_M1_40 -dtrmm_kernel_L2_M1_22: +.Ldtrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1562,22 +1562,22 @@ dtrmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M1_22 + bgt .Ldtrmm_kernel_L2_M1_22 -dtrmm_kernel_L2_M1_40: +.Ldtrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L2_M1_100 + ble .Ldtrmm_kernel_L2_M1_100 -dtrmm_kernel_L2_M1_42: +.Ldtrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L2_M1_42 + bgt .Ldtrmm_kernel_L2_M1_42 -dtrmm_kernel_L2_M1_100: +.Ldtrmm_kernel_L2_M1_100: SAVE1x2 @@ -1597,7 +1597,7 @@ dtrmm_kernel_L2_M1_100: add tempOffset, tempOffset, #1 #endif -dtrmm_kernel_L2_END: +.Ldtrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -1605,11 +1605,11 @@ dtrmm_kernel_L2_END: /******************************************************************************/ -dtrmm_kernel_L1_BEGIN: +.Ldtrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble dtrmm_kernel_L999 // done + ble .Ldtrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C add pC , pC , LDC // Update pC to point to next @@ -1619,14 +1619,14 @@ dtrmm_kernel_L1_BEGIN: #endif mov pA, origPA // pA = A -dtrmm_kernel_L1_M8_BEGIN: +.Ldtrmm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble dtrmm_kernel_L1_M4_BEGIN + ble .Ldtrmm_kernel_L1_M4_BEGIN -dtrmm_kernel_L1_M8_20: +.Ldtrmm_kernel_L1_M8_20: INIT8x1 @@ -1650,10 +1650,10 @@ dtrmm_kernel_L1_M8_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M8_40 + ble .Ldtrmm_kernel_L1_M8_40 .align 5 -dtrmm_kernel_L1_M8_22: +.Ldtrmm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -1665,22 +1665,22 @@ dtrmm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M8_22 + bgt .Ldtrmm_kernel_L1_M8_22 -dtrmm_kernel_L1_M8_40: +.Ldtrmm_kernel_L1_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M8_100 + ble .Ldtrmm_kernel_L1_M8_100 -dtrmm_kernel_L1_M8_42: +.Ldtrmm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M8_42 + bgt .Ldtrmm_kernel_L1_M8_42 -dtrmm_kernel_L1_M8_100: +.Ldtrmm_kernel_L1_M8_100: SAVE8x1 @@ -1700,21 +1700,21 @@ dtrmm_kernel_L1_M8_100: add tempOffset, tempOffset, #8 #endif -dtrmm_kernel_L1_M8_END: +.Ldtrmm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt dtrmm_kernel_L1_M8_20 + bgt .Ldtrmm_kernel_L1_M8_20 -dtrmm_kernel_L1_M4_BEGIN: +.Ldtrmm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble dtrmm_kernel_L1_END + ble .Ldtrmm_kernel_L1_END tst counterI, #4 // counterI = counterI / 2 - ble dtrmm_kernel_L1_M2_BEGIN + ble .Ldtrmm_kernel_L1_M2_BEGIN -dtrmm_kernel_L1_M4_20: +.Ldtrmm_kernel_L1_M4_20: INIT4x1 @@ -1737,10 +1737,10 @@ dtrmm_kernel_L1_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M4_40 + ble .Ldtrmm_kernel_L1_M4_40 .align 5 -dtrmm_kernel_L1_M4_22: +.Ldtrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1752,22 +1752,22 @@ dtrmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M4_22 + bgt .Ldtrmm_kernel_L1_M4_22 -dtrmm_kernel_L1_M4_40: +.Ldtrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M4_100 + ble .Ldtrmm_kernel_L1_M4_100 -dtrmm_kernel_L1_M4_42: +.Ldtrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M4_42 + bgt .Ldtrmm_kernel_L1_M4_42 -dtrmm_kernel_L1_M4_100: +.Ldtrmm_kernel_L1_M4_100: SAVE4x1 @@ -1787,18 +1787,18 @@ dtrmm_kernel_L1_M4_100: add tempOffset, tempOffset, #4 #endif -dtrmm_kernel_L1_M4_END: +.Ldtrmm_kernel_L1_M4_END: -dtrmm_kernel_L1_M2_BEGIN: +.Ldtrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble dtrmm_kernel_L1_END + ble .Ldtrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble dtrmm_kernel_L1_M1_BEGIN + ble .Ldtrmm_kernel_L1_M1_BEGIN -dtrmm_kernel_L1_M2_20: +.Ldtrmm_kernel_L1_M2_20: INIT2x1 @@ -1822,9 +1822,9 @@ dtrmm_kernel_L1_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M2_40 + ble .Ldtrmm_kernel_L1_M2_40 -dtrmm_kernel_L1_M2_22: +.Ldtrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1837,22 +1837,22 @@ dtrmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M2_22 + bgt .Ldtrmm_kernel_L1_M2_22 -dtrmm_kernel_L1_M2_40: +.Ldtrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M2_100 + ble .Ldtrmm_kernel_L1_M2_100 -dtrmm_kernel_L1_M2_42: +.Ldtrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M2_42 + bgt .Ldtrmm_kernel_L1_M2_42 -dtrmm_kernel_L1_M2_100: +.Ldtrmm_kernel_L1_M2_100: SAVE2x1 @@ -1872,15 +1872,15 @@ dtrmm_kernel_L1_M2_100: add tempOffset, tempOffset, #2 #endif -dtrmm_kernel_L1_M2_END: +.Ldtrmm_kernel_L1_M2_END: -dtrmm_kernel_L1_M1_BEGIN: +.Ldtrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble dtrmm_kernel_L1_END + ble .Ldtrmm_kernel_L1_END -dtrmm_kernel_L1_M1_20: +.Ldtrmm_kernel_L1_M1_20: INIT1x1 @@ -1904,9 +1904,9 @@ dtrmm_kernel_L1_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble dtrmm_kernel_L1_M1_40 + ble .Ldtrmm_kernel_L1_M1_40 -dtrmm_kernel_L1_M1_22: +.Ldtrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1918,30 +1918,30 @@ dtrmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M1_22 + bgt .Ldtrmm_kernel_L1_M1_22 -dtrmm_kernel_L1_M1_40: +.Ldtrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble dtrmm_kernel_L1_M1_100 + ble .Ldtrmm_kernel_L1_M1_100 -dtrmm_kernel_L1_M1_42: +.Ldtrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt dtrmm_kernel_L1_M1_42 + bgt .Ldtrmm_kernel_L1_M1_42 -dtrmm_kernel_L1_M1_100: +.Ldtrmm_kernel_L1_M1_100: SAVE1x1 -dtrmm_kernel_L1_END: +.Ldtrmm_kernel_L1_END: -dtrmm_kernel_L999: +.Ldtrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/gemv_n.S b/kernel/arm64/gemv_n.S index 162f721c3..658551f4f 100644 --- a/kernel/arm64/gemv_n.S +++ b/kernel/arm64/gemv_n.S @@ -203,18 +203,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SAVE_REGS cmp N, xzr - ble gemv_n_kernel_L999 + ble .Lgemv_n_kernel_L999 cmp M, xzr - ble gemv_n_kernel_L999 + ble .Lgemv_n_kernel_L999 lsl LDA, LDA, #SHZ lsl INC_X, INC_X, #SHZ mov J, N cmp INC_Y, #1 - bne gemv_n_kernel_S_BEGIN + bne .Lgemv_n_kernel_S_BEGIN -gemv_n_kernel_F_LOOP: +.Lgemv_n_kernel_F_LOOP: ld1 TEMPV, [X], INC_X fmul TEMP, ALPHA, TEMP @@ -229,57 +229,57 @@ gemv_n_kernel_F_LOOP: mov Y_IPTR, Y mov Y_OPTR, Y -gemv_n_kernel_F32: +.Lgemv_n_kernel_F32: asr I, M, #5 cmp I, xzr - beq gemv_n_kernel_F4 + beq .Lgemv_n_kernel_F4 -gemv_n_kernel_F320: +.Lgemv_n_kernel_F320: KERNEL_F16 KERNEL_F16 subs I, I, #1 - bne gemv_n_kernel_F320 + bne .Lgemv_n_kernel_F320 -gemv_n_kernel_F4: +.Lgemv_n_kernel_F4: ands I, M, #31 asr I, I, #2 cmp I, xzr - beq gemv_n_kernel_F1 + beq .Lgemv_n_kernel_F1 -gemv_n_kernel_F40: +.Lgemv_n_kernel_F40: KERNEL_F4 subs I, I, #1 - bne gemv_n_kernel_F40 + bne .Lgemv_n_kernel_F40 -gemv_n_kernel_F1: +.Lgemv_n_kernel_F1: ands I, M, #3 - ble gemv_n_kernel_F_END + ble .Lgemv_n_kernel_F_END -gemv_n_kernel_F10: +.Lgemv_n_kernel_F10: KERNEL_F1 subs I, I, #1 - bne gemv_n_kernel_F10 + bne .Lgemv_n_kernel_F10 -gemv_n_kernel_F_END: +.Lgemv_n_kernel_F_END: add A, A, LDA subs J, J, #1 - bne gemv_n_kernel_F_LOOP + bne .Lgemv_n_kernel_F_LOOP - b gemv_n_kernel_L999 + b .Lgemv_n_kernel_L999 -gemv_n_kernel_S_BEGIN: +.Lgemv_n_kernel_S_BEGIN: INIT_S -gemv_n_kernel_S_LOOP: +.Lgemv_n_kernel_S_LOOP: ld1 TEMPV, [X], INC_X fmul TEMP, ALPHA, TEMP @@ -288,9 +288,9 @@ gemv_n_kernel_S_LOOP: asr I, M, #2 cmp I, xzr - ble gemv_n_kernel_S1 + ble .Lgemv_n_kernel_S1 -gemv_n_kernel_S4: +.Lgemv_n_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -298,27 +298,27 @@ gemv_n_kernel_S4: KERNEL_S1 subs I, I, #1 - bne gemv_n_kernel_S4 + bne .Lgemv_n_kernel_S4 -gemv_n_kernel_S1: +.Lgemv_n_kernel_S1: ands I, M, #3 - ble gemv_n_kernel_S_END + ble .Lgemv_n_kernel_S_END -gemv_n_kernel_S10: +.Lgemv_n_kernel_S10: KERNEL_S1 subs I, I, #1 - bne gemv_n_kernel_S10 + bne .Lgemv_n_kernel_S10 -gemv_n_kernel_S_END: +.Lgemv_n_kernel_S_END: add A, A, LDA subs J, J, #1 - bne gemv_n_kernel_S_LOOP + bne .Lgemv_n_kernel_S_LOOP -gemv_n_kernel_L999: +.Lgemv_n_kernel_L999: mov w0, wzr diff --git a/kernel/arm64/gemv_t.S b/kernel/arm64/gemv_t.S index 28325f784..b04367ab3 100644 --- a/kernel/arm64/gemv_t.S +++ b/kernel/arm64/gemv_t.S @@ -233,18 +233,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SAVE_REGS cmp N, xzr - ble gemv_t_kernel_L999 + ble .Lgemv_t_kernel_L999 cmp M, xzr - ble gemv_t_kernel_L999 + ble .Lgemv_t_kernel_L999 lsl LDA, LDA, #SHZ lsl INC_Y, INC_Y, #SHZ mov J, N cmp INC_X, #1 - bne gemv_t_kernel_S_BEGIN + bne .Lgemv_t_kernel_S_BEGIN -gemv_t_kernel_F_LOOP: +.Lgemv_t_kernel_F_LOOP: fmov TEMP, REG0 fmov TEMP1, REG0 @@ -254,64 +254,64 @@ gemv_t_kernel_F_LOOP: mov A_PTR, A mov X_PTR, X -gemv_t_kernel_F32: +.Lgemv_t_kernel_F32: asr I, M, #5 cmp I, xzr - beq gemv_t_kernel_F4 + beq .Lgemv_t_kernel_F4 -gemv_t_kernel_F320: +.Lgemv_t_kernel_F320: KERNEL_F32 subs I, I, #1 - bne gemv_t_kernel_F320 + bne .Lgemv_t_kernel_F320 KERNEL_F32_FINALIZE -gemv_t_kernel_F4: +.Lgemv_t_kernel_F4: ands I, M, #31 asr I, I, #2 cmp I, xzr - beq gemv_t_kernel_F1 + beq .Lgemv_t_kernel_F1 -gemv_t_kernel_F40: +.Lgemv_t_kernel_F40: KERNEL_F4 subs I, I, #1 - bne gemv_t_kernel_F40 + bne .Lgemv_t_kernel_F40 -gemv_t_kernel_F1: +.Lgemv_t_kernel_F1: KERNEL_F4_FINALIZE ands I, M, #3 - ble gemv_t_kernel_F_END + ble .Lgemv_t_kernel_F_END -gemv_t_kernel_F10: +.Lgemv_t_kernel_F10: KERNEL_F1 subs I, I, #1 - bne gemv_t_kernel_F10 + bne .Lgemv_t_kernel_F10 -gemv_t_kernel_F_END: +.Lgemv_t_kernel_F_END: ld1 TMPV1, [Y] add A, A, LDA subs J, J, #1 fmadd TMP1, ALPHA, TEMP, TMP1 st1 TMPV1, [Y], INC_Y - bne gemv_t_kernel_F_LOOP + bne .Lgemv_t_kernel_F_LOOP - b gemv_t_kernel_L999 + b .Lgemv_t_kernel_L999 -gemv_t_kernel_S_BEGIN: +.Lgemv_t_kernel_S_BEGIN: INIT_S -gemv_t_kernel_S_LOOP: +.Lgemv_t_kernel_S_LOOP: fmov TEMP, REG0 mov A_PTR, A @@ -319,9 +319,9 @@ gemv_t_kernel_S_LOOP: asr I, M, #2 cmp I, xzr - ble gemv_t_kernel_S1 + ble .Lgemv_t_kernel_S1 -gemv_t_kernel_S4: +.Lgemv_t_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -329,30 +329,30 @@ gemv_t_kernel_S4: KERNEL_S1 subs I, I, #1 - bne gemv_t_kernel_S4 + bne .Lgemv_t_kernel_S4 -gemv_t_kernel_S1: +.Lgemv_t_kernel_S1: ands I, M, #3 - ble gemv_t_kernel_S_END + ble .Lgemv_t_kernel_S_END -gemv_t_kernel_S10: +.Lgemv_t_kernel_S10: KERNEL_S1 subs I, I, #1 - bne gemv_t_kernel_S10 + bne .Lgemv_t_kernel_S10 -gemv_t_kernel_S_END: +.Lgemv_t_kernel_S_END: ld1 TMPV1, [Y] add A, A, LDA subs J, J, #1 fmadd TMP1, ALPHA, TEMP, TMP1 st1 TMPV1, [Y], INC_Y - bne gemv_t_kernel_S_LOOP + bne .Lgemv_t_kernel_S_LOOP -gemv_t_kernel_L999: +.Lgemv_t_kernel_L999: RESTORE_REGS diff --git a/kernel/arm64/iamax.S b/kernel/arm64/iamax.S index 6c0d84f98..31d0cd646 100644 --- a/kernel/arm64/iamax.S +++ b/kernel/arm64/iamax.S @@ -230,62 +230,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble iamax_kernel_zero + ble .Liamax_kernel_zero cmp INC_X, xzr - ble iamax_kernel_zero + ble .Liamax_kernel_zero cmp INC_X, #1 - bne iamax_kernel_S_BEGIN + bne .Liamax_kernel_S_BEGIN mov x7, X -iamax_kernel_F_BEGIN: +.Liamax_kernel_F_BEGIN: INIT_S subs N, N, #1 - ble iamax_kernel_L999 + ble .Liamax_kernel_L999 asr I, N, #3 cmp I, xzr - beq iamax_kernel_F1 + beq .Liamax_kernel_F1 add Z, Z, #1 -iamax_kernel_F8: +.Liamax_kernel_F8: KERNEL_F8 subs I, I, #1 - bne iamax_kernel_F8 + bne .Liamax_kernel_F8 KERNEL_F8_FINALIZE sub Z, Z, #1 -iamax_kernel_F1: +.Liamax_kernel_F1: ands I, N, #7 - ble iamax_kernel_L999 + ble .Liamax_kernel_L999 -iamax_kernel_F10: +.Liamax_kernel_F10: KERNEL_S1 subs I, I, #1 - bne iamax_kernel_F10 + bne .Liamax_kernel_F10 - b iamax_kernel_L999 + b .Liamax_kernel_L999 -iamax_kernel_S_BEGIN: +.Liamax_kernel_S_BEGIN: INIT_S subs N, N, #1 - ble iamax_kernel_L999 + ble .Liamax_kernel_L999 asr I, N, #2 cmp I, xzr - ble iamax_kernel_S1 + ble .Liamax_kernel_S1 -iamax_kernel_S4: +.Liamax_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -293,25 +293,25 @@ iamax_kernel_S4: KERNEL_S1 subs I, I, #1 - bne iamax_kernel_S4 + bne .Liamax_kernel_S4 -iamax_kernel_S1: +.Liamax_kernel_S1: ands I, N, #3 - ble iamax_kernel_L999 + ble .Liamax_kernel_L999 -iamax_kernel_S10: +.Liamax_kernel_S10: KERNEL_S1 subs I, I, #1 - bne iamax_kernel_S10 + bne .Liamax_kernel_S10 -iamax_kernel_L999: +.Liamax_kernel_L999: mov x0, INDEX ret -iamax_kernel_zero: +.Liamax_kernel_zero: mov x0, xzr ret diff --git a/kernel/arm64/izamax.S b/kernel/arm64/izamax.S index 9b252ec98..42fa4e711 100644 --- a/kernel/arm64/izamax.S +++ b/kernel/arm64/izamax.S @@ -276,64 +276,64 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble iamax_kernel_zero + ble .Lizamax_kernel_zero cmp INC_X, xzr - ble iamax_kernel_zero + ble .Lizamax_kernel_zero cmp INC_X, #1 - bne iamax_kernel_S_BEGIN + bne .Lizamax_kernel_S_BEGIN mov x7, X -iamax_kernel_F_BEGIN: +.Lizamax_kernel_F_BEGIN: INIT_S subs N, N, #1 - ble iamax_kernel_L999 + ble .Lizamax_kernel_L999 asr I, N, #3 cmp I, xzr - ble iamax_kernel_F1 + ble .Lizamax_kernel_F1 add Z, Z, #1 -iamax_kernel_F8: +.Lizamax_kernel_F8: KERNEL_F8 subs I, I, #1 - bne iamax_kernel_F8 + bne .Lizamax_kernel_F8 KERNEL_F8_FINALIZE sub Z, Z, #1 -iamax_kernel_F1: +.Lizamax_kernel_F1: ands I, N, #7 - ble iamax_kernel_L999 + ble .Lizamax_kernel_L999 -iamax_kernel_F10: +.Lizamax_kernel_F10: KERNEL_S1 subs I, I, #1 - bne iamax_kernel_F10 + bne .Lizamax_kernel_F10 - b iamax_kernel_L999 + b .Lizamax_kernel_L999 -iamax_kernel_S_BEGIN: +.Lizamax_kernel_S_BEGIN: INIT_S subs N, N, #1 - ble iamax_kernel_L999 + ble .Lizamax_kernel_L999 asr I, N, #2 cmp I, xzr - ble iamax_kernel_S1 + ble .Lizamax_kernel_S1 -iamax_kernel_S4: +.Lizamax_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -341,26 +341,26 @@ iamax_kernel_S4: KERNEL_S1 subs I, I, #1 - bne iamax_kernel_S4 + bne .Lizamax_kernel_S4 -iamax_kernel_S1: +.Lizamax_kernel_S1: ands I, N, #3 - ble iamax_kernel_L999 + ble .Lizamax_kernel_L999 -iamax_kernel_S10: +.Lizamax_kernel_S10: KERNEL_S1 subs I, I, #1 - bne iamax_kernel_S10 + bne .Lizamax_kernel_S10 -iamax_kernel_L999: +.Lizamax_kernel_L999: mov x0, INDEX ret -iamax_kernel_zero: +.Lizamax_kernel_zero: mov x0, xzr ret diff --git a/kernel/arm64/nrm2.S b/kernel/arm64/nrm2.S index 5d06c13c0..e2cbd4def 100644 --- a/kernel/arm64/nrm2.S +++ b/kernel/arm64/nrm2.S @@ -162,44 +162,44 @@ KERNEL_S1_NEXT: INIT cmp N, #0 - ble nrm2_kernel_L999 + ble .Lnrm2_kernel_L999 cmp INC_X, #0 - beq nrm2_kernel_L999 + beq .Lnrm2_kernel_L999 cmp INC_X, #1 - bne nrm2_kernel_S_BEGIN + bne .Lnrm2_kernel_S_BEGIN -nrm2_kernel_F_BEGIN: +.Lnrm2_kernel_F_BEGIN: asr I, N, #3 // I = N / 8 cmp I, xzr - ble nrm2_kernel_F1 + ble .Lnrm2_kernel_F1 -nrm2_kernel_F8: +.Lnrm2_kernel_F8: KERNEL_F8 subs I, I, #1 - bne nrm2_kernel_F8 + bne .Lnrm2_kernel_F8 -nrm2_kernel_F1: +.Lnrm2_kernel_F1: ands I, N, #7 - ble nrm2_kernel_L999 + ble .Lnrm2_kernel_L999 -nrm2_kernel_F10: +.Lnrm2_kernel_F10: KERNEL_F1 subs I, I, #1 - bne nrm2_kernel_F10 + bne .Lnrm2_kernel_F10 - b nrm2_kernel_L999 + b .Lnrm2_kernel_L999 -nrm2_kernel_S_BEGIN: +.Lnrm2_kernel_S_BEGIN: INIT_S @@ -207,15 +207,15 @@ nrm2_kernel_S_BEGIN: .align 5 -nrm2_kernel_S10: +.Lnrm2_kernel_S10: KERNEL_S1 subs I, I, #1 - bne nrm2_kernel_S10 + bne .Lnrm2_kernel_S10 -nrm2_kernel_L999: +.Lnrm2_kernel_L999: fsqrt SSQ, SSQ fmul SSQ, SCALE, SSQ diff --git a/kernel/arm64/rot.S b/kernel/arm64/rot.S index 572125232..00c3085fa 100644 --- a/kernel/arm64/rot.S +++ b/kernel/arm64/rot.S @@ -165,48 +165,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble rot_kernel_L999 + ble .Lrot_kernel_L999 INIT cmp INC_X, #1 - bne rot_kernel_S_BEGIN + bne .Lrot_kernel_S_BEGIN cmp INC_Y, #1 - bne rot_kernel_S_BEGIN + bne .Lrot_kernel_S_BEGIN -rot_kernel_F_BEGIN: +.Lrot_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq rot_kernel_F1 + beq .Lrot_kernel_F1 KERNEL_INIT_F4 -rot_kernel_F4: +.Lrot_kernel_F4: KERNEL_F4 subs I, I, #1 - bne rot_kernel_F4 + bne .Lrot_kernel_F4 -rot_kernel_F1: +.Lrot_kernel_F1: ands I, N, #3 - ble rot_kernel_L999 + ble .Lrot_kernel_L999 INIT_F1 -rot_kernel_F10: +.Lrot_kernel_F10: KERNEL_F1 subs I, I, #1 - bne rot_kernel_F10 + bne .Lrot_kernel_F10 mov w0, wzr ret -rot_kernel_S_BEGIN: +.Lrot_kernel_S_BEGIN: INIT_S INIT_F1 @@ -214,9 +214,9 @@ rot_kernel_S_BEGIN: asr I, N, #2 cmp I, xzr - ble rot_kernel_S1 + ble .Lrot_kernel_S1 -rot_kernel_S4: +.Lrot_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -224,22 +224,22 @@ rot_kernel_S4: KERNEL_S1 subs I, I, #1 - bne rot_kernel_S4 + bne .Lrot_kernel_S4 -rot_kernel_S1: +.Lrot_kernel_S1: ands I, N, #3 - ble rot_kernel_L999 + ble .Lrot_kernel_L999 -rot_kernel_S10: +.Lrot_kernel_S10: KERNEL_S1 subs I, I, #1 - bne rot_kernel_S10 + bne .Lrot_kernel_S10 -rot_kernel_L999: +.Lrot_kernel_L999: mov w0, wzr ret diff --git a/kernel/arm64/scal.S b/kernel/arm64/scal.S index 91d469d03..09c41cdaa 100644 --- a/kernel/arm64/scal.S +++ b/kernel/arm64/scal.S @@ -166,86 +166,86 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble scal_kernel_L999 + ble .Lscal_kernel_L999 fcmp DA, #0.0 - beq scal_kernel_zero + beq .Lscal_kernel_zero cmp INC_X, #1 - bne scal_kernel_S_BEGIN + bne .Lscal_kernel_S_BEGIN -scal_kernel_F_BEGIN: +.Lscal_kernel_F_BEGIN: asr I, N, #3 cmp I, xzr - beq scal_kernel_F1 + beq .Lscal_kernel_F1 KERNEL_INIT_F8 -scal_kernel_F8: +.Lscal_kernel_F8: KERNEL_F8 subs I, I, #1 - bne scal_kernel_F8 + bne .Lscal_kernel_F8 -scal_kernel_F1: +.Lscal_kernel_F1: ands I, N, #7 - ble scal_kernel_L999 + ble .Lscal_kernel_L999 -scal_kernel_F10: +.Lscal_kernel_F10: KERNEL_F1 subs I, I, #1 - bne scal_kernel_F10 + bne .Lscal_kernel_F10 mov w0, wzr ret -scal_kernel_S_BEGIN: +.Lscal_kernel_S_BEGIN: INIT_S mov X_COPY, X asr I, N, #2 cmp I, xzr - ble scal_kernel_S1 + ble .Lscal_kernel_S1 -scal_kernel_S4: +.Lscal_kernel_S4: KERNEL_S4 subs I, I, #1 - bne scal_kernel_S4 + bne .Lscal_kernel_S4 -scal_kernel_S1: +.Lscal_kernel_S1: ands I, N, #3 - ble scal_kernel_L999 + ble .Lscal_kernel_L999 -scal_kernel_S10: +.Lscal_kernel_S10: KERNEL_S1 subs I, I, #1 - bne scal_kernel_S10 + bne .Lscal_kernel_S10 -scal_kernel_L999: +.Lscal_kernel_L999: mov w0, wzr ret -scal_kernel_zero: +.Lscal_kernel_zero: INIT_S -scal_kernel_Z1: +.Lscal_kernel_Z1: st1 DAV, [X], INC_X subs N, N, #1 - bne scal_kernel_Z1 + bne .Lscal_kernel_Z1 mov w0, wzr ret diff --git a/kernel/arm64/sgemm_kernel_16x4.S b/kernel/arm64/sgemm_kernel_16x4.S index 6e3645b76..99099ea6f 100644 --- a/kernel/arm64/sgemm_kernel_16x4.S +++ b/kernel/arm64/sgemm_kernel_16x4.S @@ -1070,7 +1070,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE -sgemm_kernel_begin: +.Lsgemm_kernel_begin: .align 5 add sp, sp, #-(11 * 16) @@ -1098,11 +1098,11 @@ sgemm_kernel_begin: mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble sgemm_kernel_L2_BEGIN + ble .Lsgemm_kernel_L2_BEGIN /******************************************************************************/ -sgemm_kernel_L4_BEGIN: +.Lsgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1112,21 +1112,21 @@ sgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -sgemm_kernel_L4_M16_BEGIN: +.Lsgemm_kernel_L4_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI, #0 - ble sgemm_kernel_L4_M8_BEGIN + ble .Lsgemm_kernel_L4_M8_BEGIN .align 5 -sgemm_kernel_L4_M16_20: +.Lsgemm_kernel_L4_M16_20: mov pB, origPB asr counterL , origK, #3 cmp counterL , #2 - blt sgemm_kernel_L4_M16_32 + blt .Lsgemm_kernel_L4_M16_32 KERNEL16x4_I KERNEL16x4_M2 @@ -1138,10 +1138,10 @@ sgemm_kernel_L4_M16_20: KERNEL16x4_M2 subs counterL, counterL, #2 - ble sgemm_kernel_L4_M16_22a + ble .Lsgemm_kernel_L4_M16_22a .align 5 -sgemm_kernel_L4_M16_22: +.Lsgemm_kernel_L4_M16_22: KERNEL16x4_M1 KERNEL16x4_M2 @@ -1153,10 +1153,10 @@ sgemm_kernel_L4_M16_22: KERNEL16x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M16_22 + bgt .Lsgemm_kernel_L4_M16_22 .align 5 -sgemm_kernel_L4_M16_22a: +.Lsgemm_kernel_L4_M16_22a: KERNEL16x4_M1 KERNEL16x4_M2 @@ -1167,13 +1167,13 @@ sgemm_kernel_L4_M16_22a: KERNEL16x4_M1 KERNEL16x4_E - b sgemm_kernel_L4_M16_44 + b .Lsgemm_kernel_L4_M16_44 .align 5 -sgemm_kernel_L4_M16_32: +.Lsgemm_kernel_L4_M16_32: tst counterL, #1 - ble sgemm_kernel_L4_M16_40 + ble .Lsgemm_kernel_L4_M16_40 KERNEL16x4_I KERNEL16x4_M2 @@ -1184,187 +1184,187 @@ sgemm_kernel_L4_M16_32: KERNEL16x4_M1 KERNEL16x4_E - b sgemm_kernel_L4_M16_44 + b .Lsgemm_kernel_L4_M16_44 -sgemm_kernel_L4_M16_40: +.Lsgemm_kernel_L4_M16_40: INIT16x4 -sgemm_kernel_L4_M16_44: +.Lsgemm_kernel_L4_M16_44: ands counterL , origK, #7 - ble sgemm_kernel_L4_M16_100 + ble .Lsgemm_kernel_L4_M16_100 .align 5 -sgemm_kernel_L4_M16_46: +.Lsgemm_kernel_L4_M16_46: KERNEL16x4_SUB subs counterL, counterL, #1 - bne sgemm_kernel_L4_M16_46 + bne .Lsgemm_kernel_L4_M16_46 -sgemm_kernel_L4_M16_100: +.Lsgemm_kernel_L4_M16_100: prfm PLDL1KEEP, [pA] prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] SAVE16x4 -sgemm_kernel_L4_M16_END: +.Lsgemm_kernel_L4_M16_END: subs counterI, counterI, #1 - bne sgemm_kernel_L4_M16_20 + bne .Lsgemm_kernel_L4_M16_20 //------------------------------------------------------------------------------ -sgemm_kernel_L4_M8_BEGIN: +.Lsgemm_kernel_L4_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #8 - ble sgemm_kernel_L4_M4_BEGIN + ble .Lsgemm_kernel_L4_M4_BEGIN -sgemm_kernel_L4_M8_20: +.Lsgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L4_M8_32 + blt .Lsgemm_kernel_L4_M8_32 KERNEL8x4_I // do one in the K KERNEL8x4_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L4_M8_22a + ble .Lsgemm_kernel_L4_M8_22a .align 5 -sgemm_kernel_L4_M8_22: +.Lsgemm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M8_22 + bgt .Lsgemm_kernel_L4_M8_22 -sgemm_kernel_L4_M8_22a: +.Lsgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b sgemm_kernel_L4_M8_44 + b .Lsgemm_kernel_L4_M8_44 -sgemm_kernel_L4_M8_32: +.Lsgemm_kernel_L4_M8_32: tst counterL, #1 - ble sgemm_kernel_L4_M8_40 + ble .Lsgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_E - b sgemm_kernel_L4_M8_44 + b .Lsgemm_kernel_L4_M8_44 -sgemm_kernel_L4_M8_40: +.Lsgemm_kernel_L4_M8_40: INIT8x4 -sgemm_kernel_L4_M8_44: +.Lsgemm_kernel_L4_M8_44: ands counterL , origK, #1 - ble sgemm_kernel_L4_M8_100 + ble .Lsgemm_kernel_L4_M8_100 -sgemm_kernel_L4_M8_46: +.Lsgemm_kernel_L4_M8_46: KERNEL8x4_SUB -sgemm_kernel_L4_M8_100: +.Lsgemm_kernel_L4_M8_100: SAVE8x4 -sgemm_kernel_L4_M8_END: +.Lsgemm_kernel_L4_M8_END: //------------------------------------------------------------------------------ -sgemm_kernel_L4_M4_BEGIN: +.Lsgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #4 - ble sgemm_kernel_L4_M2_BEGIN + ble .Lsgemm_kernel_L4_M2_BEGIN -sgemm_kernel_L4_M4_20: +.Lsgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L4_M4_32 + blt .Lsgemm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L4_M4_22a + ble .Lsgemm_kernel_L4_M4_22a .align 5 -sgemm_kernel_L4_M4_22: +.Lsgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M4_22 + bgt .Lsgemm_kernel_L4_M4_22 -sgemm_kernel_L4_M4_22a: +.Lsgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b sgemm_kernel_L4_M4_44 + b .Lsgemm_kernel_L4_M4_44 -sgemm_kernel_L4_M4_32: +.Lsgemm_kernel_L4_M4_32: tst counterL, #1 - ble sgemm_kernel_L4_M4_40 + ble .Lsgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b sgemm_kernel_L4_M4_44 + b .Lsgemm_kernel_L4_M4_44 -sgemm_kernel_L4_M4_40: +.Lsgemm_kernel_L4_M4_40: INIT4x4 -sgemm_kernel_L4_M4_44: +.Lsgemm_kernel_L4_M4_44: ands counterL , origK, #1 - ble sgemm_kernel_L4_M4_100 + ble .Lsgemm_kernel_L4_M4_100 -sgemm_kernel_L4_M4_46: +.Lsgemm_kernel_L4_M4_46: KERNEL4x4_SUB -sgemm_kernel_L4_M4_100: +.Lsgemm_kernel_L4_M4_100: SAVE4x4 -sgemm_kernel_L4_M4_END: +.Lsgemm_kernel_L4_M4_END: //------------------------------------------------------------------------------ -sgemm_kernel_L4_M2_BEGIN: +.Lsgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L4_M1_BEGIN + ble .Lsgemm_kernel_L4_M1_BEGIN -sgemm_kernel_L4_M2_20: +.Lsgemm_kernel_L4_M2_20: INIT2x4 @@ -1372,9 +1372,9 @@ sgemm_kernel_L4_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M2_40 + ble .Lsgemm_kernel_L4_M2_40 -sgemm_kernel_L4_M2_22: +.Lsgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1387,34 +1387,34 @@ sgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_22 + bgt .Lsgemm_kernel_L4_M2_22 -sgemm_kernel_L4_M2_40: +.Lsgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M2_100 + ble .Lsgemm_kernel_L4_M2_100 -sgemm_kernel_L4_M2_42: +.Lsgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_42 + bgt .Lsgemm_kernel_L4_M2_42 -sgemm_kernel_L4_M2_100: +.Lsgemm_kernel_L4_M2_100: SAVE2x4 -sgemm_kernel_L4_M2_END: +.Lsgemm_kernel_L4_M2_END: -sgemm_kernel_L4_M1_BEGIN: +.Lsgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END -sgemm_kernel_L4_M1_20: +.Lsgemm_kernel_L4_M1_20: INIT1x4 @@ -1422,9 +1422,9 @@ sgemm_kernel_L4_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M1_40 + ble .Lsgemm_kernel_L4_M1_40 -sgemm_kernel_L4_M1_22: +.Lsgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1436,42 +1436,42 @@ sgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_22 + bgt .Lsgemm_kernel_L4_M1_22 -sgemm_kernel_L4_M1_40: +.Lsgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M1_100 + ble .Lsgemm_kernel_L4_M1_100 -sgemm_kernel_L4_M1_42: +.Lsgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_42 + bgt .Lsgemm_kernel_L4_M1_42 -sgemm_kernel_L4_M1_100: +.Lsgemm_kernel_L4_M1_100: SAVE1x4 -sgemm_kernel_L4_END: +.Lsgemm_kernel_L4_END: add origPB, origPB, origK, lsl #4 // B = B + K * 4 * 4 subs counterJ, counterJ , #1 // j-- - bgt sgemm_kernel_L4_BEGIN + bgt .Lsgemm_kernel_L4_BEGIN /******************************************************************************/ -sgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lsgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble sgemm_kernel_L999 + ble .Lsgemm_kernel_L999 tst counterJ , #2 - ble sgemm_kernel_L1_BEGIN + ble .Lsgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1479,14 +1479,14 @@ sgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -sgemm_kernel_L2_M16_BEGIN: +.Lsgemm_kernel_L2_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI,#0 - ble sgemm_kernel_L2_M8_BEGIN + ble .Lsgemm_kernel_L2_M8_BEGIN -sgemm_kernel_L2_M16_20: +.Lsgemm_kernel_L2_M16_20: INIT16x2 @@ -1494,10 +1494,10 @@ sgemm_kernel_L2_M16_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M16_40 + ble .Lsgemm_kernel_L2_M16_40 .align 5 -sgemm_kernel_L2_M16_22: +.Lsgemm_kernel_L2_M16_22: KERNEL16x2_SUB KERNEL16x2_SUB KERNEL16x2_SUB @@ -1509,41 +1509,41 @@ sgemm_kernel_L2_M16_22: KERNEL16x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M16_22 + bgt .Lsgemm_kernel_L2_M16_22 -sgemm_kernel_L2_M16_40: +.Lsgemm_kernel_L2_M16_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M16_100 + ble .Lsgemm_kernel_L2_M16_100 -sgemm_kernel_L2_M16_42: +.Lsgemm_kernel_L2_M16_42: KERNEL16x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M16_42 + bgt .Lsgemm_kernel_L2_M16_42 -sgemm_kernel_L2_M16_100: +.Lsgemm_kernel_L2_M16_100: SAVE16x2 -sgemm_kernel_L2_M16_END: +.Lsgemm_kernel_L2_M16_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L2_M16_20 + bgt .Lsgemm_kernel_L2_M16_20 //------------------------------------------------------------------------------ -sgemm_kernel_L2_M8_BEGIN: +.Lsgemm_kernel_L2_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #8 - ble sgemm_kernel_L2_M4_BEGIN + ble .Lsgemm_kernel_L2_M4_BEGIN -sgemm_kernel_L2_M8_20: +.Lsgemm_kernel_L2_M8_20: INIT8x2 @@ -1551,10 +1551,10 @@ sgemm_kernel_L2_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M8_40 + ble .Lsgemm_kernel_L2_M8_40 .align 5 -sgemm_kernel_L2_M8_22: +.Lsgemm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1566,38 +1566,38 @@ sgemm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M8_22 + bgt .Lsgemm_kernel_L2_M8_22 -sgemm_kernel_L2_M8_40: +.Lsgemm_kernel_L2_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M8_100 + ble .Lsgemm_kernel_L2_M8_100 -sgemm_kernel_L2_M8_42: +.Lsgemm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M8_42 + bgt .Lsgemm_kernel_L2_M8_42 -sgemm_kernel_L2_M8_100: +.Lsgemm_kernel_L2_M8_100: SAVE8x2 -sgemm_kernel_L2_M8_END: +.Lsgemm_kernel_L2_M8_END: //------------------------------------------------------------------------------ -sgemm_kernel_L2_M4_BEGIN: +.Lsgemm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #4 - ble sgemm_kernel_L2_M2_BEGIN + ble .Lsgemm_kernel_L2_M2_BEGIN -sgemm_kernel_L2_M4_20: +.Lsgemm_kernel_L2_M4_20: INIT4x2 @@ -1605,10 +1605,10 @@ sgemm_kernel_L2_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M4_40 + ble .Lsgemm_kernel_L2_M4_40 .align 5 -sgemm_kernel_L2_M4_22: +.Lsgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1620,40 +1620,40 @@ sgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_22 + bgt .Lsgemm_kernel_L2_M4_22 -sgemm_kernel_L2_M4_40: +.Lsgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M4_100 + ble .Lsgemm_kernel_L2_M4_100 -sgemm_kernel_L2_M4_42: +.Lsgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_42 + bgt .Lsgemm_kernel_L2_M4_42 -sgemm_kernel_L2_M4_100: +.Lsgemm_kernel_L2_M4_100: SAVE4x2 -sgemm_kernel_L2_M4_END: +.Lsgemm_kernel_L2_M4_END: //------------------------------------------------------------------------------ -sgemm_kernel_L2_M2_BEGIN: +.Lsgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L2_M1_BEGIN + ble .Lsgemm_kernel_L2_M1_BEGIN -sgemm_kernel_L2_M2_20: +.Lsgemm_kernel_L2_M2_20: INIT2x2 @@ -1661,9 +1661,9 @@ sgemm_kernel_L2_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M2_40 + ble .Lsgemm_kernel_L2_M2_40 -sgemm_kernel_L2_M2_22: +.Lsgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1676,34 +1676,34 @@ sgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_22 + bgt .Lsgemm_kernel_L2_M2_22 -sgemm_kernel_L2_M2_40: +.Lsgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M2_100 + ble .Lsgemm_kernel_L2_M2_100 -sgemm_kernel_L2_M2_42: +.Lsgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_42 + bgt .Lsgemm_kernel_L2_M2_42 -sgemm_kernel_L2_M2_100: +.Lsgemm_kernel_L2_M2_100: SAVE2x2 -sgemm_kernel_L2_M2_END: +.Lsgemm_kernel_L2_M2_END: -sgemm_kernel_L2_M1_BEGIN: +.Lsgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END -sgemm_kernel_L2_M1_20: +.Lsgemm_kernel_L2_M1_20: INIT1x2 @@ -1711,9 +1711,9 @@ sgemm_kernel_L2_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble sgemm_kernel_L2_M1_40 + ble .Lsgemm_kernel_L2_M1_40 -sgemm_kernel_L2_M1_22: +.Lsgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1725,36 +1725,36 @@ sgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_22 + bgt .Lsgemm_kernel_L2_M1_22 -sgemm_kernel_L2_M1_40: +.Lsgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M1_100 + ble .Lsgemm_kernel_L2_M1_100 -sgemm_kernel_L2_M1_42: +.Lsgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_42 + bgt .Lsgemm_kernel_L2_M1_42 -sgemm_kernel_L2_M1_100: +.Lsgemm_kernel_L2_M1_100: SAVE1x2 -sgemm_kernel_L2_END: +.Lsgemm_kernel_L2_END: add origPB, origPB, origK, lsl #3 // B = B + K * 2 * 4 /******************************************************************************/ -sgemm_kernel_L1_BEGIN: +.Lsgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble sgemm_kernel_L999 // done + ble .Lsgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1762,14 +1762,14 @@ sgemm_kernel_L1_BEGIN: mov pA, origPA // pA = A -sgemm_kernel_L1_M16_BEGIN: +.Lsgemm_kernel_L1_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI, #0 - ble sgemm_kernel_L1_M8_BEGIN + ble .Lsgemm_kernel_L1_M8_BEGIN -sgemm_kernel_L1_M16_20: +.Lsgemm_kernel_L1_M16_20: INIT16x1 @@ -1777,10 +1777,10 @@ sgemm_kernel_L1_M16_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M16_40 + ble .Lsgemm_kernel_L1_M16_40 .align 5 -sgemm_kernel_L1_M16_22: +.Lsgemm_kernel_L1_M16_22: KERNEL16x1_SUB KERNEL16x1_SUB KERNEL16x1_SUB @@ -1792,42 +1792,42 @@ sgemm_kernel_L1_M16_22: KERNEL16x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M16_22 + bgt .Lsgemm_kernel_L1_M16_22 -sgemm_kernel_L1_M16_40: +.Lsgemm_kernel_L1_M16_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M16_100 + ble .Lsgemm_kernel_L1_M16_100 -sgemm_kernel_L1_M16_42: +.Lsgemm_kernel_L1_M16_42: KERNEL16x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M16_42 + bgt .Lsgemm_kernel_L1_M16_42 -sgemm_kernel_L1_M16_100: +.Lsgemm_kernel_L1_M16_100: SAVE16x1 -sgemm_kernel_L1_M16_END: +.Lsgemm_kernel_L1_M16_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L1_M16_20 + bgt .Lsgemm_kernel_L1_M16_20 //------------------------------------------------------------------------------ -sgemm_kernel_L1_M8_BEGIN: +.Lsgemm_kernel_L1_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #8 - ble sgemm_kernel_L1_M4_BEGIN + ble .Lsgemm_kernel_L1_M4_BEGIN -sgemm_kernel_L1_M8_20: +.Lsgemm_kernel_L1_M8_20: INIT8x1 @@ -1835,10 +1835,10 @@ sgemm_kernel_L1_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M8_40 + ble .Lsgemm_kernel_L1_M8_40 .align 5 -sgemm_kernel_L1_M8_22: +.Lsgemm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -1850,38 +1850,38 @@ sgemm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M8_22 + bgt .Lsgemm_kernel_L1_M8_22 -sgemm_kernel_L1_M8_40: +.Lsgemm_kernel_L1_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M8_100 + ble .Lsgemm_kernel_L1_M8_100 -sgemm_kernel_L1_M8_42: +.Lsgemm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M8_42 + bgt .Lsgemm_kernel_L1_M8_42 -sgemm_kernel_L1_M8_100: +.Lsgemm_kernel_L1_M8_100: SAVE8x1 -sgemm_kernel_L1_M8_END: +.Lsgemm_kernel_L1_M8_END: //------------------------------------------------------------------------------ -sgemm_kernel_L1_M4_BEGIN: +.Lsgemm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #4 - ble sgemm_kernel_L1_M2_BEGIN + ble .Lsgemm_kernel_L1_M2_BEGIN -sgemm_kernel_L1_M4_20: +.Lsgemm_kernel_L1_M4_20: INIT4x1 @@ -1889,10 +1889,10 @@ sgemm_kernel_L1_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M4_40 + ble .Lsgemm_kernel_L1_M4_40 .align 5 -sgemm_kernel_L1_M4_22: +.Lsgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1904,39 +1904,39 @@ sgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_22 + bgt .Lsgemm_kernel_L1_M4_22 -sgemm_kernel_L1_M4_40: +.Lsgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M4_100 + ble .Lsgemm_kernel_L1_M4_100 -sgemm_kernel_L1_M4_42: +.Lsgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_42 + bgt .Lsgemm_kernel_L1_M4_42 -sgemm_kernel_L1_M4_100: +.Lsgemm_kernel_L1_M4_100: SAVE4x1 -sgemm_kernel_L1_M4_END: +.Lsgemm_kernel_L1_M4_END: //------------------------------------------------------------------------------ -sgemm_kernel_L1_M2_BEGIN: +.Lsgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L1_M1_BEGIN + ble .Lsgemm_kernel_L1_M1_BEGIN -sgemm_kernel_L1_M2_20: +.Lsgemm_kernel_L1_M2_20: INIT2x1 @@ -1944,9 +1944,9 @@ sgemm_kernel_L1_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M2_40 + ble .Lsgemm_kernel_L1_M2_40 -sgemm_kernel_L1_M2_22: +.Lsgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1959,34 +1959,34 @@ sgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_22 + bgt .Lsgemm_kernel_L1_M2_22 -sgemm_kernel_L1_M2_40: +.Lsgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M2_100 + ble .Lsgemm_kernel_L1_M2_100 -sgemm_kernel_L1_M2_42: +.Lsgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_42 + bgt .Lsgemm_kernel_L1_M2_42 -sgemm_kernel_L1_M2_100: +.Lsgemm_kernel_L1_M2_100: SAVE2x1 -sgemm_kernel_L1_M2_END: +.Lsgemm_kernel_L1_M2_END: -sgemm_kernel_L1_M1_BEGIN: +.Lsgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END -sgemm_kernel_L1_M1_20: +.Lsgemm_kernel_L1_M1_20: INIT1x1 @@ -1994,9 +1994,9 @@ sgemm_kernel_L1_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M1_40 + ble .Lsgemm_kernel_L1_M1_40 -sgemm_kernel_L1_M1_22: +.Lsgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2008,28 +2008,28 @@ sgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_22 + bgt .Lsgemm_kernel_L1_M1_22 -sgemm_kernel_L1_M1_40: +.Lsgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M1_100 + ble .Lsgemm_kernel_L1_M1_100 -sgemm_kernel_L1_M1_42: +.Lsgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_42 + bgt .Lsgemm_kernel_L1_M1_42 -sgemm_kernel_L1_M1_100: +.Lsgemm_kernel_L1_M1_100: SAVE1x1 -sgemm_kernel_L1_END: +.Lsgemm_kernel_L1_END: -sgemm_kernel_L999: +.Lsgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/sgemm_kernel_16x4_thunderx2t99.S b/kernel/arm64/sgemm_kernel_16x4_thunderx2t99.S index 0ee10e130..144d4bcd6 100644 --- a/kernel/arm64/sgemm_kernel_16x4_thunderx2t99.S +++ b/kernel/arm64/sgemm_kernel_16x4_thunderx2t99.S @@ -1117,7 +1117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE -sgemm_kernel_begin: +.Lsgemm_kernel_begin: .align 5 add sp, sp, #-(11 * 16) @@ -1145,11 +1145,11 @@ sgemm_kernel_begin: mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble sgemm_kernel_L2_BEGIN + ble .Lsgemm_kernel_L2_BEGIN /******************************************************************************/ -sgemm_kernel_L4_BEGIN: +.Lsgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1159,21 +1159,21 @@ sgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -sgemm_kernel_L4_M16_BEGIN: +.Lsgemm_kernel_L4_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI, #0 - ble sgemm_kernel_L4_M8_BEGIN + ble .Lsgemm_kernel_L4_M8_BEGIN .align 5 -sgemm_kernel_L4_M16_20: +.Lsgemm_kernel_L4_M16_20: mov pB, origPB asr counterL , origK, #4 // L = K / 16 cmp counterL , #2 - blt sgemm_kernel_L4_M16_32 + blt .Lsgemm_kernel_L4_M16_32 KERNEL16x4_I KERNEL16x4_M2 @@ -1182,18 +1182,18 @@ sgemm_kernel_L4_M16_20: KERNEL16x4_M1_M2_x1 subs counterL, counterL, #2 - ble sgemm_kernel_L4_M16_22a + ble .Lsgemm_kernel_L4_M16_22a .align 5 -sgemm_kernel_L4_M16_22: +.Lsgemm_kernel_L4_M16_22: KERNEL16x4_M1_M2_x8 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M16_22 + bgt .Lsgemm_kernel_L4_M16_22 .align 5 -sgemm_kernel_L4_M16_22a: +.Lsgemm_kernel_L4_M16_22a: KERNEL16x4_M1_M2_x4 KERNEL16x4_M1_M2_x2 @@ -1201,13 +1201,13 @@ sgemm_kernel_L4_M16_22a: KERNEL16x4_M1 KERNEL16x4_E - b sgemm_kernel_L4_M16_44 + b .Lsgemm_kernel_L4_M16_44 .align 5 -sgemm_kernel_L4_M16_32: +.Lsgemm_kernel_L4_M16_32: tst counterL, #1 - ble sgemm_kernel_L4_M16_40 + ble .Lsgemm_kernel_L4_M16_40 KERNEL16x4_I KERNEL16x4_M2 @@ -1216,187 +1216,187 @@ sgemm_kernel_L4_M16_32: KERNEL16x4_M1 KERNEL16x4_E - b sgemm_kernel_L4_M16_44 + b .Lsgemm_kernel_L4_M16_44 -sgemm_kernel_L4_M16_40: +.Lsgemm_kernel_L4_M16_40: INIT16x4 -sgemm_kernel_L4_M16_44: +.Lsgemm_kernel_L4_M16_44: ands counterL , origK, #15 - ble sgemm_kernel_L4_M16_100 + ble .Lsgemm_kernel_L4_M16_100 .align 5 -sgemm_kernel_L4_M16_46: +.Lsgemm_kernel_L4_M16_46: KERNEL16x4_SUB subs counterL, counterL, #1 - bne sgemm_kernel_L4_M16_46 + bne .Lsgemm_kernel_L4_M16_46 -sgemm_kernel_L4_M16_100: +.Lsgemm_kernel_L4_M16_100: prfm PLDL1KEEP, [pA] prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] SAVE16x4 -sgemm_kernel_L4_M16_END: +.Lsgemm_kernel_L4_M16_END: subs counterI, counterI, #1 - bne sgemm_kernel_L4_M16_20 + bne .Lsgemm_kernel_L4_M16_20 //------------------------------------------------------------------------------ -sgemm_kernel_L4_M8_BEGIN: +.Lsgemm_kernel_L4_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #8 - ble sgemm_kernel_L4_M4_BEGIN + ble .Lsgemm_kernel_L4_M4_BEGIN -sgemm_kernel_L4_M8_20: +.Lsgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L4_M8_32 + blt .Lsgemm_kernel_L4_M8_32 KERNEL8x4_I // do one in the K KERNEL8x4_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L4_M8_22a + ble .Lsgemm_kernel_L4_M8_22a .align 5 -sgemm_kernel_L4_M8_22: +.Lsgemm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M8_22 + bgt .Lsgemm_kernel_L4_M8_22 -sgemm_kernel_L4_M8_22a: +.Lsgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b sgemm_kernel_L4_M8_44 + b .Lsgemm_kernel_L4_M8_44 -sgemm_kernel_L4_M8_32: +.Lsgemm_kernel_L4_M8_32: tst counterL, #1 - ble sgemm_kernel_L4_M8_40 + ble .Lsgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_E - b sgemm_kernel_L4_M8_44 + b .Lsgemm_kernel_L4_M8_44 -sgemm_kernel_L4_M8_40: +.Lsgemm_kernel_L4_M8_40: INIT8x4 -sgemm_kernel_L4_M8_44: +.Lsgemm_kernel_L4_M8_44: ands counterL , origK, #1 - ble sgemm_kernel_L4_M8_100 + ble .Lsgemm_kernel_L4_M8_100 -sgemm_kernel_L4_M8_46: +.Lsgemm_kernel_L4_M8_46: KERNEL8x4_SUB -sgemm_kernel_L4_M8_100: +.Lsgemm_kernel_L4_M8_100: SAVE8x4 -sgemm_kernel_L4_M8_END: +.Lsgemm_kernel_L4_M8_END: //------------------------------------------------------------------------------ -sgemm_kernel_L4_M4_BEGIN: +.Lsgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #4 - ble sgemm_kernel_L4_M2_BEGIN + ble .Lsgemm_kernel_L4_M2_BEGIN -sgemm_kernel_L4_M4_20: +.Lsgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L4_M4_32 + blt .Lsgemm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L4_M4_22a + ble .Lsgemm_kernel_L4_M4_22a .align 5 -sgemm_kernel_L4_M4_22: +.Lsgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M4_22 + bgt .Lsgemm_kernel_L4_M4_22 -sgemm_kernel_L4_M4_22a: +.Lsgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b sgemm_kernel_L4_M4_44 + b .Lsgemm_kernel_L4_M4_44 -sgemm_kernel_L4_M4_32: +.Lsgemm_kernel_L4_M4_32: tst counterL, #1 - ble sgemm_kernel_L4_M4_40 + ble .Lsgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b sgemm_kernel_L4_M4_44 + b .Lsgemm_kernel_L4_M4_44 -sgemm_kernel_L4_M4_40: +.Lsgemm_kernel_L4_M4_40: INIT4x4 -sgemm_kernel_L4_M4_44: +.Lsgemm_kernel_L4_M4_44: ands counterL , origK, #1 - ble sgemm_kernel_L4_M4_100 + ble .Lsgemm_kernel_L4_M4_100 -sgemm_kernel_L4_M4_46: +.Lsgemm_kernel_L4_M4_46: KERNEL4x4_SUB -sgemm_kernel_L4_M4_100: +.Lsgemm_kernel_L4_M4_100: SAVE4x4 -sgemm_kernel_L4_M4_END: +.Lsgemm_kernel_L4_M4_END: //------------------------------------------------------------------------------ -sgemm_kernel_L4_M2_BEGIN: +.Lsgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L4_M1_BEGIN + ble .Lsgemm_kernel_L4_M1_BEGIN -sgemm_kernel_L4_M2_20: +.Lsgemm_kernel_L4_M2_20: INIT2x4 @@ -1404,9 +1404,9 @@ sgemm_kernel_L4_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M2_40 + ble .Lsgemm_kernel_L4_M2_40 -sgemm_kernel_L4_M2_22: +.Lsgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1419,34 +1419,34 @@ sgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_22 + bgt .Lsgemm_kernel_L4_M2_22 -sgemm_kernel_L4_M2_40: +.Lsgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M2_100 + ble .Lsgemm_kernel_L4_M2_100 -sgemm_kernel_L4_M2_42: +.Lsgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_42 + bgt .Lsgemm_kernel_L4_M2_42 -sgemm_kernel_L4_M2_100: +.Lsgemm_kernel_L4_M2_100: SAVE2x4 -sgemm_kernel_L4_M2_END: +.Lsgemm_kernel_L4_M2_END: -sgemm_kernel_L4_M1_BEGIN: +.Lsgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END -sgemm_kernel_L4_M1_20: +.Lsgemm_kernel_L4_M1_20: INIT1x4 @@ -1454,9 +1454,9 @@ sgemm_kernel_L4_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M1_40 + ble .Lsgemm_kernel_L4_M1_40 -sgemm_kernel_L4_M1_22: +.Lsgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1468,42 +1468,42 @@ sgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_22 + bgt .Lsgemm_kernel_L4_M1_22 -sgemm_kernel_L4_M1_40: +.Lsgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M1_100 + ble .Lsgemm_kernel_L4_M1_100 -sgemm_kernel_L4_M1_42: +.Lsgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_42 + bgt .Lsgemm_kernel_L4_M1_42 -sgemm_kernel_L4_M1_100: +.Lsgemm_kernel_L4_M1_100: SAVE1x4 -sgemm_kernel_L4_END: +.Lsgemm_kernel_L4_END: add origPB, origPB, origK, lsl #4 // B = B + K * 4 * 4 subs counterJ, counterJ , #1 // j-- - bgt sgemm_kernel_L4_BEGIN + bgt .Lsgemm_kernel_L4_BEGIN /******************************************************************************/ -sgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lsgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble sgemm_kernel_L999 + ble .Lsgemm_kernel_L999 tst counterJ , #2 - ble sgemm_kernel_L1_BEGIN + ble .Lsgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1511,14 +1511,14 @@ sgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -sgemm_kernel_L2_M16_BEGIN: +.Lsgemm_kernel_L2_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI,#0 - ble sgemm_kernel_L2_M8_BEGIN + ble .Lsgemm_kernel_L2_M8_BEGIN -sgemm_kernel_L2_M16_20: +.Lsgemm_kernel_L2_M16_20: INIT16x2 @@ -1526,10 +1526,10 @@ sgemm_kernel_L2_M16_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M16_40 + ble .Lsgemm_kernel_L2_M16_40 .align 5 -sgemm_kernel_L2_M16_22: +.Lsgemm_kernel_L2_M16_22: KERNEL16x2_SUB KERNEL16x2_SUB KERNEL16x2_SUB @@ -1541,41 +1541,41 @@ sgemm_kernel_L2_M16_22: KERNEL16x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M16_22 + bgt .Lsgemm_kernel_L2_M16_22 -sgemm_kernel_L2_M16_40: +.Lsgemm_kernel_L2_M16_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M16_100 + ble .Lsgemm_kernel_L2_M16_100 -sgemm_kernel_L2_M16_42: +.Lsgemm_kernel_L2_M16_42: KERNEL16x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M16_42 + bgt .Lsgemm_kernel_L2_M16_42 -sgemm_kernel_L2_M16_100: +.Lsgemm_kernel_L2_M16_100: SAVE16x2 -sgemm_kernel_L2_M16_END: +.Lsgemm_kernel_L2_M16_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L2_M16_20 + bgt .Lsgemm_kernel_L2_M16_20 //------------------------------------------------------------------------------ -sgemm_kernel_L2_M8_BEGIN: +.Lsgemm_kernel_L2_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #8 - ble sgemm_kernel_L2_M4_BEGIN + ble .Lsgemm_kernel_L2_M4_BEGIN -sgemm_kernel_L2_M8_20: +.Lsgemm_kernel_L2_M8_20: INIT8x2 @@ -1583,10 +1583,10 @@ sgemm_kernel_L2_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M8_40 + ble .Lsgemm_kernel_L2_M8_40 .align 5 -sgemm_kernel_L2_M8_22: +.Lsgemm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1598,38 +1598,38 @@ sgemm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M8_22 + bgt .Lsgemm_kernel_L2_M8_22 -sgemm_kernel_L2_M8_40: +.Lsgemm_kernel_L2_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M8_100 + ble .Lsgemm_kernel_L2_M8_100 -sgemm_kernel_L2_M8_42: +.Lsgemm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M8_42 + bgt .Lsgemm_kernel_L2_M8_42 -sgemm_kernel_L2_M8_100: +.Lsgemm_kernel_L2_M8_100: SAVE8x2 -sgemm_kernel_L2_M8_END: +.Lsgemm_kernel_L2_M8_END: //------------------------------------------------------------------------------ -sgemm_kernel_L2_M4_BEGIN: +.Lsgemm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #4 - ble sgemm_kernel_L2_M2_BEGIN + ble .Lsgemm_kernel_L2_M2_BEGIN -sgemm_kernel_L2_M4_20: +.Lsgemm_kernel_L2_M4_20: INIT4x2 @@ -1637,10 +1637,10 @@ sgemm_kernel_L2_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M4_40 + ble .Lsgemm_kernel_L2_M4_40 .align 5 -sgemm_kernel_L2_M4_22: +.Lsgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1652,40 +1652,40 @@ sgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_22 + bgt .Lsgemm_kernel_L2_M4_22 -sgemm_kernel_L2_M4_40: +.Lsgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M4_100 + ble .Lsgemm_kernel_L2_M4_100 -sgemm_kernel_L2_M4_42: +.Lsgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_42 + bgt .Lsgemm_kernel_L2_M4_42 -sgemm_kernel_L2_M4_100: +.Lsgemm_kernel_L2_M4_100: SAVE4x2 -sgemm_kernel_L2_M4_END: +.Lsgemm_kernel_L2_M4_END: //------------------------------------------------------------------------------ -sgemm_kernel_L2_M2_BEGIN: +.Lsgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L2_M1_BEGIN + ble .Lsgemm_kernel_L2_M1_BEGIN -sgemm_kernel_L2_M2_20: +.Lsgemm_kernel_L2_M2_20: INIT2x2 @@ -1693,9 +1693,9 @@ sgemm_kernel_L2_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M2_40 + ble .Lsgemm_kernel_L2_M2_40 -sgemm_kernel_L2_M2_22: +.Lsgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1708,34 +1708,34 @@ sgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_22 + bgt .Lsgemm_kernel_L2_M2_22 -sgemm_kernel_L2_M2_40: +.Lsgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M2_100 + ble .Lsgemm_kernel_L2_M2_100 -sgemm_kernel_L2_M2_42: +.Lsgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_42 + bgt .Lsgemm_kernel_L2_M2_42 -sgemm_kernel_L2_M2_100: +.Lsgemm_kernel_L2_M2_100: SAVE2x2 -sgemm_kernel_L2_M2_END: +.Lsgemm_kernel_L2_M2_END: -sgemm_kernel_L2_M1_BEGIN: +.Lsgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END -sgemm_kernel_L2_M1_20: +.Lsgemm_kernel_L2_M1_20: INIT1x2 @@ -1743,9 +1743,9 @@ sgemm_kernel_L2_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble sgemm_kernel_L2_M1_40 + ble .Lsgemm_kernel_L2_M1_40 -sgemm_kernel_L2_M1_22: +.Lsgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1757,36 +1757,36 @@ sgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_22 + bgt .Lsgemm_kernel_L2_M1_22 -sgemm_kernel_L2_M1_40: +.Lsgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M1_100 + ble .Lsgemm_kernel_L2_M1_100 -sgemm_kernel_L2_M1_42: +.Lsgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_42 + bgt .Lsgemm_kernel_L2_M1_42 -sgemm_kernel_L2_M1_100: +.Lsgemm_kernel_L2_M1_100: SAVE1x2 -sgemm_kernel_L2_END: +.Lsgemm_kernel_L2_END: add origPB, origPB, origK, lsl #3 // B = B + K * 2 * 4 /******************************************************************************/ -sgemm_kernel_L1_BEGIN: +.Lsgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble sgemm_kernel_L999 // done + ble .Lsgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1794,14 +1794,14 @@ sgemm_kernel_L1_BEGIN: mov pA, origPA // pA = A -sgemm_kernel_L1_M16_BEGIN: +.Lsgemm_kernel_L1_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI, #0 - ble sgemm_kernel_L1_M8_BEGIN + ble .Lsgemm_kernel_L1_M8_BEGIN -sgemm_kernel_L1_M16_20: +.Lsgemm_kernel_L1_M16_20: INIT16x1 @@ -1809,10 +1809,10 @@ sgemm_kernel_L1_M16_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M16_40 + ble .Lsgemm_kernel_L1_M16_40 .align 5 -sgemm_kernel_L1_M16_22: +.Lsgemm_kernel_L1_M16_22: KERNEL16x1_SUB KERNEL16x1_SUB KERNEL16x1_SUB @@ -1824,42 +1824,42 @@ sgemm_kernel_L1_M16_22: KERNEL16x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M16_22 + bgt .Lsgemm_kernel_L1_M16_22 -sgemm_kernel_L1_M16_40: +.Lsgemm_kernel_L1_M16_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M16_100 + ble .Lsgemm_kernel_L1_M16_100 -sgemm_kernel_L1_M16_42: +.Lsgemm_kernel_L1_M16_42: KERNEL16x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M16_42 + bgt .Lsgemm_kernel_L1_M16_42 -sgemm_kernel_L1_M16_100: +.Lsgemm_kernel_L1_M16_100: SAVE16x1 -sgemm_kernel_L1_M16_END: +.Lsgemm_kernel_L1_M16_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L1_M16_20 + bgt .Lsgemm_kernel_L1_M16_20 //------------------------------------------------------------------------------ -sgemm_kernel_L1_M8_BEGIN: +.Lsgemm_kernel_L1_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #8 - ble sgemm_kernel_L1_M4_BEGIN + ble .Lsgemm_kernel_L1_M4_BEGIN -sgemm_kernel_L1_M8_20: +.Lsgemm_kernel_L1_M8_20: INIT8x1 @@ -1867,10 +1867,10 @@ sgemm_kernel_L1_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M8_40 + ble .Lsgemm_kernel_L1_M8_40 .align 5 -sgemm_kernel_L1_M8_22: +.Lsgemm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -1882,38 +1882,38 @@ sgemm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M8_22 + bgt .Lsgemm_kernel_L1_M8_22 -sgemm_kernel_L1_M8_40: +.Lsgemm_kernel_L1_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M8_100 + ble .Lsgemm_kernel_L1_M8_100 -sgemm_kernel_L1_M8_42: +.Lsgemm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M8_42 + bgt .Lsgemm_kernel_L1_M8_42 -sgemm_kernel_L1_M8_100: +.Lsgemm_kernel_L1_M8_100: SAVE8x1 -sgemm_kernel_L1_M8_END: +.Lsgemm_kernel_L1_M8_END: //------------------------------------------------------------------------------ -sgemm_kernel_L1_M4_BEGIN: +.Lsgemm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #4 - ble sgemm_kernel_L1_M2_BEGIN + ble .Lsgemm_kernel_L1_M2_BEGIN -sgemm_kernel_L1_M4_20: +.Lsgemm_kernel_L1_M4_20: INIT4x1 @@ -1921,10 +1921,10 @@ sgemm_kernel_L1_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M4_40 + ble .Lsgemm_kernel_L1_M4_40 .align 5 -sgemm_kernel_L1_M4_22: +.Lsgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1936,39 +1936,39 @@ sgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_22 + bgt .Lsgemm_kernel_L1_M4_22 -sgemm_kernel_L1_M4_40: +.Lsgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M4_100 + ble .Lsgemm_kernel_L1_M4_100 -sgemm_kernel_L1_M4_42: +.Lsgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_42 + bgt .Lsgemm_kernel_L1_M4_42 -sgemm_kernel_L1_M4_100: +.Lsgemm_kernel_L1_M4_100: SAVE4x1 -sgemm_kernel_L1_M4_END: +.Lsgemm_kernel_L1_M4_END: //------------------------------------------------------------------------------ -sgemm_kernel_L1_M2_BEGIN: +.Lsgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L1_M1_BEGIN + ble .Lsgemm_kernel_L1_M1_BEGIN -sgemm_kernel_L1_M2_20: +.Lsgemm_kernel_L1_M2_20: INIT2x1 @@ -1976,9 +1976,9 @@ sgemm_kernel_L1_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M2_40 + ble .Lsgemm_kernel_L1_M2_40 -sgemm_kernel_L1_M2_22: +.Lsgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1991,34 +1991,34 @@ sgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_22 + bgt .Lsgemm_kernel_L1_M2_22 -sgemm_kernel_L1_M2_40: +.Lsgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M2_100 + ble .Lsgemm_kernel_L1_M2_100 -sgemm_kernel_L1_M2_42: +.Lsgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_42 + bgt .Lsgemm_kernel_L1_M2_42 -sgemm_kernel_L1_M2_100: +.Lsgemm_kernel_L1_M2_100: SAVE2x1 -sgemm_kernel_L1_M2_END: +.Lsgemm_kernel_L1_M2_END: -sgemm_kernel_L1_M1_BEGIN: +.Lsgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END -sgemm_kernel_L1_M1_20: +.Lsgemm_kernel_L1_M1_20: INIT1x1 @@ -2026,9 +2026,9 @@ sgemm_kernel_L1_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M1_40 + ble .Lsgemm_kernel_L1_M1_40 -sgemm_kernel_L1_M1_22: +.Lsgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2040,28 +2040,28 @@ sgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_22 + bgt .Lsgemm_kernel_L1_M1_22 -sgemm_kernel_L1_M1_40: +.Lsgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M1_100 + ble .Lsgemm_kernel_L1_M1_100 -sgemm_kernel_L1_M1_42: +.Lsgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_42 + bgt .Lsgemm_kernel_L1_M1_42 -sgemm_kernel_L1_M1_100: +.Lsgemm_kernel_L1_M1_100: SAVE1x1 -sgemm_kernel_L1_END: +.Lsgemm_kernel_L1_END: -sgemm_kernel_L999: +.Lsgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/sgemm_kernel_4x4.S b/kernel/arm64/sgemm_kernel_4x4.S index a5cf7baff..76c11f1e1 100644 --- a/kernel/arm64/sgemm_kernel_4x4.S +++ b/kernel/arm64/sgemm_kernel_4x4.S @@ -892,11 +892,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble sgemm_kernel_L2_BEGIN + ble .Lsgemm_kernel_L2_BEGIN /******************************************************************************/ -sgemm_kernel_L4_BEGIN: +.Lsgemm_kernel_L4_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #2 @@ -906,73 +906,73 @@ sgemm_kernel_L4_BEGIN: add pA_2, temp, pA_1 add pA_3, temp, pA_2 -sgemm_kernel_L4_M16_BEGIN: +.Lsgemm_kernel_L4_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI, #0 - ble sgemm_kernel_L4_M8_BEGIN + ble .Lsgemm_kernel_L4_M8_BEGIN -sgemm_kernel_L4_M16_20: +.Lsgemm_kernel_L4_M16_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L4_M16_32 + blt .Lsgemm_kernel_L4_M16_32 KERNEL16x4_I // do one in the K KERNEL16x4_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L4_M16_22a + ble .Lsgemm_kernel_L4_M16_22a .align 5 -sgemm_kernel_L4_M16_22: +.Lsgemm_kernel_L4_M16_22: KERNEL16x4_M1 KERNEL16x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M16_22 + bgt .Lsgemm_kernel_L4_M16_22 -sgemm_kernel_L4_M16_22a: +.Lsgemm_kernel_L4_M16_22a: KERNEL16x4_M1 KERNEL16x4_E - b sgemm_kernel_L4_M16_44 + b .Lsgemm_kernel_L4_M16_44 -sgemm_kernel_L4_M16_32: +.Lsgemm_kernel_L4_M16_32: tst counterL, #1 - ble sgemm_kernel_L4_M16_40 + ble .Lsgemm_kernel_L4_M16_40 KERNEL16x4_I KERNEL16x4_E - b sgemm_kernel_L4_M16_44 + b .Lsgemm_kernel_L4_M16_44 -sgemm_kernel_L4_M16_40: +.Lsgemm_kernel_L4_M16_40: INIT16x4 -sgemm_kernel_L4_M16_44: +.Lsgemm_kernel_L4_M16_44: ands counterL , origK, #1 - ble sgemm_kernel_L4_M16_100 + ble .Lsgemm_kernel_L4_M16_100 -sgemm_kernel_L4_M16_46: +.Lsgemm_kernel_L4_M16_46: KERNEL16x4_SUB -sgemm_kernel_L4_M16_100: +.Lsgemm_kernel_L4_M16_100: SAVE16x4 -sgemm_kernel_L4_M16_END: +.Lsgemm_kernel_L4_M16_END: lsl temp, origK, #4 // k * 4 * 4 = Four rows of A add pA_0, pA_0, temp add pA_0, pA_0, temp @@ -981,26 +981,26 @@ sgemm_kernel_L4_M16_END: add pA_2, pA_1, temp add pA_3, pA_2, temp subs counterI, counterI, #1 - bne sgemm_kernel_L4_M16_20 + bne .Lsgemm_kernel_L4_M16_20 -sgemm_kernel_L4_M8_BEGIN: +.Lsgemm_kernel_L4_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #8 - ble sgemm_kernel_L4_M4_BEGIN + ble .Lsgemm_kernel_L4_M4_BEGIN -sgemm_kernel_L4_M8_20: +.Lsgemm_kernel_L4_M8_20: INIT8x4 mov pB, origPB asr counterL, origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble sgemm_kernel_L4_M8_40 + ble .Lsgemm_kernel_L4_M8_40 -sgemm_kernel_L4_M8_22: +.Lsgemm_kernel_L4_M8_22: KERNEL8x4_SUB KERNEL8x4_SUB @@ -1013,47 +1013,47 @@ sgemm_kernel_L4_M8_22: KERNEL8x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M8_22 + bgt .Lsgemm_kernel_L4_M8_22 -sgemm_kernel_L4_M8_40: +.Lsgemm_kernel_L4_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M8_100 + ble .Lsgemm_kernel_L4_M8_100 -sgemm_kernel_L4_M8_42: +.Lsgemm_kernel_L4_M8_42: KERNEL8x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M8_42 + bgt .Lsgemm_kernel_L4_M8_42 -sgemm_kernel_L4_M8_100: +.Lsgemm_kernel_L4_M8_100: SAVE8x4 -sgemm_kernel_L4_M8_END: +.Lsgemm_kernel_L4_M8_END: lsl temp, origK, #4 // k * 4 * 4 add pA_0, pA_0, temp -sgemm_kernel_L4_M4_BEGIN: +.Lsgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #4 - ble sgemm_kernel_L4_M2_BEGIN + ble .Lsgemm_kernel_L4_M2_BEGIN -sgemm_kernel_L4_M4_20: +.Lsgemm_kernel_L4_M4_20: INIT4x4 mov pB, origPB asr counterL, origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble sgemm_kernel_L4_M4_40 + ble .Lsgemm_kernel_L4_M4_40 -sgemm_kernel_L4_M4_22: +.Lsgemm_kernel_L4_M4_22: KERNEL4x4_SUB KERNEL4x4_SUB @@ -1066,47 +1066,47 @@ sgemm_kernel_L4_M4_22: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M4_22 + bgt .Lsgemm_kernel_L4_M4_22 -sgemm_kernel_L4_M4_40: +.Lsgemm_kernel_L4_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M4_100 + ble .Lsgemm_kernel_L4_M4_100 -sgemm_kernel_L4_M4_42: +.Lsgemm_kernel_L4_M4_42: KERNEL4x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M4_42 + bgt .Lsgemm_kernel_L4_M4_42 -sgemm_kernel_L4_M4_100: +.Lsgemm_kernel_L4_M4_100: SAVE4x4 -sgemm_kernel_L4_M4_END: +.Lsgemm_kernel_L4_M4_END: -sgemm_kernel_L4_M2_BEGIN: +.Lsgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L4_M1_BEGIN + ble .Lsgemm_kernel_L4_M1_BEGIN -sgemm_kernel_L4_M2_20: +.Lsgemm_kernel_L4_M2_20: INIT2x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M2_40 + ble .Lsgemm_kernel_L4_M2_40 -sgemm_kernel_L4_M2_22: +.Lsgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1119,43 +1119,43 @@ sgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_22 + bgt .Lsgemm_kernel_L4_M2_22 -sgemm_kernel_L4_M2_40: +.Lsgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M2_100 + ble .Lsgemm_kernel_L4_M2_100 -sgemm_kernel_L4_M2_42: +.Lsgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_42 + bgt .Lsgemm_kernel_L4_M2_42 -sgemm_kernel_L4_M2_100: +.Lsgemm_kernel_L4_M2_100: SAVE2x4 -sgemm_kernel_L4_M2_END: +.Lsgemm_kernel_L4_M2_END: -sgemm_kernel_L4_M1_BEGIN: +.Lsgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END -sgemm_kernel_L4_M1_20: +.Lsgemm_kernel_L4_M1_20: INIT1x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M1_40 + ble .Lsgemm_kernel_L4_M1_40 -sgemm_kernel_L4_M1_22: +.Lsgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1167,45 +1167,45 @@ sgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_22 + bgt .Lsgemm_kernel_L4_M1_22 -sgemm_kernel_L4_M1_40: +.Lsgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M1_100 + ble .Lsgemm_kernel_L4_M1_100 -sgemm_kernel_L4_M1_42: +.Lsgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_42 + bgt .Lsgemm_kernel_L4_M1_42 -sgemm_kernel_L4_M1_100: +.Lsgemm_kernel_L4_M1_100: SAVE1x4 -sgemm_kernel_L4_END: +.Lsgemm_kernel_L4_END: lsl temp, origK, #4 add origPB, origPB, temp // B = B + K * 4 * 4 subs counterJ, counterJ , #1 // j-- - bgt sgemm_kernel_L4_BEGIN + bgt .Lsgemm_kernel_L4_BEGIN /******************************************************************************/ -sgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lsgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble sgemm_kernel_L999 + ble .Lsgemm_kernel_L999 tst counterJ , #2 - ble sgemm_kernel_L1_BEGIN + ble .Lsgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1215,24 +1215,24 @@ sgemm_kernel_L2_BEGIN: // less than 2 left in N direction -sgemm_kernel_L2_M4_BEGIN: +.Lsgemm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble sgemm_kernel_L2_M2_BEGIN + ble .Lsgemm_kernel_L2_M2_BEGIN -sgemm_kernel_L2_M4_20: +.Lsgemm_kernel_L2_M4_20: INIT4x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M4_40 + ble .Lsgemm_kernel_L2_M4_40 .align 5 -sgemm_kernel_L2_M4_22: +.Lsgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1244,50 +1244,50 @@ sgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_22 + bgt .Lsgemm_kernel_L2_M4_22 -sgemm_kernel_L2_M4_40: +.Lsgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M4_100 + ble .Lsgemm_kernel_L2_M4_100 -sgemm_kernel_L2_M4_42: +.Lsgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_42 + bgt .Lsgemm_kernel_L2_M4_42 -sgemm_kernel_L2_M4_100: +.Lsgemm_kernel_L2_M4_100: SAVE4x2 -sgemm_kernel_L2_M4_END: +.Lsgemm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L2_M4_20 + bgt .Lsgemm_kernel_L2_M4_20 -sgemm_kernel_L2_M2_BEGIN: +.Lsgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L2_M1_BEGIN + ble .Lsgemm_kernel_L2_M1_BEGIN -sgemm_kernel_L2_M2_20: +.Lsgemm_kernel_L2_M2_20: INIT2x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M2_40 + ble .Lsgemm_kernel_L2_M2_40 -sgemm_kernel_L2_M2_22: +.Lsgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1300,43 +1300,43 @@ sgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_22 + bgt .Lsgemm_kernel_L2_M2_22 -sgemm_kernel_L2_M2_40: +.Lsgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M2_100 + ble .Lsgemm_kernel_L2_M2_100 -sgemm_kernel_L2_M2_42: +.Lsgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_42 + bgt .Lsgemm_kernel_L2_M2_42 -sgemm_kernel_L2_M2_100: +.Lsgemm_kernel_L2_M2_100: SAVE2x2 -sgemm_kernel_L2_M2_END: +.Lsgemm_kernel_L2_M2_END: -sgemm_kernel_L2_M1_BEGIN: +.Lsgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END -sgemm_kernel_L2_M1_20: +.Lsgemm_kernel_L2_M1_20: INIT1x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble sgemm_kernel_L2_M1_40 + ble .Lsgemm_kernel_L2_M1_40 -sgemm_kernel_L2_M1_22: +.Lsgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1348,36 +1348,36 @@ sgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_22 + bgt .Lsgemm_kernel_L2_M1_22 -sgemm_kernel_L2_M1_40: +.Lsgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M1_100 + ble .Lsgemm_kernel_L2_M1_100 -sgemm_kernel_L2_M1_42: +.Lsgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_42 + bgt .Lsgemm_kernel_L2_M1_42 -sgemm_kernel_L2_M1_100: +.Lsgemm_kernel_L2_M1_100: SAVE1x2 -sgemm_kernel_L2_END: +.Lsgemm_kernel_L2_END: add origPB, origPB, origK, lsl #3 // B = B + K * 2 * 4 /******************************************************************************/ -sgemm_kernel_L1_BEGIN: +.Lsgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble sgemm_kernel_L999 // done + ble .Lsgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1387,24 +1387,24 @@ sgemm_kernel_L1_BEGIN: -sgemm_kernel_L1_M4_BEGIN: +.Lsgemm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble sgemm_kernel_L1_M2_BEGIN + ble .Lsgemm_kernel_L1_M2_BEGIN -sgemm_kernel_L1_M4_20: +.Lsgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M4_40 + ble .Lsgemm_kernel_L1_M4_40 .align 5 -sgemm_kernel_L1_M4_22: +.Lsgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1416,50 +1416,50 @@ sgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_22 + bgt .Lsgemm_kernel_L1_M4_22 -sgemm_kernel_L1_M4_40: +.Lsgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M4_100 + ble .Lsgemm_kernel_L1_M4_100 -sgemm_kernel_L1_M4_42: +.Lsgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_42 + bgt .Lsgemm_kernel_L1_M4_42 -sgemm_kernel_L1_M4_100: +.Lsgemm_kernel_L1_M4_100: SAVE4x1 -sgemm_kernel_L1_M4_END: +.Lsgemm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L1_M4_20 + bgt .Lsgemm_kernel_L1_M4_20 -sgemm_kernel_L1_M2_BEGIN: +.Lsgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L1_M1_BEGIN + ble .Lsgemm_kernel_L1_M1_BEGIN -sgemm_kernel_L1_M2_20: +.Lsgemm_kernel_L1_M2_20: INIT2x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M2_40 + ble .Lsgemm_kernel_L1_M2_40 -sgemm_kernel_L1_M2_22: +.Lsgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1472,43 +1472,43 @@ sgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_22 + bgt .Lsgemm_kernel_L1_M2_22 -sgemm_kernel_L1_M2_40: +.Lsgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M2_100 + ble .Lsgemm_kernel_L1_M2_100 -sgemm_kernel_L1_M2_42: +.Lsgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_42 + bgt .Lsgemm_kernel_L1_M2_42 -sgemm_kernel_L1_M2_100: +.Lsgemm_kernel_L1_M2_100: SAVE2x1 -sgemm_kernel_L1_M2_END: +.Lsgemm_kernel_L1_M2_END: -sgemm_kernel_L1_M1_BEGIN: +.Lsgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END -sgemm_kernel_L1_M1_20: +.Lsgemm_kernel_L1_M1_20: INIT1x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M1_40 + ble .Lsgemm_kernel_L1_M1_40 -sgemm_kernel_L1_M1_22: +.Lsgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1520,30 +1520,30 @@ sgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_22 + bgt .Lsgemm_kernel_L1_M1_22 -sgemm_kernel_L1_M1_40: +.Lsgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M1_100 + ble .Lsgemm_kernel_L1_M1_100 -sgemm_kernel_L1_M1_42: +.Lsgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_42 + bgt .Lsgemm_kernel_L1_M1_42 -sgemm_kernel_L1_M1_100: +.Lsgemm_kernel_L1_M1_100: SAVE1x1 -sgemm_kernel_L1_END: +.Lsgemm_kernel_L1_END: -sgemm_kernel_L999: +.Lsgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/sgemm_kernel_8x8.S b/kernel/arm64/sgemm_kernel_8x8.S index bd47bed31..6ba64dd35 100644 --- a/kernel/arm64/sgemm_kernel_8x8.S +++ b/kernel/arm64/sgemm_kernel_8x8.S @@ -1263,7 +1263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE -sgemm_kernel_begin: +.Lsgemm_kernel_begin: .align 5 add sp, sp, #-(11 * 16) @@ -1291,12 +1291,12 @@ sgemm_kernel_begin: mov counterJ, origN asr counterJ, counterJ, #3 // J = J / 8 cmp counterJ, #0 - ble sgemm_kernel_L4_BEGIN + ble .Lsgemm_kernel_L4_BEGIN /******************************************************************************/ /******************************************************************************/ -sgemm_kernel_L8_BEGIN: +.Lsgemm_kernel_L8_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #3 @@ -1304,156 +1304,156 @@ sgemm_kernel_L8_BEGIN: /******************************************************************************/ -sgemm_kernel_L8_M8_BEGIN: +.Lsgemm_kernel_L8_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble sgemm_kernel_L8_M4_BEGIN + ble .Lsgemm_kernel_L8_M4_BEGIN -sgemm_kernel_L8_M8_20: +.Lsgemm_kernel_L8_M8_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L8_M8_32 + blt .Lsgemm_kernel_L8_M8_32 KERNEL8x8_I // do one in the K KERNEL8x8_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L8_M8_22a + ble .Lsgemm_kernel_L8_M8_22a .align 5 -sgemm_kernel_L8_M8_22: +.Lsgemm_kernel_L8_M8_22: KERNEL8x8_M1 KERNEL8x8_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L8_M8_22 + bgt .Lsgemm_kernel_L8_M8_22 -sgemm_kernel_L8_M8_22a: +.Lsgemm_kernel_L8_M8_22a: KERNEL8x8_M1 KERNEL8x8_E - b sgemm_kernel_L8_M8_44 + b .Lsgemm_kernel_L8_M8_44 -sgemm_kernel_L8_M8_32: +.Lsgemm_kernel_L8_M8_32: tst counterL, #1 - ble sgemm_kernel_L8_M8_40 + ble .Lsgemm_kernel_L8_M8_40 KERNEL8x8_I KERNEL8x8_E - b sgemm_kernel_L8_M8_44 + b .Lsgemm_kernel_L8_M8_44 -sgemm_kernel_L8_M8_40: +.Lsgemm_kernel_L8_M8_40: INIT8x8 -sgemm_kernel_L8_M8_44: +.Lsgemm_kernel_L8_M8_44: ands counterL , origK, #1 - ble sgemm_kernel_L8_M8_100 + ble .Lsgemm_kernel_L8_M8_100 -sgemm_kernel_L8_M8_46: +.Lsgemm_kernel_L8_M8_46: KERNEL8x8_SUB -sgemm_kernel_L8_M8_100: +.Lsgemm_kernel_L8_M8_100: SAVE8x8 -sgemm_kernel_L8_M8_END: +.Lsgemm_kernel_L8_M8_END: subs counterI, counterI, #1 - bne sgemm_kernel_L8_M8_20 + bne .Lsgemm_kernel_L8_M8_20 /******************************************************************************/ -sgemm_kernel_L8_M4_BEGIN: +.Lsgemm_kernel_L8_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L8_END + ble .Lsgemm_kernel_L8_END tst counterI, #4 - ble sgemm_kernel_L8_M2_BEGIN + ble .Lsgemm_kernel_L8_M2_BEGIN -sgemm_kernel_L8_M4_20: +.Lsgemm_kernel_L8_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L8_M4_32 + blt .Lsgemm_kernel_L8_M4_32 KERNEL4x8_I // do one in the K KERNEL4x8_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L8_M4_22a + ble .Lsgemm_kernel_L8_M4_22a .align 5 -sgemm_kernel_L8_M4_22: +.Lsgemm_kernel_L8_M4_22: KERNEL4x8_M1 KERNEL4x8_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L8_M4_22 + bgt .Lsgemm_kernel_L8_M4_22 -sgemm_kernel_L8_M4_22a: +.Lsgemm_kernel_L8_M4_22a: KERNEL4x8_M1 KERNEL4x8_E - b sgemm_kernel_L8_M4_44 + b .Lsgemm_kernel_L8_M4_44 -sgemm_kernel_L8_M4_32: +.Lsgemm_kernel_L8_M4_32: tst counterL, #1 - ble sgemm_kernel_L8_M4_40 + ble .Lsgemm_kernel_L8_M4_40 KERNEL4x8_I KERNEL4x8_E - b sgemm_kernel_L8_M4_44 + b .Lsgemm_kernel_L8_M4_44 -sgemm_kernel_L8_M4_40: +.Lsgemm_kernel_L8_M4_40: INIT4x8 -sgemm_kernel_L8_M4_44: +.Lsgemm_kernel_L8_M4_44: ands counterL , origK, #1 - ble sgemm_kernel_L8_M4_100 + ble .Lsgemm_kernel_L8_M4_100 -sgemm_kernel_L8_M4_46: +.Lsgemm_kernel_L8_M4_46: KERNEL4x8_SUB -sgemm_kernel_L8_M4_100: +.Lsgemm_kernel_L8_M4_100: SAVE4x8 -sgemm_kernel_L8_M4_END: +.Lsgemm_kernel_L8_M4_END: /******************************************************************************/ -sgemm_kernel_L8_M2_BEGIN: +.Lsgemm_kernel_L8_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L8_END + ble .Lsgemm_kernel_L8_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L8_M1_BEGIN + ble .Lsgemm_kernel_L8_M1_BEGIN -sgemm_kernel_L8_M2_20: +.Lsgemm_kernel_L8_M2_20: INIT2x8 @@ -1461,9 +1461,9 @@ sgemm_kernel_L8_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L8_M2_40 + ble .Lsgemm_kernel_L8_M2_40 -sgemm_kernel_L8_M2_22: +.Lsgemm_kernel_L8_M2_22: KERNEL2x8_SUB KERNEL2x8_SUB @@ -1476,35 +1476,35 @@ sgemm_kernel_L8_M2_22: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L8_M2_22 + bgt .Lsgemm_kernel_L8_M2_22 -sgemm_kernel_L8_M2_40: +.Lsgemm_kernel_L8_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L8_M2_100 + ble .Lsgemm_kernel_L8_M2_100 -sgemm_kernel_L8_M2_42: +.Lsgemm_kernel_L8_M2_42: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L8_M2_42 + bgt .Lsgemm_kernel_L8_M2_42 -sgemm_kernel_L8_M2_100: +.Lsgemm_kernel_L8_M2_100: SAVE2x8 -sgemm_kernel_L8_M2_END: +.Lsgemm_kernel_L8_M2_END: /******************************************************************************/ -sgemm_kernel_L8_M1_BEGIN: +.Lsgemm_kernel_L8_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L8_END + ble .Lsgemm_kernel_L8_END -sgemm_kernel_L8_M1_20: +.Lsgemm_kernel_L8_M1_20: INIT1x8 @@ -1512,9 +1512,9 @@ sgemm_kernel_L8_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L8_M1_40 + ble .Lsgemm_kernel_L8_M1_40 -sgemm_kernel_L8_M1_22: +.Lsgemm_kernel_L8_M1_22: KERNEL1x8_SUB KERNEL1x8_SUB KERNEL1x8_SUB @@ -1526,43 +1526,43 @@ sgemm_kernel_L8_M1_22: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L8_M1_22 + bgt .Lsgemm_kernel_L8_M1_22 -sgemm_kernel_L8_M1_40: +.Lsgemm_kernel_L8_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L8_M1_100 + ble .Lsgemm_kernel_L8_M1_100 -sgemm_kernel_L8_M1_42: +.Lsgemm_kernel_L8_M1_42: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L8_M1_42 + bgt .Lsgemm_kernel_L8_M1_42 -sgemm_kernel_L8_M1_100: +.Lsgemm_kernel_L8_M1_100: SAVE1x8 -sgemm_kernel_L8_END: +.Lsgemm_kernel_L8_END: lsl temp, origK, #5 // B = B + K * 4 * 8 add origPB, origPB, temp subs counterJ, counterJ , #1 // j-- - bgt sgemm_kernel_L8_BEGIN + bgt .Lsgemm_kernel_L8_BEGIN /******************************************************************************/ /******************************************************************************/ -sgemm_kernel_L4_BEGIN: +.Lsgemm_kernel_L4_BEGIN: mov counterJ , origN tst counterJ , #7 - ble sgemm_kernel_L999 + ble .Lsgemm_kernel_L999 tst counterJ , #4 - ble sgemm_kernel_L2_BEGIN + ble .Lsgemm_kernel_L2_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1572,156 +1572,156 @@ sgemm_kernel_L4_BEGIN: /******************************************************************************/ -sgemm_kernel_L4_M8_BEGIN: +.Lsgemm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble sgemm_kernel_L4_M4_BEGIN + ble .Lsgemm_kernel_L4_M4_BEGIN -sgemm_kernel_L4_M8_20: +.Lsgemm_kernel_L4_M8_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L4_M8_32 + blt .Lsgemm_kernel_L4_M8_32 KERNEL8x4_I // do one in the K KERNEL8x4_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L4_M8_22a + ble .Lsgemm_kernel_L4_M8_22a .align 5 -sgemm_kernel_L4_M8_22: +.Lsgemm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M8_22 + bgt .Lsgemm_kernel_L4_M8_22 -sgemm_kernel_L4_M8_22a: +.Lsgemm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b sgemm_kernel_L4_M8_44 + b .Lsgemm_kernel_L4_M8_44 -sgemm_kernel_L4_M8_32: +.Lsgemm_kernel_L4_M8_32: tst counterL, #1 - ble sgemm_kernel_L4_M8_40 + ble .Lsgemm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_E - b sgemm_kernel_L4_M8_44 + b .Lsgemm_kernel_L4_M8_44 -sgemm_kernel_L4_M8_40: +.Lsgemm_kernel_L4_M8_40: INIT8x4 -sgemm_kernel_L4_M8_44: +.Lsgemm_kernel_L4_M8_44: ands counterL , origK, #1 - ble sgemm_kernel_L4_M8_100 + ble .Lsgemm_kernel_L4_M8_100 -sgemm_kernel_L4_M8_46: +.Lsgemm_kernel_L4_M8_46: KERNEL8x4_SUB -sgemm_kernel_L4_M8_100: +.Lsgemm_kernel_L4_M8_100: SAVE8x4 -sgemm_kernel_L4_M8_END: +.Lsgemm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne sgemm_kernel_L4_M8_20 + bne .Lsgemm_kernel_L4_M8_20 /******************************************************************************/ -sgemm_kernel_L4_M4_BEGIN: +.Lsgemm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #4 - ble sgemm_kernel_L4_M2_BEGIN + ble .Lsgemm_kernel_L4_M2_BEGIN -sgemm_kernel_L4_M4_20: +.Lsgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt sgemm_kernel_L4_M4_32 + blt .Lsgemm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble sgemm_kernel_L4_M4_22a + ble .Lsgemm_kernel_L4_M4_22a .align 5 -sgemm_kernel_L4_M4_22: +.Lsgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M4_22 + bgt .Lsgemm_kernel_L4_M4_22 -sgemm_kernel_L4_M4_22a: +.Lsgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b sgemm_kernel_L4_M4_44 + b .Lsgemm_kernel_L4_M4_44 -sgemm_kernel_L4_M4_32: +.Lsgemm_kernel_L4_M4_32: tst counterL, #1 - ble sgemm_kernel_L4_M4_40 + ble .Lsgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b sgemm_kernel_L4_M4_44 + b .Lsgemm_kernel_L4_M4_44 -sgemm_kernel_L4_M4_40: +.Lsgemm_kernel_L4_M4_40: INIT4x4 -sgemm_kernel_L4_M4_44: +.Lsgemm_kernel_L4_M4_44: ands counterL , origK, #1 - ble sgemm_kernel_L4_M4_100 + ble .Lsgemm_kernel_L4_M4_100 -sgemm_kernel_L4_M4_46: +.Lsgemm_kernel_L4_M4_46: KERNEL4x4_SUB -sgemm_kernel_L4_M4_100: +.Lsgemm_kernel_L4_M4_100: SAVE4x4 -sgemm_kernel_L4_M4_END: +.Lsgemm_kernel_L4_M4_END: /******************************************************************************/ -sgemm_kernel_L4_M2_BEGIN: +.Lsgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L4_M1_BEGIN + ble .Lsgemm_kernel_L4_M1_BEGIN -sgemm_kernel_L4_M2_20: +.Lsgemm_kernel_L4_M2_20: INIT2x4 @@ -1729,9 +1729,9 @@ sgemm_kernel_L4_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M2_40 + ble .Lsgemm_kernel_L4_M2_40 -sgemm_kernel_L4_M2_22: +.Lsgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1744,35 +1744,35 @@ sgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_22 + bgt .Lsgemm_kernel_L4_M2_22 -sgemm_kernel_L4_M2_40: +.Lsgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M2_100 + ble .Lsgemm_kernel_L4_M2_100 -sgemm_kernel_L4_M2_42: +.Lsgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M2_42 + bgt .Lsgemm_kernel_L4_M2_42 -sgemm_kernel_L4_M2_100: +.Lsgemm_kernel_L4_M2_100: SAVE2x4 -sgemm_kernel_L4_M2_END: +.Lsgemm_kernel_L4_M2_END: /******************************************************************************/ -sgemm_kernel_L4_M1_BEGIN: +.Lsgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L4_END + ble .Lsgemm_kernel_L4_END -sgemm_kernel_L4_M1_20: +.Lsgemm_kernel_L4_M1_20: INIT1x4 @@ -1780,9 +1780,9 @@ sgemm_kernel_L4_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L4_M1_40 + ble .Lsgemm_kernel_L4_M1_40 -sgemm_kernel_L4_M1_22: +.Lsgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1794,39 +1794,39 @@ sgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_22 + bgt .Lsgemm_kernel_L4_M1_22 -sgemm_kernel_L4_M1_40: +.Lsgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L4_M1_100 + ble .Lsgemm_kernel_L4_M1_100 -sgemm_kernel_L4_M1_42: +.Lsgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L4_M1_42 + bgt .Lsgemm_kernel_L4_M1_42 -sgemm_kernel_L4_M1_100: +.Lsgemm_kernel_L4_M1_100: SAVE1x4 -sgemm_kernel_L4_END: +.Lsgemm_kernel_L4_END: add origPB, origPB, origK, lsl #4 // B = B + K * 4 * 4 /******************************************************************************/ /******************************************************************************/ -sgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lsgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble sgemm_kernel_L999 + ble .Lsgemm_kernel_L999 tst counterJ , #2 - ble sgemm_kernel_L1_BEGIN + ble .Lsgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1836,14 +1836,14 @@ sgemm_kernel_L2_BEGIN: // less than 2 left in N direction /******************************************************************************/ -sgemm_kernel_L2_M8_BEGIN: +.Lsgemm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI,#0 - ble sgemm_kernel_L2_M4_BEGIN + ble .Lsgemm_kernel_L2_M4_BEGIN -sgemm_kernel_L2_M8_20: +.Lsgemm_kernel_L2_M8_20: INIT8x2 @@ -1851,10 +1851,10 @@ sgemm_kernel_L2_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M8_40 + ble .Lsgemm_kernel_L2_M8_40 .align 5 -sgemm_kernel_L2_M8_22: +.Lsgemm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1866,42 +1866,42 @@ sgemm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M8_22 + bgt .Lsgemm_kernel_L2_M8_22 -sgemm_kernel_L2_M8_40: +.Lsgemm_kernel_L2_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M8_100 + ble .Lsgemm_kernel_L2_M8_100 -sgemm_kernel_L2_M8_42: +.Lsgemm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M8_42 + bgt .Lsgemm_kernel_L2_M8_42 -sgemm_kernel_L2_M8_100: +.Lsgemm_kernel_L2_M8_100: SAVE8x2 -sgemm_kernel_L2_M8_END: +.Lsgemm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L2_M8_20 + bgt .Lsgemm_kernel_L2_M8_20 /******************************************************************************/ -sgemm_kernel_L2_M4_BEGIN: +.Lsgemm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #4 - ble sgemm_kernel_L2_M2_BEGIN + ble .Lsgemm_kernel_L2_M2_BEGIN -sgemm_kernel_L2_M4_20: +.Lsgemm_kernel_L2_M4_20: INIT4x2 @@ -1909,10 +1909,10 @@ sgemm_kernel_L2_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M4_40 + ble .Lsgemm_kernel_L2_M4_40 .align 5 -sgemm_kernel_L2_M4_22: +.Lsgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1924,39 +1924,39 @@ sgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_22 + bgt .Lsgemm_kernel_L2_M4_22 -sgemm_kernel_L2_M4_40: +.Lsgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M4_100 + ble .Lsgemm_kernel_L2_M4_100 -sgemm_kernel_L2_M4_42: +.Lsgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M4_42 + bgt .Lsgemm_kernel_L2_M4_42 -sgemm_kernel_L2_M4_100: +.Lsgemm_kernel_L2_M4_100: SAVE4x2 -sgemm_kernel_L2_M4_END: +.Lsgemm_kernel_L2_M4_END: /******************************************************************************/ -sgemm_kernel_L2_M2_BEGIN: +.Lsgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L2_M1_BEGIN + ble .Lsgemm_kernel_L2_M1_BEGIN -sgemm_kernel_L2_M2_20: +.Lsgemm_kernel_L2_M2_20: INIT2x2 @@ -1964,9 +1964,9 @@ sgemm_kernel_L2_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble sgemm_kernel_L2_M2_40 + ble .Lsgemm_kernel_L2_M2_40 -sgemm_kernel_L2_M2_22: +.Lsgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1979,35 +1979,35 @@ sgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_22 + bgt .Lsgemm_kernel_L2_M2_22 -sgemm_kernel_L2_M2_40: +.Lsgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M2_100 + ble .Lsgemm_kernel_L2_M2_100 -sgemm_kernel_L2_M2_42: +.Lsgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M2_42 + bgt .Lsgemm_kernel_L2_M2_42 -sgemm_kernel_L2_M2_100: +.Lsgemm_kernel_L2_M2_100: SAVE2x2 -sgemm_kernel_L2_M2_END: +.Lsgemm_kernel_L2_M2_END: /******************************************************************************/ -sgemm_kernel_L2_M1_BEGIN: +.Lsgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L2_END + ble .Lsgemm_kernel_L2_END -sgemm_kernel_L2_M1_20: +.Lsgemm_kernel_L2_M1_20: INIT1x2 @@ -2015,9 +2015,9 @@ sgemm_kernel_L2_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble sgemm_kernel_L2_M1_40 + ble .Lsgemm_kernel_L2_M1_40 -sgemm_kernel_L2_M1_22: +.Lsgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -2029,37 +2029,37 @@ sgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_22 + bgt .Lsgemm_kernel_L2_M1_22 -sgemm_kernel_L2_M1_40: +.Lsgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L2_M1_100 + ble .Lsgemm_kernel_L2_M1_100 -sgemm_kernel_L2_M1_42: +.Lsgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L2_M1_42 + bgt .Lsgemm_kernel_L2_M1_42 -sgemm_kernel_L2_M1_100: +.Lsgemm_kernel_L2_M1_100: SAVE1x2 -sgemm_kernel_L2_END: +.Lsgemm_kernel_L2_END: add origPB, origPB, origK, lsl #3 // B = B + K * 2 * 4 /******************************************************************************/ /******************************************************************************/ -sgemm_kernel_L1_BEGIN: +.Lsgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble sgemm_kernel_L999 // done + ble .Lsgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -2069,14 +2069,14 @@ sgemm_kernel_L1_BEGIN: /******************************************************************************/ -sgemm_kernel_L1_M8_BEGIN: +.Lsgemm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 cmp counterI, #0 - ble sgemm_kernel_L1_M4_BEGIN + ble .Lsgemm_kernel_L1_M4_BEGIN -sgemm_kernel_L1_M8_20: +.Lsgemm_kernel_L1_M8_20: INIT8x1 @@ -2084,10 +2084,10 @@ sgemm_kernel_L1_M8_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M8_40 + ble .Lsgemm_kernel_L1_M8_40 .align 5 -sgemm_kernel_L1_M8_22: +.Lsgemm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -2099,42 +2099,42 @@ sgemm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M8_22 + bgt .Lsgemm_kernel_L1_M8_22 -sgemm_kernel_L1_M8_40: +.Lsgemm_kernel_L1_M8_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M8_100 + ble .Lsgemm_kernel_L1_M8_100 -sgemm_kernel_L1_M8_42: +.Lsgemm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M8_42 + bgt .Lsgemm_kernel_L1_M8_42 -sgemm_kernel_L1_M8_100: +.Lsgemm_kernel_L1_M8_100: SAVE8x1 -sgemm_kernel_L1_M8_END: +.Lsgemm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt sgemm_kernel_L1_M8_20 + bgt .Lsgemm_kernel_L1_M8_20 /******************************************************************************/ -sgemm_kernel_L1_M4_BEGIN: +.Lsgemm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #4 - ble sgemm_kernel_L1_M2_BEGIN + ble .Lsgemm_kernel_L1_M2_BEGIN -sgemm_kernel_L1_M4_20: +.Lsgemm_kernel_L1_M4_20: INIT4x1 @@ -2142,10 +2142,10 @@ sgemm_kernel_L1_M4_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M4_40 + ble .Lsgemm_kernel_L1_M4_40 .align 5 -sgemm_kernel_L1_M4_22: +.Lsgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -2157,39 +2157,39 @@ sgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_22 + bgt .Lsgemm_kernel_L1_M4_22 -sgemm_kernel_L1_M4_40: +.Lsgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M4_100 + ble .Lsgemm_kernel_L1_M4_100 -sgemm_kernel_L1_M4_42: +.Lsgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M4_42 + bgt .Lsgemm_kernel_L1_M4_42 -sgemm_kernel_L1_M4_100: +.Lsgemm_kernel_L1_M4_100: SAVE4x1 -sgemm_kernel_L1_M4_END: +.Lsgemm_kernel_L1_M4_END: /******************************************************************************/ -sgemm_kernel_L1_M2_BEGIN: +.Lsgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble sgemm_kernel_L1_M1_BEGIN + ble .Lsgemm_kernel_L1_M1_BEGIN -sgemm_kernel_L1_M2_20: +.Lsgemm_kernel_L1_M2_20: INIT2x1 @@ -2197,9 +2197,9 @@ sgemm_kernel_L1_M2_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M2_40 + ble .Lsgemm_kernel_L1_M2_40 -sgemm_kernel_L1_M2_22: +.Lsgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -2212,35 +2212,35 @@ sgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_22 + bgt .Lsgemm_kernel_L1_M2_22 -sgemm_kernel_L1_M2_40: +.Lsgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M2_100 + ble .Lsgemm_kernel_L1_M2_100 -sgemm_kernel_L1_M2_42: +.Lsgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M2_42 + bgt .Lsgemm_kernel_L1_M2_42 -sgemm_kernel_L1_M2_100: +.Lsgemm_kernel_L1_M2_100: SAVE2x1 -sgemm_kernel_L1_M2_END: +.Lsgemm_kernel_L1_M2_END: /******************************************************************************/ -sgemm_kernel_L1_M1_BEGIN: +.Lsgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble sgemm_kernel_L1_END + ble .Lsgemm_kernel_L1_END -sgemm_kernel_L1_M1_20: +.Lsgemm_kernel_L1_M1_20: INIT1x1 @@ -2248,9 +2248,9 @@ sgemm_kernel_L1_M1_20: asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble sgemm_kernel_L1_M1_40 + ble .Lsgemm_kernel_L1_M1_40 -sgemm_kernel_L1_M1_22: +.Lsgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2262,30 +2262,30 @@ sgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_22 + bgt .Lsgemm_kernel_L1_M1_22 -sgemm_kernel_L1_M1_40: +.Lsgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble sgemm_kernel_L1_M1_100 + ble .Lsgemm_kernel_L1_M1_100 -sgemm_kernel_L1_M1_42: +.Lsgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt sgemm_kernel_L1_M1_42 + bgt .Lsgemm_kernel_L1_M1_42 -sgemm_kernel_L1_M1_100: +.Lsgemm_kernel_L1_M1_100: SAVE1x1 -sgemm_kernel_L1_END: +.Lsgemm_kernel_L1_END: /******************************************************************************/ -sgemm_kernel_L999: +.Lsgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/strmm_kernel_16x4.S b/kernel/arm64/strmm_kernel_16x4.S index 77e05103d..985a0a9a6 100644 --- a/kernel/arm64/strmm_kernel_16x4.S +++ b/kernel/arm64/strmm_kernel_16x4.S @@ -1035,7 +1035,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE -strmm_kernel_begin: +.Lstrmm_kernel_begin: .align 5 add sp, sp, #-(11 * 16) @@ -1066,11 +1066,11 @@ strmm_kernel_begin: mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble strmm_kernel_L2_BEGIN + ble .Lstrmm_kernel_L2_BEGIN /******************************************************************************/ -strmm_kernel_L4_BEGIN: +.Lstrmm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1084,15 +1084,15 @@ strmm_kernel_L4_BEGIN: #endif mov pA, origPA // pA = start of A array -strmm_kernel_L4_M16_BEGIN: +.Lstrmm_kernel_L4_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI, #0 - ble strmm_kernel_L4_M8_BEGIN + ble .Lstrmm_kernel_L4_M8_BEGIN .align 5 -strmm_kernel_L4_M16_20: +.Lstrmm_kernel_L4_M16_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1114,7 +1114,7 @@ strmm_kernel_L4_M16_20: asr counterL , tempK, #3 cmp counterL , #2 - blt strmm_kernel_L4_M16_32 + blt .Lstrmm_kernel_L4_M16_32 KERNEL16x4_I KERNEL16x4_M2 @@ -1126,10 +1126,10 @@ strmm_kernel_L4_M16_20: KERNEL16x4_M2 subs counterL, counterL, #2 - ble strmm_kernel_L4_M16_22a + ble .Lstrmm_kernel_L4_M16_22a .align 5 -strmm_kernel_L4_M16_22: +.Lstrmm_kernel_L4_M16_22: KERNEL16x4_M1 KERNEL16x4_M2 @@ -1141,10 +1141,10 @@ strmm_kernel_L4_M16_22: KERNEL16x4_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L4_M16_22 + bgt .Lstrmm_kernel_L4_M16_22 .align 5 -strmm_kernel_L4_M16_22a: +.Lstrmm_kernel_L4_M16_22a: KERNEL16x4_M1 KERNEL16x4_M2 @@ -1155,13 +1155,13 @@ strmm_kernel_L4_M16_22a: KERNEL16x4_M1 KERNEL16x4_E - b strmm_kernel_L4_M16_44 + b .Lstrmm_kernel_L4_M16_44 .align 5 -strmm_kernel_L4_M16_32: +.Lstrmm_kernel_L4_M16_32: tst counterL, #1 - ble strmm_kernel_L4_M16_40 + ble .Lstrmm_kernel_L4_M16_40 KERNEL16x4_I KERNEL16x4_M2 @@ -1172,25 +1172,25 @@ strmm_kernel_L4_M16_32: KERNEL16x4_M1 KERNEL16x4_E - b strmm_kernel_L4_M16_44 + b .Lstrmm_kernel_L4_M16_44 -strmm_kernel_L4_M16_40: +.Lstrmm_kernel_L4_M16_40: INIT16x4 -strmm_kernel_L4_M16_44: +.Lstrmm_kernel_L4_M16_44: ands counterL , tempK, #7 - ble strmm_kernel_L4_M16_100 + ble .Lstrmm_kernel_L4_M16_100 .align 5 -strmm_kernel_L4_M16_46: +.Lstrmm_kernel_L4_M16_46: KERNEL16x4_SUB subs counterL, counterL, #1 - bne strmm_kernel_L4_M16_46 + bne .Lstrmm_kernel_L4_M16_46 -strmm_kernel_L4_M16_100: +.Lstrmm_kernel_L4_M16_100: SAVE16x4 @@ -1213,22 +1213,22 @@ strmm_kernel_L4_M16_100: prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] -strmm_kernel_L4_M16_END: +.Lstrmm_kernel_L4_M16_END: subs counterI, counterI, #1 - bne strmm_kernel_L4_M16_20 + bne .Lstrmm_kernel_L4_M16_20 //------------------------------------------------------------------------------ -strmm_kernel_L4_M8_BEGIN: +.Lstrmm_kernel_L4_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END tst counterI, #8 - ble strmm_kernel_L4_M4_BEGIN + ble .Lstrmm_kernel_L4_M4_BEGIN -strmm_kernel_L4_M8_20: +.Lstrmm_kernel_L4_M8_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1250,54 +1250,54 @@ strmm_kernel_L4_M8_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt strmm_kernel_L4_M8_32 + blt .Lstrmm_kernel_L4_M8_32 KERNEL8x4_I // do one in the K KERNEL8x4_M2 // do another in the K subs counterL, counterL, #2 - ble strmm_kernel_L4_M8_22a + ble .Lstrmm_kernel_L4_M8_22a .align 5 -strmm_kernel_L4_M8_22: +.Lstrmm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L4_M8_22 + bgt .Lstrmm_kernel_L4_M8_22 -strmm_kernel_L4_M8_22a: +.Lstrmm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b strmm_kernel_L4_M8_44 + b .Lstrmm_kernel_L4_M8_44 -strmm_kernel_L4_M8_32: +.Lstrmm_kernel_L4_M8_32: tst counterL, #1 - ble strmm_kernel_L4_M8_40 + ble .Lstrmm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_E - b strmm_kernel_L4_M8_44 + b .Lstrmm_kernel_L4_M8_44 -strmm_kernel_L4_M8_40: +.Lstrmm_kernel_L4_M8_40: INIT8x4 -strmm_kernel_L4_M8_44: +.Lstrmm_kernel_L4_M8_44: ands counterL , tempK, #1 - ble strmm_kernel_L4_M8_100 + ble .Lstrmm_kernel_L4_M8_100 -strmm_kernel_L4_M8_46: +.Lstrmm_kernel_L4_M8_46: KERNEL8x4_SUB -strmm_kernel_L4_M8_100: +.Lstrmm_kernel_L4_M8_100: SAVE8x4 @@ -1317,20 +1317,20 @@ strmm_kernel_L4_M8_100: add tempOffset, tempOffset, #8 #endif -strmm_kernel_L4_M8_END: +.Lstrmm_kernel_L4_M8_END: //------------------------------------------------------------------------------ -strmm_kernel_L4_M4_BEGIN: +.Lstrmm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END tst counterI, #4 - ble strmm_kernel_L4_M2_BEGIN + ble .Lstrmm_kernel_L4_M2_BEGIN -strmm_kernel_L4_M4_20: +.Lstrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1350,54 +1350,54 @@ strmm_kernel_L4_M4_20: #endif asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt strmm_kernel_L4_M4_32 + blt .Lstrmm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble strmm_kernel_L4_M4_22a + ble .Lstrmm_kernel_L4_M4_22a .align 5 -strmm_kernel_L4_M4_22: +.Lstrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L4_M4_22 + bgt .Lstrmm_kernel_L4_M4_22 -strmm_kernel_L4_M4_22a: +.Lstrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b strmm_kernel_L4_M4_44 + b .Lstrmm_kernel_L4_M4_44 -strmm_kernel_L4_M4_32: +.Lstrmm_kernel_L4_M4_32: tst counterL, #1 - ble strmm_kernel_L4_M4_40 + ble .Lstrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b strmm_kernel_L4_M4_44 + b .Lstrmm_kernel_L4_M4_44 -strmm_kernel_L4_M4_40: +.Lstrmm_kernel_L4_M4_40: INIT4x4 -strmm_kernel_L4_M4_44: +.Lstrmm_kernel_L4_M4_44: ands counterL , tempK, #1 - ble strmm_kernel_L4_M4_100 + ble .Lstrmm_kernel_L4_M4_100 -strmm_kernel_L4_M4_46: +.Lstrmm_kernel_L4_M4_46: KERNEL4x4_SUB -strmm_kernel_L4_M4_100: +.Lstrmm_kernel_L4_M4_100: SAVE4x4 @@ -1415,20 +1415,20 @@ strmm_kernel_L4_M4_100: #if defined(LEFT) add tempOffset, tempOffset, #4 #endif -strmm_kernel_L4_M4_END: +.Lstrmm_kernel_L4_M4_END: //------------------------------------------------------------------------------ -strmm_kernel_L4_M2_BEGIN: +.Lstrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L4_M1_BEGIN + ble .Lstrmm_kernel_L4_M1_BEGIN -strmm_kernel_L4_M2_20: +.Lstrmm_kernel_L4_M2_20: INIT2x4 @@ -1451,9 +1451,9 @@ strmm_kernel_L4_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L4_M2_40 + ble .Lstrmm_kernel_L4_M2_40 -strmm_kernel_L4_M2_22: +.Lstrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1466,22 +1466,22 @@ strmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M2_22 + bgt .Lstrmm_kernel_L4_M2_22 -strmm_kernel_L4_M2_40: +.Lstrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L4_M2_100 + ble .Lstrmm_kernel_L4_M2_100 -strmm_kernel_L4_M2_42: +.Lstrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M2_42 + bgt .Lstrmm_kernel_L4_M2_42 -strmm_kernel_L4_M2_100: +.Lstrmm_kernel_L4_M2_100: SAVE2x4 @@ -1500,15 +1500,15 @@ strmm_kernel_L4_M2_100: #if defined(LEFT) add tempOffset, tempOffset, #2 #endif -strmm_kernel_L4_M2_END: +.Lstrmm_kernel_L4_M2_END: -strmm_kernel_L4_M1_BEGIN: +.Lstrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END -strmm_kernel_L4_M1_20: +.Lstrmm_kernel_L4_M1_20: INIT1x4 @@ -1531,9 +1531,9 @@ strmm_kernel_L4_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L4_M1_40 + ble .Lstrmm_kernel_L4_M1_40 -strmm_kernel_L4_M1_22: +.Lstrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1545,22 +1545,22 @@ strmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M1_22 + bgt .Lstrmm_kernel_L4_M1_22 -strmm_kernel_L4_M1_40: +.Lstrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L4_M1_100 + ble .Lstrmm_kernel_L4_M1_100 -strmm_kernel_L4_M1_42: +.Lstrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M1_42 + bgt .Lstrmm_kernel_L4_M1_42 -strmm_kernel_L4_M1_100: +.Lstrmm_kernel_L4_M1_100: SAVE1x4 @@ -1579,26 +1579,26 @@ strmm_kernel_L4_M1_100: #if defined(LEFT) add tempOffset, tempOffset, #1 #endif -strmm_kernel_L4_END: +.Lstrmm_kernel_L4_END: add origPB, origPB, origK, lsl #4 // B = B + K * 4 * 4 #if !defined(LEFT) add tempOffset, tempOffset, #4 #endif subs counterJ, counterJ , #1 // j-- - bgt strmm_kernel_L4_BEGIN + bgt .Lstrmm_kernel_L4_BEGIN /******************************************************************************/ -strmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lstrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble strmm_kernel_L999 + ble .Lstrmm_kernel_L999 tst counterJ , #2 - ble strmm_kernel_L1_BEGIN + ble .Lstrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1609,14 +1609,14 @@ strmm_kernel_L2_BEGIN: // less than 2 left in N direction #endif mov pA, origPA // pA = A -strmm_kernel_L2_M16_BEGIN: +.Lstrmm_kernel_L2_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI,#0 - ble strmm_kernel_L2_M8_BEGIN + ble .Lstrmm_kernel_L2_M8_BEGIN -strmm_kernel_L2_M16_20: +.Lstrmm_kernel_L2_M16_20: INIT16x2 @@ -1640,10 +1640,10 @@ strmm_kernel_L2_M16_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M16_40 + ble .Lstrmm_kernel_L2_M16_40 .align 5 -strmm_kernel_L2_M16_22: +.Lstrmm_kernel_L2_M16_22: KERNEL16x2_SUB KERNEL16x2_SUB KERNEL16x2_SUB @@ -1655,22 +1655,22 @@ strmm_kernel_L2_M16_22: KERNEL16x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M16_22 + bgt .Lstrmm_kernel_L2_M16_22 -strmm_kernel_L2_M16_40: +.Lstrmm_kernel_L2_M16_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M16_100 + ble .Lstrmm_kernel_L2_M16_100 -strmm_kernel_L2_M16_42: +.Lstrmm_kernel_L2_M16_42: KERNEL16x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M16_42 + bgt .Lstrmm_kernel_L2_M16_42 -strmm_kernel_L2_M16_100: +.Lstrmm_kernel_L2_M16_100: SAVE16x2 @@ -1690,22 +1690,22 @@ strmm_kernel_L2_M16_100: add tempOffset, tempOffset, #16 #endif -strmm_kernel_L2_M16_END: +.Lstrmm_kernel_L2_M16_END: subs counterI, counterI, #1 - bgt strmm_kernel_L2_M16_20 + bgt .Lstrmm_kernel_L2_M16_20 //------------------------------------------------------------------------------ -strmm_kernel_L2_M8_BEGIN: +.Lstrmm_kernel_L2_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END tst counterI, #8 - ble strmm_kernel_L2_M4_BEGIN + ble .Lstrmm_kernel_L2_M4_BEGIN -strmm_kernel_L2_M8_20: +.Lstrmm_kernel_L2_M8_20: INIT8x2 @@ -1729,10 +1729,10 @@ strmm_kernel_L2_M8_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M8_40 + ble .Lstrmm_kernel_L2_M8_40 .align 5 -strmm_kernel_L2_M8_22: +.Lstrmm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -1744,22 +1744,22 @@ strmm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M8_22 + bgt .Lstrmm_kernel_L2_M8_22 -strmm_kernel_L2_M8_40: +.Lstrmm_kernel_L2_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M8_100 + ble .Lstrmm_kernel_L2_M8_100 -strmm_kernel_L2_M8_42: +.Lstrmm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M8_42 + bgt .Lstrmm_kernel_L2_M8_42 -strmm_kernel_L2_M8_100: +.Lstrmm_kernel_L2_M8_100: SAVE8x2 @@ -1779,19 +1779,19 @@ strmm_kernel_L2_M8_100: add tempOffset, tempOffset, #8 #endif -strmm_kernel_L2_M8_END: +.Lstrmm_kernel_L2_M8_END: //------------------------------------------------------------------------------ -strmm_kernel_L2_M4_BEGIN: +.Lstrmm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END tst counterI, #4 - ble strmm_kernel_L2_M2_BEGIN + ble .Lstrmm_kernel_L2_M2_BEGIN -strmm_kernel_L2_M4_20: +.Lstrmm_kernel_L2_M4_20: INIT4x2 @@ -1814,10 +1814,10 @@ strmm_kernel_L2_M4_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M4_40 + ble .Lstrmm_kernel_L2_M4_40 .align 5 -strmm_kernel_L2_M4_22: +.Lstrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1829,22 +1829,22 @@ strmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M4_22 + bgt .Lstrmm_kernel_L2_M4_22 -strmm_kernel_L2_M4_40: +.Lstrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M4_100 + ble .Lstrmm_kernel_L2_M4_100 -strmm_kernel_L2_M4_42: +.Lstrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M4_42 + bgt .Lstrmm_kernel_L2_M4_42 -strmm_kernel_L2_M4_100: +.Lstrmm_kernel_L2_M4_100: SAVE4x2 @@ -1863,21 +1863,21 @@ strmm_kernel_L2_M4_100: #if defined(LEFT) add tempOffset, tempOffset, #4 #endif -strmm_kernel_L2_M4_END: +.Lstrmm_kernel_L2_M4_END: //------------------------------------------------------------------------------ -strmm_kernel_L2_M2_BEGIN: +.Lstrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L2_M1_BEGIN + ble .Lstrmm_kernel_L2_M1_BEGIN -strmm_kernel_L2_M2_20: +.Lstrmm_kernel_L2_M2_20: INIT2x2 @@ -1900,9 +1900,9 @@ strmm_kernel_L2_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M2_40 + ble .Lstrmm_kernel_L2_M2_40 -strmm_kernel_L2_M2_22: +.Lstrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1915,22 +1915,22 @@ strmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M2_22 + bgt .Lstrmm_kernel_L2_M2_22 -strmm_kernel_L2_M2_40: +.Lstrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M2_100 + ble .Lstrmm_kernel_L2_M2_100 -strmm_kernel_L2_M2_42: +.Lstrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M2_42 + bgt .Lstrmm_kernel_L2_M2_42 -strmm_kernel_L2_M2_100: +.Lstrmm_kernel_L2_M2_100: SAVE2x2 #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) @@ -1949,15 +1949,15 @@ strmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -strmm_kernel_L2_M2_END: +.Lstrmm_kernel_L2_M2_END: -strmm_kernel_L2_M1_BEGIN: +.Lstrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END -strmm_kernel_L2_M1_20: +.Lstrmm_kernel_L2_M1_20: INIT1x2 @@ -1980,9 +1980,9 @@ strmm_kernel_L2_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble strmm_kernel_L2_M1_40 + ble .Lstrmm_kernel_L2_M1_40 -strmm_kernel_L2_M1_22: +.Lstrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1994,22 +1994,22 @@ strmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M1_22 + bgt .Lstrmm_kernel_L2_M1_22 -strmm_kernel_L2_M1_40: +.Lstrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M1_100 + ble .Lstrmm_kernel_L2_M1_100 -strmm_kernel_L2_M1_42: +.Lstrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M1_42 + bgt .Lstrmm_kernel_L2_M1_42 -strmm_kernel_L2_M1_100: +.Lstrmm_kernel_L2_M1_100: SAVE1x2 @@ -2028,7 +2028,7 @@ strmm_kernel_L2_M1_100: #if defined(LEFT) add tempOffset, tempOffset, #1 #endif -strmm_kernel_L2_END: +.Lstrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -2036,11 +2036,11 @@ strmm_kernel_L2_END: /******************************************************************************/ -strmm_kernel_L1_BEGIN: +.Lstrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble strmm_kernel_L999 // done + ble .Lstrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -2051,14 +2051,14 @@ strmm_kernel_L1_BEGIN: #endif mov pA, origPA // pA = A -strmm_kernel_L1_M16_BEGIN: +.Lstrmm_kernel_L1_M16_BEGIN: mov counterI, origM asr counterI, counterI, #4 // counterI = counterI / 16 cmp counterI, #0 - ble strmm_kernel_L1_M8_BEGIN + ble .Lstrmm_kernel_L1_M8_BEGIN -strmm_kernel_L1_M16_20: +.Lstrmm_kernel_L1_M16_20: INIT16x1 @@ -2082,10 +2082,10 @@ strmm_kernel_L1_M16_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M16_40 + ble .Lstrmm_kernel_L1_M16_40 .align 5 -strmm_kernel_L1_M16_22: +.Lstrmm_kernel_L1_M16_22: KERNEL16x1_SUB KERNEL16x1_SUB KERNEL16x1_SUB @@ -2097,22 +2097,22 @@ strmm_kernel_L1_M16_22: KERNEL16x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M16_22 + bgt .Lstrmm_kernel_L1_M16_22 -strmm_kernel_L1_M16_40: +.Lstrmm_kernel_L1_M16_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M16_100 + ble .Lstrmm_kernel_L1_M16_100 -strmm_kernel_L1_M16_42: +.Lstrmm_kernel_L1_M16_42: KERNEL16x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M16_42 + bgt .Lstrmm_kernel_L1_M16_42 -strmm_kernel_L1_M16_100: +.Lstrmm_kernel_L1_M16_100: SAVE16x1 @@ -2132,23 +2132,23 @@ strmm_kernel_L1_M16_100: add tempOffset, tempOffset, #16 #endif -strmm_kernel_L1_M16_END: +.Lstrmm_kernel_L1_M16_END: subs counterI, counterI, #1 - bgt strmm_kernel_L1_M16_20 + bgt .Lstrmm_kernel_L1_M16_20 //------------------------------------------------------------------------------ -strmm_kernel_L1_M8_BEGIN: +.Lstrmm_kernel_L1_M8_BEGIN: mov counterI, origM tst counterI , #15 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END tst counterI, #8 - ble strmm_kernel_L1_M4_BEGIN + ble .Lstrmm_kernel_L1_M4_BEGIN -strmm_kernel_L1_M8_20: +.Lstrmm_kernel_L1_M8_20: INIT8x1 @@ -2172,10 +2172,10 @@ strmm_kernel_L1_M8_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M8_40 + ble .Lstrmm_kernel_L1_M8_40 .align 5 -strmm_kernel_L1_M8_22: +.Lstrmm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -2187,22 +2187,22 @@ strmm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M8_22 + bgt .Lstrmm_kernel_L1_M8_22 -strmm_kernel_L1_M8_40: +.Lstrmm_kernel_L1_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M8_100 + ble .Lstrmm_kernel_L1_M8_100 -strmm_kernel_L1_M8_42: +.Lstrmm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M8_42 + bgt .Lstrmm_kernel_L1_M8_42 -strmm_kernel_L1_M8_100: +.Lstrmm_kernel_L1_M8_100: SAVE8x1 @@ -2222,19 +2222,19 @@ strmm_kernel_L1_M8_100: add tempOffset, tempOffset, #8 #endif -strmm_kernel_L1_M8_END: +.Lstrmm_kernel_L1_M8_END: //------------------------------------------------------------------------------ -strmm_kernel_L1_M4_BEGIN: +.Lstrmm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END tst counterI, #4 - ble strmm_kernel_L1_M2_BEGIN + ble .Lstrmm_kernel_L1_M2_BEGIN -strmm_kernel_L1_M4_20: +.Lstrmm_kernel_L1_M4_20: INIT4x1 @@ -2257,10 +2257,10 @@ strmm_kernel_L1_M4_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M4_40 + ble .Lstrmm_kernel_L1_M4_40 .align 5 -strmm_kernel_L1_M4_22: +.Lstrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -2272,22 +2272,22 @@ strmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M4_22 + bgt .Lstrmm_kernel_L1_M4_22 -strmm_kernel_L1_M4_40: +.Lstrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M4_100 + ble .Lstrmm_kernel_L1_M4_100 -strmm_kernel_L1_M4_42: +.Lstrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M4_42 + bgt .Lstrmm_kernel_L1_M4_42 -strmm_kernel_L1_M4_100: +.Lstrmm_kernel_L1_M4_100: SAVE4x1 @@ -2306,20 +2306,20 @@ strmm_kernel_L1_M4_100: #if defined(LEFT) add tempOffset, tempOffset, #4 #endif -strmm_kernel_L1_M4_END: +.Lstrmm_kernel_L1_M4_END: //------------------------------------------------------------------------------ -strmm_kernel_L1_M2_BEGIN: +.Lstrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L1_M1_BEGIN + ble .Lstrmm_kernel_L1_M1_BEGIN -strmm_kernel_L1_M2_20: +.Lstrmm_kernel_L1_M2_20: INIT2x1 @@ -2342,9 +2342,9 @@ strmm_kernel_L1_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M2_40 + ble .Lstrmm_kernel_L1_M2_40 -strmm_kernel_L1_M2_22: +.Lstrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -2357,22 +2357,22 @@ strmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M2_22 + bgt .Lstrmm_kernel_L1_M2_22 -strmm_kernel_L1_M2_40: +.Lstrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M2_100 + ble .Lstrmm_kernel_L1_M2_100 -strmm_kernel_L1_M2_42: +.Lstrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M2_42 + bgt .Lstrmm_kernel_L1_M2_42 -strmm_kernel_L1_M2_100: +.Lstrmm_kernel_L1_M2_100: SAVE2x1 @@ -2391,15 +2391,15 @@ strmm_kernel_L1_M2_100: #if defined(LEFT) add tempOffset, tempOffset, #2 #endif -strmm_kernel_L1_M2_END: +.Lstrmm_kernel_L1_M2_END: -strmm_kernel_L1_M1_BEGIN: +.Lstrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END -strmm_kernel_L1_M1_20: +.Lstrmm_kernel_L1_M1_20: INIT1x1 @@ -2422,9 +2422,9 @@ strmm_kernel_L1_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M1_40 + ble .Lstrmm_kernel_L1_M1_40 -strmm_kernel_L1_M1_22: +.Lstrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2436,28 +2436,28 @@ strmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M1_22 + bgt .Lstrmm_kernel_L1_M1_22 -strmm_kernel_L1_M1_40: +.Lstrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M1_100 + ble .Lstrmm_kernel_L1_M1_100 -strmm_kernel_L1_M1_42: +.Lstrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M1_42 + bgt .Lstrmm_kernel_L1_M1_42 -strmm_kernel_L1_M1_100: +.Lstrmm_kernel_L1_M1_100: SAVE1x1 -strmm_kernel_L1_END: +.Lstrmm_kernel_L1_END: -strmm_kernel_L999: +.Lstrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/strmm_kernel_4x4.S b/kernel/arm64/strmm_kernel_4x4.S index eeb3e6e72..5f7818c40 100644 --- a/kernel/arm64/strmm_kernel_4x4.S +++ b/kernel/arm64/strmm_kernel_4x4.S @@ -507,7 +507,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE -strmm_kernel_begin: +.Lstrmm_kernel_begin: .align 5 add sp, sp, #-(11 * 16) @@ -539,11 +539,11 @@ strmm_kernel_begin: mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble strmm_kernel_L2_BEGIN + ble .Lstrmm_kernel_L2_BEGIN /******************************************************************************/ -strmm_kernel_L4_BEGIN: +.Lstrmm_kernel_L4_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #2 @@ -553,14 +553,14 @@ strmm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -strmm_kernel_L4_M4_BEGIN: +.Lstrmm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble strmm_kernel_L4_M2_BEGIN + ble .Lstrmm_kernel_L4_M2_BEGIN -strmm_kernel_L4_M4_20: +.Lstrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -581,54 +581,54 @@ strmm_kernel_L4_M4_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt strmm_kernel_L4_M4_32 + blt .Lstrmm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble strmm_kernel_L4_M4_22a + ble .Lstrmm_kernel_L4_M4_22a .align 5 -strmm_kernel_L4_M4_22: +.Lstrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L4_M4_22 + bgt .Lstrmm_kernel_L4_M4_22 -strmm_kernel_L4_M4_22a: +.Lstrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b strmm_kernel_L4_M4_44 + b .Lstrmm_kernel_L4_M4_44 -strmm_kernel_L4_M4_32: +.Lstrmm_kernel_L4_M4_32: tst counterL, #1 - ble strmm_kernel_L4_M4_40 + ble .Lstrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b strmm_kernel_L4_M4_44 + b .Lstrmm_kernel_L4_M4_44 -strmm_kernel_L4_M4_40: +.Lstrmm_kernel_L4_M4_40: INIT4x4 -strmm_kernel_L4_M4_44: +.Lstrmm_kernel_L4_M4_44: ands counterL , tempK, #1 - ble strmm_kernel_L4_M4_100 + ble .Lstrmm_kernel_L4_M4_100 -strmm_kernel_L4_M4_46: +.Lstrmm_kernel_L4_M4_46: KERNEL4x4_SUB -strmm_kernel_L4_M4_100: +.Lstrmm_kernel_L4_M4_100: SAVE4x4 @@ -647,20 +647,20 @@ strmm_kernel_L4_M4_100: add tempOffset, tempOffset, #4 #endif -strmm_kernel_L4_M4_END: +.Lstrmm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne strmm_kernel_L4_M4_20 + bne .Lstrmm_kernel_L4_M4_20 -strmm_kernel_L4_M2_BEGIN: +.Lstrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L4_M1_BEGIN + ble .Lstrmm_kernel_L4_M1_BEGIN -strmm_kernel_L4_M2_20: +.Lstrmm_kernel_L4_M2_20: INIT2x4 @@ -684,9 +684,9 @@ strmm_kernel_L4_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L4_M2_40 + ble .Lstrmm_kernel_L4_M2_40 -strmm_kernel_L4_M2_22: +.Lstrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -699,22 +699,22 @@ strmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M2_22 + bgt .Lstrmm_kernel_L4_M2_22 -strmm_kernel_L4_M2_40: +.Lstrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L4_M2_100 + ble .Lstrmm_kernel_L4_M2_100 -strmm_kernel_L4_M2_42: +.Lstrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M2_42 + bgt .Lstrmm_kernel_L4_M2_42 -strmm_kernel_L4_M2_100: +.Lstrmm_kernel_L4_M2_100: SAVE2x4 @@ -735,15 +735,15 @@ strmm_kernel_L4_M2_100: #endif -strmm_kernel_L4_M2_END: +.Lstrmm_kernel_L4_M2_END: -strmm_kernel_L4_M1_BEGIN: +.Lstrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END -strmm_kernel_L4_M1_20: +.Lstrmm_kernel_L4_M1_20: INIT1x4 @@ -767,9 +767,9 @@ strmm_kernel_L4_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L4_M1_40 + ble .Lstrmm_kernel_L4_M1_40 -strmm_kernel_L4_M1_22: +.Lstrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -781,22 +781,22 @@ strmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M1_22 + bgt .Lstrmm_kernel_L4_M1_22 -strmm_kernel_L4_M1_40: +.Lstrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L4_M1_100 + ble .Lstrmm_kernel_L4_M1_100 -strmm_kernel_L4_M1_42: +.Lstrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M1_42 + bgt .Lstrmm_kernel_L4_M1_42 -strmm_kernel_L4_M1_100: +.Lstrmm_kernel_L4_M1_100: SAVE1x4 @@ -817,7 +817,7 @@ strmm_kernel_L4_M1_100: #endif -strmm_kernel_L4_END: +.Lstrmm_kernel_L4_END: add origPB, origPB, origK, lsl #4 // B = B + K * 4 * 4 #if !defined(LEFT) @@ -825,19 +825,19 @@ strmm_kernel_L4_END: #endif subs counterJ, counterJ , #1 // j-- - bgt strmm_kernel_L4_BEGIN + bgt .Lstrmm_kernel_L4_BEGIN /******************************************************************************/ -strmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lstrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble strmm_kernel_L999 + ble .Lstrmm_kernel_L999 tst counterJ , #2 - ble strmm_kernel_L1_BEGIN + ble .Lstrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -849,14 +849,14 @@ strmm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -strmm_kernel_L2_M4_BEGIN: +.Lstrmm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble strmm_kernel_L2_M2_BEGIN + ble .Lstrmm_kernel_L2_M2_BEGIN -strmm_kernel_L2_M4_20: +.Lstrmm_kernel_L2_M4_20: INIT4x2 @@ -880,10 +880,10 @@ strmm_kernel_L2_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M4_40 + ble .Lstrmm_kernel_L2_M4_40 .align 5 -strmm_kernel_L2_M4_22: +.Lstrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -895,22 +895,22 @@ strmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M4_22 + bgt .Lstrmm_kernel_L2_M4_22 -strmm_kernel_L2_M4_40: +.Lstrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M4_100 + ble .Lstrmm_kernel_L2_M4_100 -strmm_kernel_L2_M4_42: +.Lstrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M4_42 + bgt .Lstrmm_kernel_L2_M4_42 -strmm_kernel_L2_M4_100: +.Lstrmm_kernel_L2_M4_100: SAVE4x2 @@ -930,22 +930,22 @@ strmm_kernel_L2_M4_100: add tempOffset, tempOffset, #4 #endif -strmm_kernel_L2_M4_END: +.Lstrmm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt strmm_kernel_L2_M4_20 + bgt .Lstrmm_kernel_L2_M4_20 -strmm_kernel_L2_M2_BEGIN: +.Lstrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L2_M1_BEGIN + ble .Lstrmm_kernel_L2_M1_BEGIN -strmm_kernel_L2_M2_20: +.Lstrmm_kernel_L2_M2_20: INIT2x2 @@ -969,9 +969,9 @@ strmm_kernel_L2_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M2_40 + ble .Lstrmm_kernel_L2_M2_40 -strmm_kernel_L2_M2_22: +.Lstrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -984,22 +984,22 @@ strmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M2_22 + bgt .Lstrmm_kernel_L2_M2_22 -strmm_kernel_L2_M2_40: +.Lstrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M2_100 + ble .Lstrmm_kernel_L2_M2_100 -strmm_kernel_L2_M2_42: +.Lstrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M2_42 + bgt .Lstrmm_kernel_L2_M2_42 -strmm_kernel_L2_M2_100: +.Lstrmm_kernel_L2_M2_100: SAVE2x2 #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) @@ -1018,15 +1018,15 @@ strmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -strmm_kernel_L2_M2_END: +.Lstrmm_kernel_L2_M2_END: -strmm_kernel_L2_M1_BEGIN: +.Lstrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END -strmm_kernel_L2_M1_20: +.Lstrmm_kernel_L2_M1_20: INIT1x2 @@ -1050,9 +1050,9 @@ strmm_kernel_L2_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble strmm_kernel_L2_M1_40 + ble .Lstrmm_kernel_L2_M1_40 -strmm_kernel_L2_M1_22: +.Lstrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1064,22 +1064,22 @@ strmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M1_22 + bgt .Lstrmm_kernel_L2_M1_22 -strmm_kernel_L2_M1_40: +.Lstrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M1_100 + ble .Lstrmm_kernel_L2_M1_100 -strmm_kernel_L2_M1_42: +.Lstrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M1_42 + bgt .Lstrmm_kernel_L2_M1_42 -strmm_kernel_L2_M1_100: +.Lstrmm_kernel_L2_M1_100: SAVE1x2 @@ -1099,7 +1099,7 @@ strmm_kernel_L2_M1_100: add tempOffset, tempOffset, #1 #endif -strmm_kernel_L2_END: +.Lstrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -1107,11 +1107,11 @@ strmm_kernel_L2_END: /******************************************************************************/ -strmm_kernel_L1_BEGIN: +.Lstrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble strmm_kernel_L999 // done + ble .Lstrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1123,14 +1123,14 @@ strmm_kernel_L1_BEGIN: mov pA, origPA // pA = A -strmm_kernel_L1_M4_BEGIN: +.Lstrmm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble strmm_kernel_L1_M2_BEGIN + ble .Lstrmm_kernel_L1_M2_BEGIN -strmm_kernel_L1_M4_20: +.Lstrmm_kernel_L1_M4_20: INIT4x1 @@ -1154,10 +1154,10 @@ strmm_kernel_L1_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M4_40 + ble .Lstrmm_kernel_L1_M4_40 .align 5 -strmm_kernel_L1_M4_22: +.Lstrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1169,22 +1169,22 @@ strmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M4_22 + bgt .Lstrmm_kernel_L1_M4_22 -strmm_kernel_L1_M4_40: +.Lstrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M4_100 + ble .Lstrmm_kernel_L1_M4_100 -strmm_kernel_L1_M4_42: +.Lstrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M4_42 + bgt .Lstrmm_kernel_L1_M4_42 -strmm_kernel_L1_M4_100: +.Lstrmm_kernel_L1_M4_100: SAVE4x1 @@ -1204,22 +1204,22 @@ strmm_kernel_L1_M4_100: add tempOffset, tempOffset, #4 #endif -strmm_kernel_L1_M4_END: +.Lstrmm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt strmm_kernel_L1_M4_20 + bgt .Lstrmm_kernel_L1_M4_20 -strmm_kernel_L1_M2_BEGIN: +.Lstrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L1_M1_BEGIN + ble .Lstrmm_kernel_L1_M1_BEGIN -strmm_kernel_L1_M2_20: +.Lstrmm_kernel_L1_M2_20: INIT2x1 @@ -1243,9 +1243,9 @@ strmm_kernel_L1_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M2_40 + ble .Lstrmm_kernel_L1_M2_40 -strmm_kernel_L1_M2_22: +.Lstrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1258,22 +1258,22 @@ strmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M2_22 + bgt .Lstrmm_kernel_L1_M2_22 -strmm_kernel_L1_M2_40: +.Lstrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M2_100 + ble .Lstrmm_kernel_L1_M2_100 -strmm_kernel_L1_M2_42: +.Lstrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M2_42 + bgt .Lstrmm_kernel_L1_M2_42 -strmm_kernel_L1_M2_100: +.Lstrmm_kernel_L1_M2_100: SAVE2x1 @@ -1294,15 +1294,15 @@ strmm_kernel_L1_M2_100: #endif -strmm_kernel_L1_M2_END: +.Lstrmm_kernel_L1_M2_END: -strmm_kernel_L1_M1_BEGIN: +.Lstrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END -strmm_kernel_L1_M1_20: +.Lstrmm_kernel_L1_M1_20: INIT1x1 @@ -1326,9 +1326,9 @@ strmm_kernel_L1_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M1_40 + ble .Lstrmm_kernel_L1_M1_40 -strmm_kernel_L1_M1_22: +.Lstrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1340,22 +1340,22 @@ strmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M1_22 + bgt .Lstrmm_kernel_L1_M1_22 -strmm_kernel_L1_M1_40: +.Lstrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M1_100 + ble .Lstrmm_kernel_L1_M1_100 -strmm_kernel_L1_M1_42: +.Lstrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M1_42 + bgt .Lstrmm_kernel_L1_M1_42 -strmm_kernel_L1_M1_100: +.Lstrmm_kernel_L1_M1_100: SAVE1x1 @@ -1377,7 +1377,7 @@ strmm_kernel_L1_M1_100: #endif #endif -strmm_kernel_L1_END: +.Lstrmm_kernel_L1_END: #if 0 #if !defined(LEFT) @@ -1385,7 +1385,7 @@ strmm_kernel_L1_END: #endif #endif -strmm_kernel_L999: +.Lstrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/strmm_kernel_8x8.S b/kernel/arm64/strmm_kernel_8x8.S index 843f0c890..cd18e6847 100644 --- a/kernel/arm64/strmm_kernel_8x8.S +++ b/kernel/arm64/strmm_kernel_8x8.S @@ -1257,7 +1257,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE -strmm_kernel_begin: +.Lstrmm_kernel_begin: .align 5 add sp, sp, #-(11 * 16) @@ -1288,12 +1288,12 @@ strmm_kernel_begin: mov counterJ, origN asr counterJ, counterJ, #3 // J = J / 8 cmp counterJ, #0 - ble strmm_kernel_L4_BEGIN + ble .Lstrmm_kernel_L4_BEGIN /******************************************************************************/ /******************************************************************************/ -strmm_kernel_L8_BEGIN: +.Lstrmm_kernel_L8_BEGIN: mov pCRow0, pC // pCRow0 = C add pC, pC, LDC, lsl #3 @@ -1305,14 +1305,14 @@ strmm_kernel_L8_BEGIN: /******************************************************************************/ -strmm_kernel_L8_M8_BEGIN: +.Lstrmm_kernel_L8_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble strmm_kernel_L8_M4_BEGIN + ble .Lstrmm_kernel_L8_M4_BEGIN -strmm_kernel_L8_M8_20: +.Lstrmm_kernel_L8_M8_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1333,54 +1333,54 @@ strmm_kernel_L8_M8_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt strmm_kernel_L8_M8_32 + blt .Lstrmm_kernel_L8_M8_32 KERNEL8x8_I // do one in the K KERNEL8x8_M2 // do another in the K subs counterL, counterL, #2 - ble strmm_kernel_L8_M8_22a + ble .Lstrmm_kernel_L8_M8_22a .align 5 -strmm_kernel_L8_M8_22: +.Lstrmm_kernel_L8_M8_22: KERNEL8x8_M1 KERNEL8x8_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L8_M8_22 + bgt .Lstrmm_kernel_L8_M8_22 -strmm_kernel_L8_M8_22a: +.Lstrmm_kernel_L8_M8_22a: KERNEL8x8_M1 KERNEL8x8_E - b strmm_kernel_L8_M8_44 + b .Lstrmm_kernel_L8_M8_44 -strmm_kernel_L8_M8_32: +.Lstrmm_kernel_L8_M8_32: tst counterL, #1 - ble strmm_kernel_L8_M8_40 + ble .Lstrmm_kernel_L8_M8_40 KERNEL8x8_I KERNEL8x8_E - b strmm_kernel_L8_M8_44 + b .Lstrmm_kernel_L8_M8_44 -strmm_kernel_L8_M8_40: +.Lstrmm_kernel_L8_M8_40: INIT8x8 -strmm_kernel_L8_M8_44: +.Lstrmm_kernel_L8_M8_44: ands counterL , tempK, #1 - ble strmm_kernel_L8_M8_100 + ble .Lstrmm_kernel_L8_M8_100 -strmm_kernel_L8_M8_46: +.Lstrmm_kernel_L8_M8_46: KERNEL8x8_SUB -strmm_kernel_L8_M8_100: +.Lstrmm_kernel_L8_M8_100: SAVE8x8 @@ -1399,22 +1399,22 @@ strmm_kernel_L8_M8_100: add tempOffset, tempOffset, #8 #endif -strmm_kernel_L8_M8_END: +.Lstrmm_kernel_L8_M8_END: subs counterI, counterI, #1 - bne strmm_kernel_L8_M8_20 + bne .Lstrmm_kernel_L8_M8_20 /******************************************************************************/ -strmm_kernel_L8_M4_BEGIN: +.Lstrmm_kernel_L8_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble strmm_kernel_L8_END + ble .Lstrmm_kernel_L8_END tst counterI, #4 - ble strmm_kernel_L8_M2_BEGIN + ble .Lstrmm_kernel_L8_M2_BEGIN -strmm_kernel_L8_M4_20: +.Lstrmm_kernel_L8_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1436,54 +1436,54 @@ strmm_kernel_L8_M4_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt strmm_kernel_L8_M4_32 + blt .Lstrmm_kernel_L8_M4_32 KERNEL4x8_I // do one in the K KERNEL4x8_M2 // do another in the K subs counterL, counterL, #2 - ble strmm_kernel_L8_M4_22a + ble .Lstrmm_kernel_L8_M4_22a .align 5 -strmm_kernel_L8_M4_22: +.Lstrmm_kernel_L8_M4_22: KERNEL4x8_M1 KERNEL4x8_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L8_M4_22 + bgt .Lstrmm_kernel_L8_M4_22 -strmm_kernel_L8_M4_22a: +.Lstrmm_kernel_L8_M4_22a: KERNEL4x8_M1 KERNEL4x8_E - b strmm_kernel_L8_M4_44 + b .Lstrmm_kernel_L8_M4_44 -strmm_kernel_L8_M4_32: +.Lstrmm_kernel_L8_M4_32: tst counterL, #1 - ble strmm_kernel_L8_M4_40 + ble .Lstrmm_kernel_L8_M4_40 KERNEL4x8_I KERNEL4x8_E - b strmm_kernel_L8_M4_44 + b .Lstrmm_kernel_L8_M4_44 -strmm_kernel_L8_M4_40: +.Lstrmm_kernel_L8_M4_40: INIT4x8 -strmm_kernel_L8_M4_44: +.Lstrmm_kernel_L8_M4_44: ands counterL , tempK, #1 - ble strmm_kernel_L8_M4_100 + ble .Lstrmm_kernel_L8_M4_100 -strmm_kernel_L8_M4_46: +.Lstrmm_kernel_L8_M4_46: KERNEL4x8_SUB -strmm_kernel_L8_M4_100: +.Lstrmm_kernel_L8_M4_100: SAVE4x8 @@ -1503,20 +1503,20 @@ strmm_kernel_L8_M4_100: add tempOffset, tempOffset, #4 #endif -strmm_kernel_L8_M4_END: +.Lstrmm_kernel_L8_M4_END: /******************************************************************************/ -strmm_kernel_L8_M2_BEGIN: +.Lstrmm_kernel_L8_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L8_END + ble .Lstrmm_kernel_L8_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L8_M1_BEGIN + ble .Lstrmm_kernel_L8_M1_BEGIN -strmm_kernel_L8_M2_20: +.Lstrmm_kernel_L8_M2_20: INIT2x8 @@ -1540,9 +1540,9 @@ strmm_kernel_L8_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L8_M2_40 + ble .Lstrmm_kernel_L8_M2_40 -strmm_kernel_L8_M2_22: +.Lstrmm_kernel_L8_M2_22: KERNEL2x8_SUB KERNEL2x8_SUB @@ -1555,22 +1555,22 @@ strmm_kernel_L8_M2_22: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L8_M2_22 + bgt .Lstrmm_kernel_L8_M2_22 -strmm_kernel_L8_M2_40: +.Lstrmm_kernel_L8_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L8_M2_100 + ble .Lstrmm_kernel_L8_M2_100 -strmm_kernel_L8_M2_42: +.Lstrmm_kernel_L8_M2_42: KERNEL2x8_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L8_M2_42 + bgt .Lstrmm_kernel_L8_M2_42 -strmm_kernel_L8_M2_100: +.Lstrmm_kernel_L8_M2_100: SAVE2x8 @@ -1590,16 +1590,16 @@ strmm_kernel_L8_M2_100: add tempOffset, tempOffset, #2 #endif -strmm_kernel_L8_M2_END: +.Lstrmm_kernel_L8_M2_END: /******************************************************************************/ -strmm_kernel_L8_M1_BEGIN: +.Lstrmm_kernel_L8_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L8_END + ble .Lstrmm_kernel_L8_END -strmm_kernel_L8_M1_20: +.Lstrmm_kernel_L8_M1_20: INIT1x8 @@ -1623,9 +1623,9 @@ strmm_kernel_L8_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L8_M1_40 + ble .Lstrmm_kernel_L8_M1_40 -strmm_kernel_L8_M1_22: +.Lstrmm_kernel_L8_M1_22: KERNEL1x8_SUB KERNEL1x8_SUB KERNEL1x8_SUB @@ -1637,22 +1637,22 @@ strmm_kernel_L8_M1_22: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L8_M1_22 + bgt .Lstrmm_kernel_L8_M1_22 -strmm_kernel_L8_M1_40: +.Lstrmm_kernel_L8_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L8_M1_100 + ble .Lstrmm_kernel_L8_M1_100 -strmm_kernel_L8_M1_42: +.Lstrmm_kernel_L8_M1_42: KERNEL1x8_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L8_M1_42 + bgt .Lstrmm_kernel_L8_M1_42 -strmm_kernel_L8_M1_100: +.Lstrmm_kernel_L8_M1_100: SAVE1x8 @@ -1672,7 +1672,7 @@ strmm_kernel_L8_M1_100: add tempOffset, tempOffset, #1 #endif -strmm_kernel_L8_END: +.Lstrmm_kernel_L8_END: lsl temp, origK, #5 // B = B + K * 4 * 8 add origPB, origPB, temp @@ -1681,19 +1681,19 @@ strmm_kernel_L8_END: #endif subs counterJ, counterJ , #1 // j-- - bgt strmm_kernel_L8_BEGIN + bgt .Lstrmm_kernel_L8_BEGIN /******************************************************************************/ /******************************************************************************/ -strmm_kernel_L4_BEGIN: +.Lstrmm_kernel_L4_BEGIN: mov counterJ , origN tst counterJ , #7 - ble strmm_kernel_L999 + ble .Lstrmm_kernel_L999 tst counterJ , #4 - ble strmm_kernel_L2_BEGIN + ble .Lstrmm_kernel_L2_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1707,14 +1707,14 @@ strmm_kernel_L4_BEGIN: /******************************************************************************/ -strmm_kernel_L4_M8_BEGIN: +.Lstrmm_kernel_L4_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI, #0 - ble strmm_kernel_L4_M4_BEGIN + ble .Lstrmm_kernel_L4_M4_BEGIN -strmm_kernel_L4_M8_20: +.Lstrmm_kernel_L4_M8_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1736,54 +1736,54 @@ strmm_kernel_L4_M8_20: asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt strmm_kernel_L4_M8_32 + blt .Lstrmm_kernel_L4_M8_32 KERNEL8x4_I // do one in the K KERNEL8x4_M2 // do another in the K subs counterL, counterL, #2 - ble strmm_kernel_L4_M8_22a + ble .Lstrmm_kernel_L4_M8_22a .align 5 -strmm_kernel_L4_M8_22: +.Lstrmm_kernel_L4_M8_22: KERNEL8x4_M1 KERNEL8x4_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L4_M8_22 + bgt .Lstrmm_kernel_L4_M8_22 -strmm_kernel_L4_M8_22a: +.Lstrmm_kernel_L4_M8_22a: KERNEL8x4_M1 KERNEL8x4_E - b strmm_kernel_L4_M8_44 + b .Lstrmm_kernel_L4_M8_44 -strmm_kernel_L4_M8_32: +.Lstrmm_kernel_L4_M8_32: tst counterL, #1 - ble strmm_kernel_L4_M8_40 + ble .Lstrmm_kernel_L4_M8_40 KERNEL8x4_I KERNEL8x4_E - b strmm_kernel_L4_M8_44 + b .Lstrmm_kernel_L4_M8_44 -strmm_kernel_L4_M8_40: +.Lstrmm_kernel_L4_M8_40: INIT8x4 -strmm_kernel_L4_M8_44: +.Lstrmm_kernel_L4_M8_44: ands counterL , tempK, #1 - ble strmm_kernel_L4_M8_100 + ble .Lstrmm_kernel_L4_M8_100 -strmm_kernel_L4_M8_46: +.Lstrmm_kernel_L4_M8_46: KERNEL8x4_SUB -strmm_kernel_L4_M8_100: +.Lstrmm_kernel_L4_M8_100: SAVE8x4 @@ -1802,22 +1802,22 @@ strmm_kernel_L4_M8_100: #if defined(LEFT) add tempOffset, tempOffset, #8 #endif -strmm_kernel_L4_M8_END: +.Lstrmm_kernel_L4_M8_END: subs counterI, counterI, #1 - bne strmm_kernel_L4_M8_20 + bne .Lstrmm_kernel_L4_M8_20 /******************************************************************************/ -strmm_kernel_L4_M4_BEGIN: +.Lstrmm_kernel_L4_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END tst counterI, #4 - ble strmm_kernel_L4_M2_BEGIN + ble .Lstrmm_kernel_L4_M2_BEGIN -strmm_kernel_L4_M4_20: +.Lstrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1837,54 +1837,54 @@ strmm_kernel_L4_M4_20: #endif asr counterL , tempK, #1 // L = K / 2 cmp counterL , #2 // is there at least 4 to do? - blt strmm_kernel_L4_M4_32 + blt .Lstrmm_kernel_L4_M4_32 KERNEL4x4_I // do one in the K KERNEL4x4_M2 // do another in the K subs counterL, counterL, #2 - ble strmm_kernel_L4_M4_22a + ble .Lstrmm_kernel_L4_M4_22a .align 5 -strmm_kernel_L4_M4_22: +.Lstrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 subs counterL, counterL, #1 - bgt strmm_kernel_L4_M4_22 + bgt .Lstrmm_kernel_L4_M4_22 -strmm_kernel_L4_M4_22a: +.Lstrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b strmm_kernel_L4_M4_44 + b .Lstrmm_kernel_L4_M4_44 -strmm_kernel_L4_M4_32: +.Lstrmm_kernel_L4_M4_32: tst counterL, #1 - ble strmm_kernel_L4_M4_40 + ble .Lstrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_E - b strmm_kernel_L4_M4_44 + b .Lstrmm_kernel_L4_M4_44 -strmm_kernel_L4_M4_40: +.Lstrmm_kernel_L4_M4_40: INIT4x4 -strmm_kernel_L4_M4_44: +.Lstrmm_kernel_L4_M4_44: ands counterL , tempK, #1 - ble strmm_kernel_L4_M4_100 + ble .Lstrmm_kernel_L4_M4_100 -strmm_kernel_L4_M4_46: +.Lstrmm_kernel_L4_M4_46: KERNEL4x4_SUB -strmm_kernel_L4_M4_100: +.Lstrmm_kernel_L4_M4_100: SAVE4x4 @@ -1902,20 +1902,20 @@ strmm_kernel_L4_M4_100: #if defined(LEFT) add tempOffset, tempOffset, #4 #endif -strmm_kernel_L4_M4_END: +.Lstrmm_kernel_L4_M4_END: /******************************************************************************/ -strmm_kernel_L4_M2_BEGIN: +.Lstrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L4_M1_BEGIN + ble .Lstrmm_kernel_L4_M1_BEGIN -strmm_kernel_L4_M2_20: +.Lstrmm_kernel_L4_M2_20: INIT2x4 @@ -1938,9 +1938,9 @@ strmm_kernel_L4_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L4_M2_40 + ble .Lstrmm_kernel_L4_M2_40 -strmm_kernel_L4_M2_22: +.Lstrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1953,22 +1953,22 @@ strmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M2_22 + bgt .Lstrmm_kernel_L4_M2_22 -strmm_kernel_L4_M2_40: +.Lstrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L4_M2_100 + ble .Lstrmm_kernel_L4_M2_100 -strmm_kernel_L4_M2_42: +.Lstrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M2_42 + bgt .Lstrmm_kernel_L4_M2_42 -strmm_kernel_L4_M2_100: +.Lstrmm_kernel_L4_M2_100: SAVE2x4 @@ -1987,16 +1987,16 @@ strmm_kernel_L4_M2_100: #if defined(LEFT) add tempOffset, tempOffset, #2 #endif -strmm_kernel_L4_M2_END: +.Lstrmm_kernel_L4_M2_END: /******************************************************************************/ -strmm_kernel_L4_M1_BEGIN: +.Lstrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L4_END + ble .Lstrmm_kernel_L4_END -strmm_kernel_L4_M1_20: +.Lstrmm_kernel_L4_M1_20: INIT1x4 @@ -2019,9 +2019,9 @@ strmm_kernel_L4_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L4_M1_40 + ble .Lstrmm_kernel_L4_M1_40 -strmm_kernel_L4_M1_22: +.Lstrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -2033,22 +2033,22 @@ strmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M1_22 + bgt .Lstrmm_kernel_L4_M1_22 -strmm_kernel_L4_M1_40: +.Lstrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L4_M1_100 + ble .Lstrmm_kernel_L4_M1_100 -strmm_kernel_L4_M1_42: +.Lstrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L4_M1_42 + bgt .Lstrmm_kernel_L4_M1_42 -strmm_kernel_L4_M1_100: +.Lstrmm_kernel_L4_M1_100: SAVE1x4 @@ -2067,7 +2067,7 @@ strmm_kernel_L4_M1_100: #if defined(LEFT) add tempOffset, tempOffset, #1 #endif -strmm_kernel_L4_END: +.Lstrmm_kernel_L4_END: add origPB, origPB, origK, lsl #4 // B = B + K * 4 * 4 #if !defined(LEFT) add tempOffset, tempOffset, #4 @@ -2076,14 +2076,14 @@ strmm_kernel_L4_END: /******************************************************************************/ /******************************************************************************/ -strmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lstrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble strmm_kernel_L999 + ble .Lstrmm_kernel_L999 tst counterJ , #2 - ble strmm_kernel_L1_BEGIN + ble .Lstrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -2096,14 +2096,14 @@ strmm_kernel_L2_BEGIN: // less than 2 left in N direction /******************************************************************************/ -strmm_kernel_L2_M8_BEGIN: +.Lstrmm_kernel_L2_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 // counterI = counterI / 8 cmp counterI,#0 - ble strmm_kernel_L2_M4_BEGIN + ble .Lstrmm_kernel_L2_M4_BEGIN -strmm_kernel_L2_M8_20: +.Lstrmm_kernel_L2_M8_20: INIT8x2 @@ -2126,10 +2126,10 @@ strmm_kernel_L2_M8_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M8_40 + ble .Lstrmm_kernel_L2_M8_40 .align 5 -strmm_kernel_L2_M8_22: +.Lstrmm_kernel_L2_M8_22: KERNEL8x2_SUB KERNEL8x2_SUB KERNEL8x2_SUB @@ -2141,22 +2141,22 @@ strmm_kernel_L2_M8_22: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M8_22 + bgt .Lstrmm_kernel_L2_M8_22 -strmm_kernel_L2_M8_40: +.Lstrmm_kernel_L2_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M8_100 + ble .Lstrmm_kernel_L2_M8_100 -strmm_kernel_L2_M8_42: +.Lstrmm_kernel_L2_M8_42: KERNEL8x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M8_42 + bgt .Lstrmm_kernel_L2_M8_42 -strmm_kernel_L2_M8_100: +.Lstrmm_kernel_L2_M8_100: SAVE8x2 @@ -2175,23 +2175,23 @@ strmm_kernel_L2_M8_100: #if defined(LEFT) add tempOffset, tempOffset, #8 #endif -strmm_kernel_L2_M8_END: +.Lstrmm_kernel_L2_M8_END: subs counterI, counterI, #1 - bgt strmm_kernel_L2_M8_20 + bgt .Lstrmm_kernel_L2_M8_20 /******************************************************************************/ -strmm_kernel_L2_M4_BEGIN: +.Lstrmm_kernel_L2_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END tst counterI, #4 - ble strmm_kernel_L2_M2_BEGIN + ble .Lstrmm_kernel_L2_M2_BEGIN -strmm_kernel_L2_M4_20: +.Lstrmm_kernel_L2_M4_20: INIT4x2 @@ -2214,10 +2214,10 @@ strmm_kernel_L2_M4_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M4_40 + ble .Lstrmm_kernel_L2_M4_40 .align 5 -strmm_kernel_L2_M4_22: +.Lstrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -2229,22 +2229,22 @@ strmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M4_22 + bgt .Lstrmm_kernel_L2_M4_22 -strmm_kernel_L2_M4_40: +.Lstrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M4_100 + ble .Lstrmm_kernel_L2_M4_100 -strmm_kernel_L2_M4_42: +.Lstrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M4_42 + bgt .Lstrmm_kernel_L2_M4_42 -strmm_kernel_L2_M4_100: +.Lstrmm_kernel_L2_M4_100: SAVE4x2 @@ -2263,20 +2263,20 @@ strmm_kernel_L2_M4_100: #if defined(LEFT) add tempOffset, tempOffset, #4 #endif -strmm_kernel_L2_M4_END: +.Lstrmm_kernel_L2_M4_END: /******************************************************************************/ -strmm_kernel_L2_M2_BEGIN: +.Lstrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L2_M1_BEGIN + ble .Lstrmm_kernel_L2_M1_BEGIN -strmm_kernel_L2_M2_20: +.Lstrmm_kernel_L2_M2_20: INIT2x2 @@ -2299,9 +2299,9 @@ strmm_kernel_L2_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble strmm_kernel_L2_M2_40 + ble .Lstrmm_kernel_L2_M2_40 -strmm_kernel_L2_M2_22: +.Lstrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -2314,22 +2314,22 @@ strmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M2_22 + bgt .Lstrmm_kernel_L2_M2_22 -strmm_kernel_L2_M2_40: +.Lstrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M2_100 + ble .Lstrmm_kernel_L2_M2_100 -strmm_kernel_L2_M2_42: +.Lstrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M2_42 + bgt .Lstrmm_kernel_L2_M2_42 -strmm_kernel_L2_M2_100: +.Lstrmm_kernel_L2_M2_100: SAVE2x2 #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) @@ -2348,16 +2348,16 @@ strmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -strmm_kernel_L2_M2_END: +.Lstrmm_kernel_L2_M2_END: /******************************************************************************/ -strmm_kernel_L2_M1_BEGIN: +.Lstrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L2_END + ble .Lstrmm_kernel_L2_END -strmm_kernel_L2_M1_20: +.Lstrmm_kernel_L2_M1_20: INIT1x2 @@ -2380,9 +2380,9 @@ strmm_kernel_L2_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble strmm_kernel_L2_M1_40 + ble .Lstrmm_kernel_L2_M1_40 -strmm_kernel_L2_M1_22: +.Lstrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -2394,22 +2394,22 @@ strmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M1_22 + bgt .Lstrmm_kernel_L2_M1_22 -strmm_kernel_L2_M1_40: +.Lstrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L2_M1_100 + ble .Lstrmm_kernel_L2_M1_100 -strmm_kernel_L2_M1_42: +.Lstrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L2_M1_42 + bgt .Lstrmm_kernel_L2_M1_42 -strmm_kernel_L2_M1_100: +.Lstrmm_kernel_L2_M1_100: SAVE1x2 @@ -2428,7 +2428,7 @@ strmm_kernel_L2_M1_100: #if defined(LEFT) add tempOffset, tempOffset, #1 #endif -strmm_kernel_L2_END: +.Lstrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -2437,11 +2437,11 @@ strmm_kernel_L2_END: /******************************************************************************/ /******************************************************************************/ -strmm_kernel_L1_BEGIN: +.Lstrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble strmm_kernel_L999 // done + ble .Lstrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -2454,14 +2454,14 @@ strmm_kernel_L1_BEGIN: /******************************************************************************/ -strmm_kernel_L1_M8_BEGIN: +.Lstrmm_kernel_L1_M8_BEGIN: mov counterI, origM asr counterI, counterI, #3 cmp counterI, #0 - ble strmm_kernel_L1_M4_BEGIN + ble .Lstrmm_kernel_L1_M4_BEGIN -strmm_kernel_L1_M8_20: +.Lstrmm_kernel_L1_M8_20: INIT8x1 @@ -2484,10 +2484,10 @@ strmm_kernel_L1_M8_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M8_40 + ble .Lstrmm_kernel_L1_M8_40 .align 5 -strmm_kernel_L1_M8_22: +.Lstrmm_kernel_L1_M8_22: KERNEL8x1_SUB KERNEL8x1_SUB KERNEL8x1_SUB @@ -2499,22 +2499,22 @@ strmm_kernel_L1_M8_22: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M8_22 + bgt .Lstrmm_kernel_L1_M8_22 -strmm_kernel_L1_M8_40: +.Lstrmm_kernel_L1_M8_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M8_100 + ble .Lstrmm_kernel_L1_M8_100 -strmm_kernel_L1_M8_42: +.Lstrmm_kernel_L1_M8_42: KERNEL8x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M8_42 + bgt .Lstrmm_kernel_L1_M8_42 -strmm_kernel_L1_M8_100: +.Lstrmm_kernel_L1_M8_100: SAVE8x1 @@ -2533,23 +2533,23 @@ strmm_kernel_L1_M8_100: #if defined(LEFT) add tempOffset, tempOffset, #8 #endif -strmm_kernel_L1_M8_END: +.Lstrmm_kernel_L1_M8_END: subs counterI, counterI, #1 - bgt strmm_kernel_L1_M8_20 + bgt .Lstrmm_kernel_L1_M8_20 /******************************************************************************/ -strmm_kernel_L1_M4_BEGIN: +.Lstrmm_kernel_L1_M4_BEGIN: mov counterI, origM tst counterI , #7 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END tst counterI, #4 - ble strmm_kernel_L1_M2_BEGIN + ble .Lstrmm_kernel_L1_M2_BEGIN -strmm_kernel_L1_M4_20: +.Lstrmm_kernel_L1_M4_20: INIT4x1 @@ -2572,10 +2572,10 @@ strmm_kernel_L1_M4_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M4_40 + ble .Lstrmm_kernel_L1_M4_40 .align 5 -strmm_kernel_L1_M4_22: +.Lstrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -2587,22 +2587,22 @@ strmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M4_22 + bgt .Lstrmm_kernel_L1_M4_22 -strmm_kernel_L1_M4_40: +.Lstrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M4_100 + ble .Lstrmm_kernel_L1_M4_100 -strmm_kernel_L1_M4_42: +.Lstrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M4_42 + bgt .Lstrmm_kernel_L1_M4_42 -strmm_kernel_L1_M4_100: +.Lstrmm_kernel_L1_M4_100: SAVE4x1 @@ -2621,20 +2621,20 @@ strmm_kernel_L1_M4_100: #if defined(LEFT) add tempOffset, tempOffset, #4 #endif -strmm_kernel_L1_M4_END: +.Lstrmm_kernel_L1_M4_END: /******************************************************************************/ -strmm_kernel_L1_M2_BEGIN: +.Lstrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble strmm_kernel_L1_M1_BEGIN + ble .Lstrmm_kernel_L1_M1_BEGIN -strmm_kernel_L1_M2_20: +.Lstrmm_kernel_L1_M2_20: INIT2x1 @@ -2657,9 +2657,9 @@ strmm_kernel_L1_M2_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M2_40 + ble .Lstrmm_kernel_L1_M2_40 -strmm_kernel_L1_M2_22: +.Lstrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -2672,22 +2672,22 @@ strmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M2_22 + bgt .Lstrmm_kernel_L1_M2_22 -strmm_kernel_L1_M2_40: +.Lstrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M2_100 + ble .Lstrmm_kernel_L1_M2_100 -strmm_kernel_L1_M2_42: +.Lstrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M2_42 + bgt .Lstrmm_kernel_L1_M2_42 -strmm_kernel_L1_M2_100: +.Lstrmm_kernel_L1_M2_100: SAVE2x1 @@ -2706,16 +2706,16 @@ strmm_kernel_L1_M2_100: #if defined(LEFT) add tempOffset, tempOffset, #2 #endif -strmm_kernel_L1_M2_END: +.Lstrmm_kernel_L1_M2_END: /******************************************************************************/ -strmm_kernel_L1_M1_BEGIN: +.Lstrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble strmm_kernel_L1_END + ble .Lstrmm_kernel_L1_END -strmm_kernel_L1_M1_20: +.Lstrmm_kernel_L1_M1_20: INIT1x1 @@ -2738,9 +2738,9 @@ strmm_kernel_L1_M1_20: #endif asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble strmm_kernel_L1_M1_40 + ble .Lstrmm_kernel_L1_M1_40 -strmm_kernel_L1_M1_22: +.Lstrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -2752,30 +2752,30 @@ strmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M1_22 + bgt .Lstrmm_kernel_L1_M1_22 -strmm_kernel_L1_M1_40: +.Lstrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble strmm_kernel_L1_M1_100 + ble .Lstrmm_kernel_L1_M1_100 -strmm_kernel_L1_M1_42: +.Lstrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt strmm_kernel_L1_M1_42 + bgt .Lstrmm_kernel_L1_M1_42 -strmm_kernel_L1_M1_100: +.Lstrmm_kernel_L1_M1_100: SAVE1x1 -strmm_kernel_L1_END: +.Lstrmm_kernel_L1_END: /******************************************************************************/ -strmm_kernel_L999: +.Lstrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/swap.S b/kernel/arm64/swap.S index 37ed83f2a..184e02e9c 100644 --- a/kernel/arm64/swap.S +++ b/kernel/arm64/swap.S @@ -193,50 +193,50 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble swap_kernel_L999 + ble .Lswap_kernel_L999 cmp INC_X, #1 - bne swap_kernel_S_BEGIN + bne .Lswap_kernel_S_BEGIN cmp INC_Y, #1 - bne swap_kernel_S_BEGIN + bne .Lswap_kernel_S_BEGIN -swap_kernel_F_BEGIN: +.Lswap_kernel_F_BEGIN: asr I, N, #3 cmp I, xzr - beq swap_kernel_F1 + beq .Lswap_kernel_F1 -swap_kernel_F8: +.Lswap_kernel_F8: KERNEL_F8 subs I, I, #1 - bne swap_kernel_F8 + bne .Lswap_kernel_F8 -swap_kernel_F1: +.Lswap_kernel_F1: ands I, N, #7 - ble swap_kernel_L999 + ble .Lswap_kernel_L999 -swap_kernel_F10: +.Lswap_kernel_F10: KERNEL_F1 subs I, I, #1 - bne swap_kernel_F10 + bne .Lswap_kernel_F10 - b swap_kernel_L999 + b .Lswap_kernel_L999 -swap_kernel_S_BEGIN: +.Lswap_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble swap_kernel_S1 + ble .Lswap_kernel_S1 -swap_kernel_S4: +.Lswap_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -244,21 +244,21 @@ swap_kernel_S4: KERNEL_S1 subs I, I, #1 - bne swap_kernel_S4 + bne .Lswap_kernel_S4 -swap_kernel_S1: +.Lswap_kernel_S1: ands I, N, #3 - ble swap_kernel_L999 + ble .Lswap_kernel_L999 -swap_kernel_S10: +.Lswap_kernel_S10: KERNEL_S1 subs I, I, #1 - bne swap_kernel_S10 + bne .Lswap_kernel_S10 -swap_kernel_L999: +.Lswap_kernel_L999: mov w0, wzr ret diff --git a/kernel/arm64/zamax.S b/kernel/arm64/zamax.S index 7db339f53..c2c0a5374 100644 --- a/kernel/arm64/zamax.S +++ b/kernel/arm64/zamax.S @@ -184,62 +184,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble amax_kernel_zero + ble .Lzamax_kernel_zero cmp INC_X, xzr - ble amax_kernel_zero + ble .Lzamax_kernel_zero cmp INC_X, #1 - bne amax_kernel_S_BEGIN + bne .Lzamax_kernel_S_BEGIN -amax_kernel_F_BEGIN: +.Lzamax_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq amax_kernel_F1_INIT + beq .Lzamax_kernel_F1_INIT INIT_F4 subs I, I, #1 - beq amax_kernel_F1 + beq .Lzamax_kernel_F1 -amax_kernel_F4: +.Lzamax_kernel_F4: KERNEL_F4 subs I, I, #1 - bne amax_kernel_F4 + bne .Lzamax_kernel_F4 -amax_kernel_F1: +.Lzamax_kernel_F1: ands I, N, #3 - ble amax_kernel_L999 + ble .Lzamax_kernel_L999 -amax_kernel_F10: +.Lzamax_kernel_F10: KERNEL_F1 subs I, I, #1 - bne amax_kernel_F10 + bne .Lzamax_kernel_F10 ret -amax_kernel_F1_INIT: +.Lzamax_kernel_F1_INIT: INIT_F1 subs N, N, #1 - b amax_kernel_F1 + b .Lzamax_kernel_F1 -amax_kernel_S_BEGIN: +.Lzamax_kernel_S_BEGIN: INIT_S subs N, N, #1 - ble amax_kernel_L999 + ble .Lzamax_kernel_L999 asr I, N, #2 cmp I, xzr - ble amax_kernel_S1 + ble .Lzamax_kernel_S1 -amax_kernel_S4: +.Lzamax_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -247,25 +247,25 @@ amax_kernel_S4: KERNEL_S1 subs I, I, #1 - bne amax_kernel_S4 + bne .Lzamax_kernel_S4 -amax_kernel_S1: +.Lzamax_kernel_S1: ands I, N, #3 - ble amax_kernel_L999 + ble .Lzamax_kernel_L999 -amax_kernel_S10: +.Lzamax_kernel_S10: KERNEL_S1 subs I, I, #1 - bne amax_kernel_S10 + bne .Lzamax_kernel_S10 -amax_kernel_L999: +.Lzamax_kernel_L999: ret -amax_kernel_zero: +.Lzamax_kernel_zero: fmov MAXF, REG0 ret diff --git a/kernel/arm64/zasum.S b/kernel/arm64/zasum.S index bf586d367..0d5ec952b 100644 --- a/kernel/arm64/zasum.S +++ b/kernel/arm64/zasum.S @@ -92,52 +92,52 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fmov SUMF, REG0 cmp N, xzr - ble asum_kernel_L999 + ble .Lzasum_kernel_L999 cmp INC_X, xzr - ble asum_kernel_L999 + ble .Lzasum_kernel_L999 cmp INC_X, #1 - bne asum_kernel_S_BEGIN + bne .Lzasum_kernel_S_BEGIN -asum_kernel_F_BEGIN: +.Lzasum_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq asum_kernel_F1 + beq .Lzasum_kernel_F1 -asum_kernel_F4: +.Lzasum_kernel_F4: KERNEL_F4 subs I, I, #1 - bne asum_kernel_F4 + bne .Lzasum_kernel_F4 KERNEL_F4_FINALIZE -asum_kernel_F1: +.Lzasum_kernel_F1: ands I, N, #3 - ble asum_kernel_L999 + ble .Lzasum_kernel_L999 -asum_kernel_F10: +.Lzasum_kernel_F10: KERNEL_F1 subs I, I, #1 - bne asum_kernel_F10 + bne .Lzasum_kernel_F10 -asum_kernel_L999: +.Lzasum_kernel_L999: ret -asum_kernel_S_BEGIN: +.Lzasum_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble asum_kernel_S1 + ble .Lzasum_kernel_S1 -asum_kernel_S4: +.Lzasum_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -145,19 +145,19 @@ asum_kernel_S4: KERNEL_S1 subs I, I, #1 - bne asum_kernel_S4 + bne .Lzasum_kernel_S4 -asum_kernel_S1: +.Lzasum_kernel_S1: ands I, N, #3 - ble asum_kernel_L999 + ble .Lzasum_kernel_L999 -asum_kernel_S10: +.Lzasum_kernel_S10: KERNEL_S1 subs I, I, #1 - bne asum_kernel_S10 + bne .Lzasum_kernel_S10 ret diff --git a/kernel/arm64/zaxpy.S b/kernel/arm64/zaxpy.S index 70c249981..46d7b0478 100644 --- a/kernel/arm64/zaxpy.S +++ b/kernel/arm64/zaxpy.S @@ -241,62 +241,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble zaxpy_kernel_L999 + ble .Lzaxpy_kernel_L999 mov Y_COPY, Y fcmp DA_R, #0.0 bne .L1 fcmp DA_I, #0.0 - beq zaxpy_kernel_L999 + beq .Lzaxpy_kernel_L999 .L1: INIT cmp INC_X, #1 - bne zaxpy_kernel_S_BEGIN + bne .Lzaxpy_kernel_S_BEGIN cmp INC_Y, #1 - bne zaxpy_kernel_S_BEGIN + bne .Lzaxpy_kernel_S_BEGIN -zaxpy_kernel_F_BEGIN: +.Lzaxpy_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq zaxpy_kernel_F1 + beq .Lzaxpy_kernel_F1 KERNEL_INIT_F4 -zaxpy_kernel_F4: +.Lzaxpy_kernel_F4: KERNEL_F4 subs I, I, #1 - bne zaxpy_kernel_F4 + bne .Lzaxpy_kernel_F4 -zaxpy_kernel_F1: +.Lzaxpy_kernel_F1: ands I, N, #3 - ble zaxpy_kernel_L999 + ble .Lzaxpy_kernel_L999 -zaxpy_kernel_F10: +.Lzaxpy_kernel_F10: KERNEL_F1 subs I, I, #1 - bne zaxpy_kernel_F10 + bne .Lzaxpy_kernel_F10 mov w0, wzr ret -zaxpy_kernel_S_BEGIN: +.Lzaxpy_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble zaxpy_kernel_S1 + ble .Lzaxpy_kernel_S1 -zaxpy_kernel_S4: +.Lzaxpy_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -304,21 +304,21 @@ zaxpy_kernel_S4: KERNEL_S1 subs I, I, #1 - bne zaxpy_kernel_S4 + bne .Lzaxpy_kernel_S4 -zaxpy_kernel_S1: +.Lzaxpy_kernel_S1: ands I, N, #3 - ble zaxpy_kernel_L999 + ble .Lzaxpy_kernel_L999 -zaxpy_kernel_S10: +.Lzaxpy_kernel_S10: KERNEL_S1 subs I, I, #1 - bne zaxpy_kernel_S10 + bne .Lzaxpy_kernel_S10 -zaxpy_kernel_L999: +.Lzaxpy_kernel_L999: mov w0, wzr ret diff --git a/kernel/arm64/zdot.S b/kernel/arm64/zdot.S index 3e8e3d7d9..044ace3b8 100644 --- a/kernel/arm64/zdot.S +++ b/kernel/arm64/zdot.S @@ -229,51 +229,51 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif cmp N, xzr - ble dot_kernel_L999 + ble .Lzdot_kernel_L999 cmp INC_X, #1 - bne dot_kernel_S_BEGIN + bne .Lzdot_kernel_S_BEGIN cmp INC_Y, #1 - bne dot_kernel_S_BEGIN + bne .Lzdot_kernel_S_BEGIN -dot_kernel_F_BEGIN: +.Lzdot_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq dot_kernel_F1 + beq .Lzdot_kernel_F1 -dot_kernel_F4: +.Lzdot_kernel_F4: KERNEL_F4 subs I, I, #1 - bne dot_kernel_F4 + bne .Lzdot_kernel_F4 KERNEL_F4_FINALIZE -dot_kernel_F1: +.Lzdot_kernel_F1: ands I, N, #3 - ble dot_kernel_L999 + ble .Lzdot_kernel_L999 -dot_kernel_F10: +.Lzdot_kernel_F10: KERNEL_F1 subs I, I, #1 - bne dot_kernel_F10 + bne .Lzdot_kernel_F10 ret -dot_kernel_S_BEGIN: +.Lzdot_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble dot_kernel_S1 + ble .Lzdot_kernel_S1 -dot_kernel_S4: +.Lzdot_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -281,21 +281,21 @@ dot_kernel_S4: KERNEL_S1 subs I, I, #1 - bne dot_kernel_S4 + bne .Lzdot_kernel_S4 -dot_kernel_S1: +.Lzdot_kernel_S1: ands I, N, #3 - ble dot_kernel_L999 + ble .Lzdot_kernel_L999 -dot_kernel_S10: +.Lzdot_kernel_S10: KERNEL_S1 subs I, I, #1 - bne dot_kernel_S10 + bne .Lzdot_kernel_S10 -dot_kernel_L999: +.Lzdot_kernel_L999: ret diff --git a/kernel/arm64/zgemm_kernel_4x4.S b/kernel/arm64/zgemm_kernel_4x4.S index 08a1531cf..f8e877f3c 100644 --- a/kernel/arm64/zgemm_kernel_4x4.S +++ b/kernel/arm64/zgemm_kernel_4x4.S @@ -1099,9 +1099,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble zgemm_kernel_L2_BEGIN + ble .Lzgemm_kernel_L2_BEGIN -zgemm_kernel_L4_BEGIN: +.Lzgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1111,20 +1111,20 @@ zgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -zgemm_kernel_L4_M4_BEGIN: +.Lzgemm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble zgemm_kernel_L4_M2_BEGIN + ble .Lzgemm_kernel_L4_M2_BEGIN .align 5 -zgemm_kernel_L4_M4_20: +.Lzgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #3 cmp counterL , #2 - blt zgemm_kernel_L4_M4_32 + blt .Lzgemm_kernel_L4_M4_32 KERNEL4x4_I KERNEL4x4_M2 @@ -1136,10 +1136,10 @@ zgemm_kernel_L4_M4_20: KERNEL4x4_M2 subs counterL, counterL, #2 // subtract 2 - ble zgemm_kernel_L4_M4_22a + ble .Lzgemm_kernel_L4_M4_22a .align 5 -zgemm_kernel_L4_M4_22: +.Lzgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 @@ -1151,10 +1151,10 @@ zgemm_kernel_L4_M4_22: KERNEL4x4_M2 subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M4_22 + bgt .Lzgemm_kernel_L4_M4_22 .align 5 -zgemm_kernel_L4_M4_22a: +.Lzgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_M2 @@ -1165,13 +1165,13 @@ zgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b zgemm_kernel_L4_M4_44 + b .Lzgemm_kernel_L4_M4_44 .align 5 -zgemm_kernel_L4_M4_32: +.Lzgemm_kernel_L4_M4_32: tst counterL, #1 - ble zgemm_kernel_L4_M4_40 + ble .Lzgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_M2 @@ -1182,55 +1182,55 @@ zgemm_kernel_L4_M4_32: KERNEL4x4_M1 KERNEL4x4_E - b zgemm_kernel_L4_M4_44 + b .Lzgemm_kernel_L4_M4_44 -zgemm_kernel_L4_M4_40: +.Lzgemm_kernel_L4_M4_40: INIT4x4 -zgemm_kernel_L4_M4_44: +.Lzgemm_kernel_L4_M4_44: ands counterL , origK, #7 - ble zgemm_kernel_L4_M4_100 + ble .Lzgemm_kernel_L4_M4_100 .align 5 -zgemm_kernel_L4_M4_46: +.Lzgemm_kernel_L4_M4_46: KERNEL4x4_SUB subs counterL, counterL, #1 - bne zgemm_kernel_L4_M4_46 + bne .Lzgemm_kernel_L4_M4_46 -zgemm_kernel_L4_M4_100: +.Lzgemm_kernel_L4_M4_100: prfm PLDL1KEEP, [pA] prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] SAVE4x4 -zgemm_kernel_L4_M4_END: +.Lzgemm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne zgemm_kernel_L4_M4_20 + bne .Lzgemm_kernel_L4_M4_20 -zgemm_kernel_L4_M2_BEGIN: +.Lzgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble zgemm_kernel_L4_END + ble .Lzgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble zgemm_kernel_L4_M1_BEGIN + ble .Lzgemm_kernel_L4_M1_BEGIN -zgemm_kernel_L4_M2_20: +.Lzgemm_kernel_L4_M2_20: INIT2x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L4_M2_40 + ble .Lzgemm_kernel_L4_M2_40 -zgemm_kernel_L4_M2_22: +.Lzgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1243,43 +1243,43 @@ zgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M2_22 + bgt .Lzgemm_kernel_L4_M2_22 -zgemm_kernel_L4_M2_40: +.Lzgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L4_M2_100 + ble .Lzgemm_kernel_L4_M2_100 -zgemm_kernel_L4_M2_42: +.Lzgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M2_42 + bgt .Lzgemm_kernel_L4_M2_42 -zgemm_kernel_L4_M2_100: +.Lzgemm_kernel_L4_M2_100: SAVE2x4 -zgemm_kernel_L4_M2_END: +.Lzgemm_kernel_L4_M2_END: -zgemm_kernel_L4_M1_BEGIN: +.Lzgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble zgemm_kernel_L4_END + ble .Lzgemm_kernel_L4_END -zgemm_kernel_L4_M1_20: +.Lzgemm_kernel_L4_M1_20: INIT1x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L4_M1_40 + ble .Lzgemm_kernel_L4_M1_40 -zgemm_kernel_L4_M1_22: +.Lzgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1291,45 +1291,45 @@ zgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M1_22 + bgt .Lzgemm_kernel_L4_M1_22 -zgemm_kernel_L4_M1_40: +.Lzgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L4_M1_100 + ble .Lzgemm_kernel_L4_M1_100 -zgemm_kernel_L4_M1_42: +.Lzgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M1_42 + bgt .Lzgemm_kernel_L4_M1_42 -zgemm_kernel_L4_M1_100: +.Lzgemm_kernel_L4_M1_100: SAVE1x4 -zgemm_kernel_L4_END: +.Lzgemm_kernel_L4_END: lsl temp, origK, #6 add origPB, origPB, temp // B = B + K * 4 * 8 * 2 subs counterJ, counterJ , #1 // j-- - bgt zgemm_kernel_L4_BEGIN + bgt .Lzgemm_kernel_L4_BEGIN /******************************************************************************/ -zgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lzgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble zgemm_kernel_L999 + ble .Lzgemm_kernel_L999 tst counterJ , #2 - ble zgemm_kernel_L1_BEGIN + ble .Lzgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1339,24 +1339,24 @@ zgemm_kernel_L2_BEGIN: // less than 2 left in N direction -zgemm_kernel_L2_M4_BEGIN: +.Lzgemm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble zgemm_kernel_L2_M2_BEGIN + ble .Lzgemm_kernel_L2_M2_BEGIN -zgemm_kernel_L2_M4_20: +.Lzgemm_kernel_L2_M4_20: INIT4x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble zgemm_kernel_L2_M4_40 + ble .Lzgemm_kernel_L2_M4_40 .align 5 -zgemm_kernel_L2_M4_22: +.Lzgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1368,50 +1368,50 @@ zgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M4_22 + bgt .Lzgemm_kernel_L2_M4_22 -zgemm_kernel_L2_M4_40: +.Lzgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L2_M4_100 + ble .Lzgemm_kernel_L2_M4_100 -zgemm_kernel_L2_M4_42: +.Lzgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M4_42 + bgt .Lzgemm_kernel_L2_M4_42 -zgemm_kernel_L2_M4_100: +.Lzgemm_kernel_L2_M4_100: SAVE4x2 -zgemm_kernel_L2_M4_END: +.Lzgemm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt zgemm_kernel_L2_M4_20 + bgt .Lzgemm_kernel_L2_M4_20 -zgemm_kernel_L2_M2_BEGIN: +.Lzgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble zgemm_kernel_L2_END + ble .Lzgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble zgemm_kernel_L2_M1_BEGIN + ble .Lzgemm_kernel_L2_M1_BEGIN -zgemm_kernel_L2_M2_20: +.Lzgemm_kernel_L2_M2_20: INIT2x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble zgemm_kernel_L2_M2_40 + ble .Lzgemm_kernel_L2_M2_40 -zgemm_kernel_L2_M2_22: +.Lzgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1424,43 +1424,43 @@ zgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M2_22 + bgt .Lzgemm_kernel_L2_M2_22 -zgemm_kernel_L2_M2_40: +.Lzgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L2_M2_100 + ble .Lzgemm_kernel_L2_M2_100 -zgemm_kernel_L2_M2_42: +.Lzgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M2_42 + bgt .Lzgemm_kernel_L2_M2_42 -zgemm_kernel_L2_M2_100: +.Lzgemm_kernel_L2_M2_100: SAVE2x2 -zgemm_kernel_L2_M2_END: +.Lzgemm_kernel_L2_M2_END: -zgemm_kernel_L2_M1_BEGIN: +.Lzgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble zgemm_kernel_L2_END + ble .Lzgemm_kernel_L2_END -zgemm_kernel_L2_M1_20: +.Lzgemm_kernel_L2_M1_20: INIT1x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble zgemm_kernel_L2_M1_40 + ble .Lzgemm_kernel_L2_M1_40 -zgemm_kernel_L2_M1_22: +.Lzgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1472,37 +1472,37 @@ zgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M1_22 + bgt .Lzgemm_kernel_L2_M1_22 -zgemm_kernel_L2_M1_40: +.Lzgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L2_M1_100 + ble .Lzgemm_kernel_L2_M1_100 -zgemm_kernel_L2_M1_42: +.Lzgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M1_42 + bgt .Lzgemm_kernel_L2_M1_42 -zgemm_kernel_L2_M1_100: +.Lzgemm_kernel_L2_M1_100: SAVE1x2 -zgemm_kernel_L2_END: +.Lzgemm_kernel_L2_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 2 * 8 * 2 /******************************************************************************/ -zgemm_kernel_L1_BEGIN: +.Lzgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble zgemm_kernel_L999 // done + ble .Lzgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1512,24 +1512,24 @@ zgemm_kernel_L1_BEGIN: -zgemm_kernel_L1_M4_BEGIN: +.Lzgemm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble zgemm_kernel_L1_M2_BEGIN + ble .Lzgemm_kernel_L1_M2_BEGIN -zgemm_kernel_L1_M4_20: +.Lzgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L1_M4_40 + ble .Lzgemm_kernel_L1_M4_40 .align 5 -zgemm_kernel_L1_M4_22: +.Lzgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1541,50 +1541,50 @@ zgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M4_22 + bgt .Lzgemm_kernel_L1_M4_22 -zgemm_kernel_L1_M4_40: +.Lzgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L1_M4_100 + ble .Lzgemm_kernel_L1_M4_100 -zgemm_kernel_L1_M4_42: +.Lzgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M4_42 + bgt .Lzgemm_kernel_L1_M4_42 -zgemm_kernel_L1_M4_100: +.Lzgemm_kernel_L1_M4_100: SAVE4x1 -zgemm_kernel_L1_M4_END: +.Lzgemm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt zgemm_kernel_L1_M4_20 + bgt .Lzgemm_kernel_L1_M4_20 -zgemm_kernel_L1_M2_BEGIN: +.Lzgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble zgemm_kernel_L1_END + ble .Lzgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble zgemm_kernel_L1_M1_BEGIN + ble .Lzgemm_kernel_L1_M1_BEGIN -zgemm_kernel_L1_M2_20: +.Lzgemm_kernel_L1_M2_20: INIT2x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L1_M2_40 + ble .Lzgemm_kernel_L1_M2_40 -zgemm_kernel_L1_M2_22: +.Lzgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1597,43 +1597,43 @@ zgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M2_22 + bgt .Lzgemm_kernel_L1_M2_22 -zgemm_kernel_L1_M2_40: +.Lzgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L1_M2_100 + ble .Lzgemm_kernel_L1_M2_100 -zgemm_kernel_L1_M2_42: +.Lzgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M2_42 + bgt .Lzgemm_kernel_L1_M2_42 -zgemm_kernel_L1_M2_100: +.Lzgemm_kernel_L1_M2_100: SAVE2x1 -zgemm_kernel_L1_M2_END: +.Lzgemm_kernel_L1_M2_END: -zgemm_kernel_L1_M1_BEGIN: +.Lzgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble zgemm_kernel_L1_END + ble .Lzgemm_kernel_L1_END -zgemm_kernel_L1_M1_20: +.Lzgemm_kernel_L1_M1_20: INIT1x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L1_M1_40 + ble .Lzgemm_kernel_L1_M1_40 -zgemm_kernel_L1_M1_22: +.Lzgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1645,30 +1645,30 @@ zgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M1_22 + bgt .Lzgemm_kernel_L1_M1_22 -zgemm_kernel_L1_M1_40: +.Lzgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L1_M1_100 + ble .Lzgemm_kernel_L1_M1_100 -zgemm_kernel_L1_M1_42: +.Lzgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M1_42 + bgt .Lzgemm_kernel_L1_M1_42 -zgemm_kernel_L1_M1_100: +.Lzgemm_kernel_L1_M1_100: SAVE1x1 -zgemm_kernel_L1_END: +.Lzgemm_kernel_L1_END: -zgemm_kernel_L999: +.Lzgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S b/kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S index e5b4cba9c..8e6ff655d 100644 --- a/kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S +++ b/kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S @@ -1109,9 +1109,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble zgemm_kernel_L2_BEGIN + ble .Lzgemm_kernel_L2_BEGIN -zgemm_kernel_L4_BEGIN: +.Lzgemm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1121,20 +1121,20 @@ zgemm_kernel_L4_BEGIN: mov pA, origPA // pA = start of A array -zgemm_kernel_L4_M4_BEGIN: +.Lzgemm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble zgemm_kernel_L4_M2_BEGIN + ble .Lzgemm_kernel_L4_M2_BEGIN .align 5 -zgemm_kernel_L4_M4_20: +.Lzgemm_kernel_L4_M4_20: mov pB, origPB asr counterL , origK, #3 cmp counterL , #2 - blt zgemm_kernel_L4_M4_32 + blt .Lzgemm_kernel_L4_M4_32 KERNEL4x4_I KERNEL4x4_M2 @@ -1146,10 +1146,10 @@ zgemm_kernel_L4_M4_20: KERNEL4x4_M2 subs counterL, counterL, #2 // subtract 2 - ble zgemm_kernel_L4_M4_22a + ble .Lzgemm_kernel_L4_M4_22a .align 5 -zgemm_kernel_L4_M4_22: +.Lzgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 @@ -1161,10 +1161,10 @@ zgemm_kernel_L4_M4_22: KERNEL4x4_M2 subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M4_22 + bgt .Lzgemm_kernel_L4_M4_22 .align 5 -zgemm_kernel_L4_M4_22a: +.Lzgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_M2 @@ -1175,13 +1175,13 @@ zgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b zgemm_kernel_L4_M4_44 + b .Lzgemm_kernel_L4_M4_44 .align 5 -zgemm_kernel_L4_M4_32: +.Lzgemm_kernel_L4_M4_32: tst counterL, #1 - ble zgemm_kernel_L4_M4_40 + ble .Lzgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_M2 @@ -1192,55 +1192,55 @@ zgemm_kernel_L4_M4_32: KERNEL4x4_M1 KERNEL4x4_E - b zgemm_kernel_L4_M4_44 + b .Lzgemm_kernel_L4_M4_44 -zgemm_kernel_L4_M4_40: +.Lzgemm_kernel_L4_M4_40: INIT4x4 -zgemm_kernel_L4_M4_44: +.Lzgemm_kernel_L4_M4_44: ands counterL , origK, #7 - ble zgemm_kernel_L4_M4_100 + ble .Lzgemm_kernel_L4_M4_100 .align 5 -zgemm_kernel_L4_M4_46: +.Lzgemm_kernel_L4_M4_46: KERNEL4x4_SUB subs counterL, counterL, #1 - bne zgemm_kernel_L4_M4_46 + bne .Lzgemm_kernel_L4_M4_46 -zgemm_kernel_L4_M4_100: +.Lzgemm_kernel_L4_M4_100: prfm PLDL1KEEP, [pA] prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] SAVE4x4 -zgemm_kernel_L4_M4_END: +.Lzgemm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne zgemm_kernel_L4_M4_20 + bne .Lzgemm_kernel_L4_M4_20 -zgemm_kernel_L4_M2_BEGIN: +.Lzgemm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble zgemm_kernel_L4_END + ble .Lzgemm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble zgemm_kernel_L4_M1_BEGIN + ble .Lzgemm_kernel_L4_M1_BEGIN -zgemm_kernel_L4_M2_20: +.Lzgemm_kernel_L4_M2_20: INIT2x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L4_M2_40 + ble .Lzgemm_kernel_L4_M2_40 -zgemm_kernel_L4_M2_22: +.Lzgemm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1253,43 +1253,43 @@ zgemm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M2_22 + bgt .Lzgemm_kernel_L4_M2_22 -zgemm_kernel_L4_M2_40: +.Lzgemm_kernel_L4_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L4_M2_100 + ble .Lzgemm_kernel_L4_M2_100 -zgemm_kernel_L4_M2_42: +.Lzgemm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M2_42 + bgt .Lzgemm_kernel_L4_M2_42 -zgemm_kernel_L4_M2_100: +.Lzgemm_kernel_L4_M2_100: SAVE2x4 -zgemm_kernel_L4_M2_END: +.Lzgemm_kernel_L4_M2_END: -zgemm_kernel_L4_M1_BEGIN: +.Lzgemm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble zgemm_kernel_L4_END + ble .Lzgemm_kernel_L4_END -zgemm_kernel_L4_M1_20: +.Lzgemm_kernel_L4_M1_20: INIT1x4 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L4_M1_40 + ble .Lzgemm_kernel_L4_M1_40 -zgemm_kernel_L4_M1_22: +.Lzgemm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1301,45 +1301,45 @@ zgemm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M1_22 + bgt .Lzgemm_kernel_L4_M1_22 -zgemm_kernel_L4_M1_40: +.Lzgemm_kernel_L4_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L4_M1_100 + ble .Lzgemm_kernel_L4_M1_100 -zgemm_kernel_L4_M1_42: +.Lzgemm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L4_M1_42 + bgt .Lzgemm_kernel_L4_M1_42 -zgemm_kernel_L4_M1_100: +.Lzgemm_kernel_L4_M1_100: SAVE1x4 -zgemm_kernel_L4_END: +.Lzgemm_kernel_L4_END: lsl temp, origK, #6 add origPB, origPB, temp // B = B + K * 4 * 8 * 2 subs counterJ, counterJ , #1 // j-- - bgt zgemm_kernel_L4_BEGIN + bgt .Lzgemm_kernel_L4_BEGIN /******************************************************************************/ -zgemm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lzgemm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble zgemm_kernel_L999 + ble .Lzgemm_kernel_L999 tst counterJ , #2 - ble zgemm_kernel_L1_BEGIN + ble .Lzgemm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1349,24 +1349,24 @@ zgemm_kernel_L2_BEGIN: // less than 2 left in N direction -zgemm_kernel_L2_M4_BEGIN: +.Lzgemm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble zgemm_kernel_L2_M2_BEGIN + ble .Lzgemm_kernel_L2_M2_BEGIN -zgemm_kernel_L2_M4_20: +.Lzgemm_kernel_L2_M4_20: INIT4x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble zgemm_kernel_L2_M4_40 + ble .Lzgemm_kernel_L2_M4_40 .align 5 -zgemm_kernel_L2_M4_22: +.Lzgemm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1378,50 +1378,50 @@ zgemm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M4_22 + bgt .Lzgemm_kernel_L2_M4_22 -zgemm_kernel_L2_M4_40: +.Lzgemm_kernel_L2_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L2_M4_100 + ble .Lzgemm_kernel_L2_M4_100 -zgemm_kernel_L2_M4_42: +.Lzgemm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M4_42 + bgt .Lzgemm_kernel_L2_M4_42 -zgemm_kernel_L2_M4_100: +.Lzgemm_kernel_L2_M4_100: SAVE4x2 -zgemm_kernel_L2_M4_END: +.Lzgemm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt zgemm_kernel_L2_M4_20 + bgt .Lzgemm_kernel_L2_M4_20 -zgemm_kernel_L2_M2_BEGIN: +.Lzgemm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble zgemm_kernel_L2_END + ble .Lzgemm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble zgemm_kernel_L2_M1_BEGIN + ble .Lzgemm_kernel_L2_M1_BEGIN -zgemm_kernel_L2_M2_20: +.Lzgemm_kernel_L2_M2_20: INIT2x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble zgemm_kernel_L2_M2_40 + ble .Lzgemm_kernel_L2_M2_40 -zgemm_kernel_L2_M2_22: +.Lzgemm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1434,43 +1434,43 @@ zgemm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M2_22 + bgt .Lzgemm_kernel_L2_M2_22 -zgemm_kernel_L2_M2_40: +.Lzgemm_kernel_L2_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L2_M2_100 + ble .Lzgemm_kernel_L2_M2_100 -zgemm_kernel_L2_M2_42: +.Lzgemm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M2_42 + bgt .Lzgemm_kernel_L2_M2_42 -zgemm_kernel_L2_M2_100: +.Lzgemm_kernel_L2_M2_100: SAVE2x2 -zgemm_kernel_L2_M2_END: +.Lzgemm_kernel_L2_M2_END: -zgemm_kernel_L2_M1_BEGIN: +.Lzgemm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble zgemm_kernel_L2_END + ble .Lzgemm_kernel_L2_END -zgemm_kernel_L2_M1_20: +.Lzgemm_kernel_L2_M1_20: INIT1x2 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble zgemm_kernel_L2_M1_40 + ble .Lzgemm_kernel_L2_M1_40 -zgemm_kernel_L2_M1_22: +.Lzgemm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1482,37 +1482,37 @@ zgemm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M1_22 + bgt .Lzgemm_kernel_L2_M1_22 -zgemm_kernel_L2_M1_40: +.Lzgemm_kernel_L2_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L2_M1_100 + ble .Lzgemm_kernel_L2_M1_100 -zgemm_kernel_L2_M1_42: +.Lzgemm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L2_M1_42 + bgt .Lzgemm_kernel_L2_M1_42 -zgemm_kernel_L2_M1_100: +.Lzgemm_kernel_L2_M1_100: SAVE1x2 -zgemm_kernel_L2_END: +.Lzgemm_kernel_L2_END: lsl temp, origK, #5 add origPB, origPB, temp // B = B + K * 2 * 8 * 2 /******************************************************************************/ -zgemm_kernel_L1_BEGIN: +.Lzgemm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble zgemm_kernel_L999 // done + ble .Lzgemm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1522,24 +1522,24 @@ zgemm_kernel_L1_BEGIN: -zgemm_kernel_L1_M4_BEGIN: +.Lzgemm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble zgemm_kernel_L1_M2_BEGIN + ble .Lzgemm_kernel_L1_M2_BEGIN -zgemm_kernel_L1_M4_20: +.Lzgemm_kernel_L1_M4_20: INIT4x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L1_M4_40 + ble .Lzgemm_kernel_L1_M4_40 .align 5 -zgemm_kernel_L1_M4_22: +.Lzgemm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1551,50 +1551,50 @@ zgemm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M4_22 + bgt .Lzgemm_kernel_L1_M4_22 -zgemm_kernel_L1_M4_40: +.Lzgemm_kernel_L1_M4_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L1_M4_100 + ble .Lzgemm_kernel_L1_M4_100 -zgemm_kernel_L1_M4_42: +.Lzgemm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M4_42 + bgt .Lzgemm_kernel_L1_M4_42 -zgemm_kernel_L1_M4_100: +.Lzgemm_kernel_L1_M4_100: SAVE4x1 -zgemm_kernel_L1_M4_END: +.Lzgemm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt zgemm_kernel_L1_M4_20 + bgt .Lzgemm_kernel_L1_M4_20 -zgemm_kernel_L1_M2_BEGIN: +.Lzgemm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble zgemm_kernel_L1_END + ble .Lzgemm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble zgemm_kernel_L1_M1_BEGIN + ble .Lzgemm_kernel_L1_M1_BEGIN -zgemm_kernel_L1_M2_20: +.Lzgemm_kernel_L1_M2_20: INIT2x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L1_M2_40 + ble .Lzgemm_kernel_L1_M2_40 -zgemm_kernel_L1_M2_22: +.Lzgemm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1607,43 +1607,43 @@ zgemm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M2_22 + bgt .Lzgemm_kernel_L1_M2_22 -zgemm_kernel_L1_M2_40: +.Lzgemm_kernel_L1_M2_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L1_M2_100 + ble .Lzgemm_kernel_L1_M2_100 -zgemm_kernel_L1_M2_42: +.Lzgemm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M2_42 + bgt .Lzgemm_kernel_L1_M2_42 -zgemm_kernel_L1_M2_100: +.Lzgemm_kernel_L1_M2_100: SAVE2x1 -zgemm_kernel_L1_M2_END: +.Lzgemm_kernel_L1_M2_END: -zgemm_kernel_L1_M1_BEGIN: +.Lzgemm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble zgemm_kernel_L1_END + ble .Lzgemm_kernel_L1_END -zgemm_kernel_L1_M1_20: +.Lzgemm_kernel_L1_M1_20: INIT1x1 mov pB, origPB asr counterL , origK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble zgemm_kernel_L1_M1_40 + ble .Lzgemm_kernel_L1_M1_40 -zgemm_kernel_L1_M1_22: +.Lzgemm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1655,30 +1655,30 @@ zgemm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M1_22 + bgt .Lzgemm_kernel_L1_M1_22 -zgemm_kernel_L1_M1_40: +.Lzgemm_kernel_L1_M1_40: ands counterL , origK, #7 // counterL = counterL % 8 - ble zgemm_kernel_L1_M1_100 + ble .Lzgemm_kernel_L1_M1_100 -zgemm_kernel_L1_M1_42: +.Lzgemm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt zgemm_kernel_L1_M1_42 + bgt .Lzgemm_kernel_L1_M1_42 -zgemm_kernel_L1_M1_100: +.Lzgemm_kernel_L1_M1_100: SAVE1x1 -zgemm_kernel_L1_END: +.Lzgemm_kernel_L1_END: -zgemm_kernel_L999: +.Lzgemm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] diff --git a/kernel/arm64/zgemv_n.S b/kernel/arm64/zgemv_n.S index a28d1b0ce..28afcada5 100644 --- a/kernel/arm64/zgemv_n.S +++ b/kernel/arm64/zgemv_n.S @@ -364,9 +364,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SAVE_REGS cmp N, xzr - ble zgemv_n_kernel_L999 + ble .Lzgemv_n_kernel_L999 cmp M, xzr - ble zgemv_n_kernel_L999 + ble .Lzgemv_n_kernel_L999 lsl LDA, LDA, #SHZ lsl INC_X, INC_X, #SHZ @@ -375,9 +375,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INIT cmp INC_Y, #1 - bne zgemv_n_kernel_S_BEGIN + bne .Lzgemv_n_kernel_S_BEGIN -zgemv_n_kernel_F_LOOP: +.Lzgemv_n_kernel_F_LOOP: mov A_PTR, A mov Y_IPTR, Y mov Y_OPTR, Y @@ -387,40 +387,40 @@ zgemv_n_kernel_F_LOOP: asr I, M, #2 cmp I, xzr - beq zgemv_n_kernel_F1 + beq .Lzgemv_n_kernel_F1 -zgemv_n_kernel_F4: +.Lzgemv_n_kernel_F4: KERNEL_F4 subs I, I, #1 - bne zgemv_n_kernel_F4 + bne .Lzgemv_n_kernel_F4 -zgemv_n_kernel_F1: +.Lzgemv_n_kernel_F1: ands I, M, #3 - ble zgemv_n_kernel_F_END + ble .Lzgemv_n_kernel_F_END -zgemv_n_kernel_F10: +.Lzgemv_n_kernel_F10: KERNEL_F1 subs I, I, #1 - bne zgemv_n_kernel_F10 + bne .Lzgemv_n_kernel_F10 -zgemv_n_kernel_F_END: +.Lzgemv_n_kernel_F_END: add A, A, LDA subs J, J, #1 - bne zgemv_n_kernel_F_LOOP + bne .Lzgemv_n_kernel_F_LOOP - b zgemv_n_kernel_L999 + b .Lzgemv_n_kernel_L999 -zgemv_n_kernel_S_BEGIN: +.Lzgemv_n_kernel_S_BEGIN: INIT_S -zgemv_n_kernel_S_LOOP: +.Lzgemv_n_kernel_S_LOOP: mov A_PTR, A mov Y_IPTR, Y mov Y_OPTR, Y @@ -430,9 +430,9 @@ zgemv_n_kernel_S_LOOP: asr I, M, #2 cmp I, xzr - ble zgemv_n_kernel_S1 + ble .Lzgemv_n_kernel_S1 -zgemv_n_kernel_S4: +.Lzgemv_n_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -440,27 +440,27 @@ zgemv_n_kernel_S4: KERNEL_S1 subs I, I, #1 - bne zgemv_n_kernel_S4 + bne .Lzgemv_n_kernel_S4 -zgemv_n_kernel_S1: +.Lzgemv_n_kernel_S1: ands I, M, #3 - ble zgemv_n_kernel_S_END + ble .Lzgemv_n_kernel_S_END -zgemv_n_kernel_S10: +.Lzgemv_n_kernel_S10: KERNEL_S1 subs I, I, #1 - bne zgemv_n_kernel_S10 + bne .Lzgemv_n_kernel_S10 -zgemv_n_kernel_S_END: +.Lzgemv_n_kernel_S_END: add A, A, LDA subs J, J, #1 - bne zgemv_n_kernel_S_LOOP + bne .Lzgemv_n_kernel_S_LOOP -zgemv_n_kernel_L999: +.Lzgemv_n_kernel_L999: RESTORE_REGS mov w0, wzr diff --git a/kernel/arm64/zgemv_t.S b/kernel/arm64/zgemv_t.S index 79ce9bcf2..0151029c7 100644 --- a/kernel/arm64/zgemv_t.S +++ b/kernel/arm64/zgemv_t.S @@ -292,9 +292,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SAVE_REGS cmp N, xzr - ble zgemv_t_kernel_L999 + ble .Lzgemv_t_kernel_L999 cmp M, xzr - ble zgemv_t_kernel_L999 + ble .Lzgemv_t_kernel_L999 lsl LDA, LDA, #SHZ lsl INC_Y, INC_Y, #SHZ @@ -303,9 +303,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INIT cmp INC_X, #1 - bne zgemv_t_kernel_S_BEGIN + bne .Lzgemv_t_kernel_S_BEGIN -zgemv_t_kernel_F_LOOP: +.Lzgemv_t_kernel_F_LOOP: mov A_PTR, A mov X_PTR, X @@ -314,30 +314,30 @@ zgemv_t_kernel_F_LOOP: asr I, M, #2 cmp I, xzr - beq zgemv_t_kernel_F1 + beq .Lzgemv_t_kernel_F1 -zgemv_t_kernel_F4: +.Lzgemv_t_kernel_F4: KERNEL_F4 subs I, I, #1 - bne zgemv_t_kernel_F4 + bne .Lzgemv_t_kernel_F4 KERNEL_F4_FINALIZE -zgemv_t_kernel_F1: +.Lzgemv_t_kernel_F1: ands I, M, #3 - ble zgemv_t_kernel_F_END + ble .Lzgemv_t_kernel_F_END -zgemv_t_kernel_F10: +.Lzgemv_t_kernel_F10: KERNEL_F1 subs I, I, #1 - bne zgemv_t_kernel_F10 + bne .Lzgemv_t_kernel_F10 -zgemv_t_kernel_F_END: +.Lzgemv_t_kernel_F_END: #if !defined(DOUBLE) ld1 {v4.2s}, [Y] @@ -355,15 +355,15 @@ zgemv_t_kernel_F_END: add A, A, LDA subs J, J, #1 - bne zgemv_t_kernel_F_LOOP + bne .Lzgemv_t_kernel_F_LOOP - b zgemv_t_kernel_L999 + b .Lzgemv_t_kernel_L999 -zgemv_t_kernel_S_BEGIN: +.Lzgemv_t_kernel_S_BEGIN: INIT_S -zgemv_t_kernel_S_LOOP: +.Lzgemv_t_kernel_S_LOOP: mov A_PTR, A mov X_PTR, X @@ -371,9 +371,9 @@ zgemv_t_kernel_S_LOOP: asr I, M, #2 cmp I, xzr - ble zgemv_t_kernel_S1 + ble .Lzgemv_t_kernel_S1 -zgemv_t_kernel_S4: +.Lzgemv_t_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -381,21 +381,21 @@ zgemv_t_kernel_S4: KERNEL_S1 subs I, I, #1 - bne zgemv_t_kernel_S4 + bne .Lzgemv_t_kernel_S4 -zgemv_t_kernel_S1: +.Lzgemv_t_kernel_S1: ands I, M, #3 - ble zgemv_t_kernel_S_END + ble .Lzgemv_t_kernel_S_END -zgemv_t_kernel_S10: +.Lzgemv_t_kernel_S10: KERNEL_S1 subs I, I, #1 - bne zgemv_t_kernel_S10 + bne .Lzgemv_t_kernel_S10 -zgemv_t_kernel_S_END: +.Lzgemv_t_kernel_S_END: #if !defined(DOUBLE) ld1 {v4.2s}, [Y] @@ -413,9 +413,9 @@ zgemv_t_kernel_S_END: add A, A, LDA subs J, J, #1 - bne zgemv_t_kernel_S_LOOP + bne .Lzgemv_t_kernel_S_LOOP -zgemv_t_kernel_L999: +.Lzgemv_t_kernel_L999: RESTORE_REGS mov w0, wzr ret diff --git a/kernel/arm64/znrm2.S b/kernel/arm64/znrm2.S index 1360dc993..1c89685ea 100644 --- a/kernel/arm64/znrm2.S +++ b/kernel/arm64/znrm2.S @@ -226,43 +226,43 @@ KERNEL_S1_END_\@: INIT cmp N, #0 - ble nrm2_kernel_L999 + ble .Lznrm2_kernel_L999 cmp INC_X, #0 - beq nrm2_kernel_L999 + beq .Lznrm2_kernel_L999 cmp INC_X, #1 - bne nrm2_kernel_S_BEGIN + bne .Lznrm2_kernel_S_BEGIN -nrm2_kernel_F_BEGIN: +.Lznrm2_kernel_F_BEGIN: asr I, N, #3 // I = N / 8 cmp I, xzr - ble nrm2_kernel_F1 + ble .Lznrm2_kernel_F1 -nrm2_kernel_F8: +.Lznrm2_kernel_F8: KERNEL_F8 subs I, I, #1 - bne nrm2_kernel_F8 + bne .Lznrm2_kernel_F8 -nrm2_kernel_F1: +.Lznrm2_kernel_F1: ands I, N, #7 - ble nrm2_kernel_L999 + ble .Lznrm2_kernel_L999 -nrm2_kernel_F10: +.Lznrm2_kernel_F10: KERNEL_F1 subs I, I, #1 - bne nrm2_kernel_F10 + bne .Lznrm2_kernel_F10 - b nrm2_kernel_L999 + b .Lznrm2_kernel_L999 -nrm2_kernel_S_BEGIN: +.Lznrm2_kernel_S_BEGIN: INIT_S @@ -270,15 +270,15 @@ nrm2_kernel_S_BEGIN: .align 5 -nrm2_kernel_S10: +.Lznrm2_kernel_S10: KERNEL_S1 subs I, I, #1 - bne nrm2_kernel_S10 + bne .Lznrm2_kernel_S10 -nrm2_kernel_L999: +.Lznrm2_kernel_L999: fsqrt SSQ, SSQ fmul SSQ, SCALE, SSQ diff --git a/kernel/arm64/zrot.S b/kernel/arm64/zrot.S index 90f138a19..b5e510ebe 100644 --- a/kernel/arm64/zrot.S +++ b/kernel/arm64/zrot.S @@ -181,54 +181,54 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE cmp N, xzr - ble rot_kernel_L999 + ble .Lzrot_kernel_L999 INIT cmp INC_X, #1 - bne rot_kernel_S_BEGIN + bne .Lzrot_kernel_S_BEGIN cmp INC_Y, #1 - bne rot_kernel_S_BEGIN + bne .Lzrot_kernel_S_BEGIN -rot_kernel_F_BEGIN: +.Lzrot_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq rot_kernel_F1 + beq .Lzrot_kernel_F1 KERNEL_INIT_F4 -rot_kernel_F4: +.Lzrot_kernel_F4: KERNEL_F4 subs I, I, #1 - bne rot_kernel_F4 + bne .Lzrot_kernel_F4 -rot_kernel_F1: +.Lzrot_kernel_F1: ands I, N, #3 - ble rot_kernel_L999 + ble .Lzrot_kernel_L999 -rot_kernel_F10: +.Lzrot_kernel_F10: KERNEL_F1 subs I, I, #1 - bne rot_kernel_F10 + bne .Lzrot_kernel_F10 mov w0, wzr ret -rot_kernel_S_BEGIN: +.Lzrot_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble rot_kernel_S1 + ble .Lzrot_kernel_S1 -rot_kernel_S4: +.Lzrot_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -236,21 +236,21 @@ rot_kernel_S4: KERNEL_S1 subs I, I, #1 - bne rot_kernel_S4 + bne .Lzrot_kernel_S4 -rot_kernel_S1: +.Lzrot_kernel_S1: ands I, N, #3 - ble rot_kernel_L999 + ble .Lzrot_kernel_L999 -rot_kernel_S10: +.Lzrot_kernel_S10: KERNEL_S1 subs I, I, #1 - bne rot_kernel_S10 + bne .Lzrot_kernel_S10 -rot_kernel_L999: +.Lzrot_kernel_L999: mov w0, wzr ret diff --git a/kernel/arm64/zscal.S b/kernel/arm64/zscal.S index daaa55e9d..929455975 100644 --- a/kernel/arm64/zscal.S +++ b/kernel/arm64/zscal.S @@ -215,71 +215,71 @@ zscal_begin: mov X_COPY, X cmp N, xzr - ble zscal_kernel_L999 + ble .Lzscal_kernel_L999 fcmp DA_R, #0.0 - bne zscal_kernel_R_non_zero + bne .Lzscal_kernel_R_non_zero fcmp DA_I, #0.0 - beq zscal_kernel_RI_zero + beq .Lzscal_kernel_RI_zero - b zscal_kernel_R_zero + b .Lzscal_kernel_R_zero -zscal_kernel_R_non_zero: +.Lzscal_kernel_R_non_zero: fcmp DA_I, #0.0 - beq zscal_kernel_I_zero + beq .Lzscal_kernel_I_zero /******************************************************************************* * A_R != 0 && A_I != 0 *******************************************************************************/ -zscal_kernel_RI_non_zero: +.Lzscal_kernel_RI_non_zero: INIT cmp INC_X, #1 - bne zscal_kernel_S_BEGIN + bne .Lzscal_kernel_S_BEGIN -zscal_kernel_F_BEGIN: +.Lzscal_kernel_F_BEGIN: asr I, N, #2 cmp I, xzr - beq zscal_kernel_F1 + beq .Lzscal_kernel_F1 KERNEL_INIT_F4 -zscal_kernel_F4: +.Lzscal_kernel_F4: KERNEL_F4 subs I, I, #1 - bne zscal_kernel_F4 + bne .Lzscal_kernel_F4 -zscal_kernel_F1: +.Lzscal_kernel_F1: ands I, N, #3 - ble zscal_kernel_L999 + ble .Lzscal_kernel_L999 -zscal_kernel_F10: +.Lzscal_kernel_F10: KERNEL_F1 subs I, I, #1 - bne zscal_kernel_F10 + bne .Lzscal_kernel_F10 mov w0, wzr ret -zscal_kernel_S_BEGIN: +.Lzscal_kernel_S_BEGIN: INIT_S asr I, N, #2 cmp I, xzr - ble zscal_kernel_S1 + ble .Lzscal_kernel_S1 -zscal_kernel_S4: +.Lzscal_kernel_S4: KERNEL_S1 KERNEL_S1 @@ -287,21 +287,21 @@ zscal_kernel_S4: KERNEL_S1 subs I, I, #1 - bne zscal_kernel_S4 + bne .Lzscal_kernel_S4 -zscal_kernel_S1: +.Lzscal_kernel_S1: ands I, N, #3 - ble zscal_kernel_L999 + ble .Lzscal_kernel_L999 -zscal_kernel_S10: +.Lzscal_kernel_S10: KERNEL_S1 subs I, I, #1 - bne zscal_kernel_S10 + bne .Lzscal_kernel_S10 -zscal_kernel_L999: +.Lzscal_kernel_L999: mov w0, wzr ret @@ -310,7 +310,7 @@ zscal_kernel_L999: * A_R == 0 && A_I != 0 *******************************************************************************/ -zscal_kernel_R_zero: +.Lzscal_kernel_R_zero: INIT_S #if !defined(DOUBLE) @@ -323,7 +323,7 @@ zscal_kernel_R_zero: ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I #endif -zscal_kernel_R_zero_1: +.Lzscal_kernel_R_zero_1: #if !defined(DOUBLE) ld1 {v2.2s}, [X] // X1, X0 fmul v2.2s, v2.2s, v1.2s // -DA_I*X1, DA_I*X0 @@ -337,7 +337,7 @@ zscal_kernel_R_zero_1: #endif add X, X, INC_X subs N, N, #1 - bne zscal_kernel_R_zero_1 + bne .Lzscal_kernel_R_zero_1 mov w0, wzr ret @@ -346,7 +346,7 @@ zscal_kernel_R_zero_1: * A_R != 0 && A_I == 0 *******************************************************************************/ -zscal_kernel_I_zero: +.Lzscal_kernel_I_zero: INIT_S #if !defined(DOUBLE) ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R @@ -354,7 +354,7 @@ zscal_kernel_I_zero: ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R #endif -zscal_kernel_I_zero_1: +.Lzscal_kernel_I_zero_1: #if !defined(DOUBLE) ld1 {v2.2s}, [X] // X1, X0 fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0 @@ -366,7 +366,7 @@ zscal_kernel_I_zero_1: #endif add X, X, INC_X subs N, N, #1 - bne zscal_kernel_I_zero_1 + bne .Lzscal_kernel_I_zero_1 mov w0, wzr ret @@ -375,16 +375,16 @@ zscal_kernel_I_zero_1: * A_R == 0 && A_I == 0 *******************************************************************************/ -zscal_kernel_RI_zero: +.Lzscal_kernel_RI_zero: INIT_S -zscal_kernel_RI_zero_1: +.Lzscal_kernel_RI_zero_1: stp DA_R, DA_I, [X] add X, X, INC_X subs N, N, #1 - bne zscal_kernel_RI_zero_1 + bne .Lzscal_kernel_RI_zero_1 mov w0, wzr ret diff --git a/kernel/arm64/ztrmm_kernel_4x4.S b/kernel/arm64/ztrmm_kernel_4x4.S index 77a7857ff..462acfe2b 100644 --- a/kernel/arm64/ztrmm_kernel_4x4.S +++ b/kernel/arm64/ztrmm_kernel_4x4.S @@ -1078,9 +1078,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov counterJ, origN asr counterJ, counterJ, #2 // J = J / 4 cmp counterJ, #0 - ble ztrmm_kernel_L2_BEGIN + ble .Lztrmm_kernel_L2_BEGIN -ztrmm_kernel_L4_BEGIN: +.Lztrmm_kernel_L4_BEGIN: mov pCRow0, pC add pCRow1, pCRow0, LDC add pCRow2, pCRow1, LDC @@ -1094,15 +1094,15 @@ ztrmm_kernel_L4_BEGIN: #endif mov pA, origPA // pA = start of A array -ztrmm_kernel_L4_M4_BEGIN: +.Lztrmm_kernel_L4_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble ztrmm_kernel_L4_M2_BEGIN + ble .Lztrmm_kernel_L4_M2_BEGIN .align 5 -ztrmm_kernel_L4_M4_20: +.Lztrmm_kernel_L4_M4_20: #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) mov pB, origPB @@ -1123,7 +1123,7 @@ ztrmm_kernel_L4_M4_20: asr counterL , tempK, #3 cmp counterL , #2 - blt ztrmm_kernel_L4_M4_32 + blt .Lztrmm_kernel_L4_M4_32 KERNEL4x4_I KERNEL4x4_M2 @@ -1135,10 +1135,10 @@ ztrmm_kernel_L4_M4_20: KERNEL4x4_M2 subs counterL, counterL, #2 - ble ztrmm_kernel_L4_M4_22a + ble .Lztrmm_kernel_L4_M4_22a .align 5 -ztrmm_kernel_L4_M4_22: +.Lztrmm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 @@ -1150,10 +1150,10 @@ ztrmm_kernel_L4_M4_22: KERNEL4x4_M2 subs counterL, counterL, #1 - bgt ztrmm_kernel_L4_M4_22 + bgt .Lztrmm_kernel_L4_M4_22 .align 5 -ztrmm_kernel_L4_M4_22a: +.Lztrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_M2 @@ -1164,13 +1164,13 @@ ztrmm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_E - b ztrmm_kernel_L4_M4_44 + b .Lztrmm_kernel_L4_M4_44 .align 5 -ztrmm_kernel_L4_M4_32: +.Lztrmm_kernel_L4_M4_32: tst counterL, #1 - ble ztrmm_kernel_L4_M4_40 + ble .Lztrmm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_M2 @@ -1181,26 +1181,26 @@ ztrmm_kernel_L4_M4_32: KERNEL4x4_M1 KERNEL4x4_E - b ztrmm_kernel_L4_M4_44 + b .Lztrmm_kernel_L4_M4_44 -ztrmm_kernel_L4_M4_40: +.Lztrmm_kernel_L4_M4_40: INIT4x4 -ztrmm_kernel_L4_M4_44: +.Lztrmm_kernel_L4_M4_44: ands counterL , tempK, #7 - ble ztrmm_kernel_L4_M4_100 + ble .Lztrmm_kernel_L4_M4_100 .align 5 -ztrmm_kernel_L4_M4_46: +.Lztrmm_kernel_L4_M4_46: KERNEL4x4_SUB subs counterL, counterL, #1 - bne ztrmm_kernel_L4_M4_46 + bne .Lztrmm_kernel_L4_M4_46 -ztrmm_kernel_L4_M4_100: +.Lztrmm_kernel_L4_M4_100: SAVE4x4 @@ -1223,20 +1223,20 @@ ztrmm_kernel_L4_M4_100: prfm PLDL1KEEP, [pA, #64] prfm PLDL1KEEP, [origPB] -ztrmm_kernel_L4_M4_END: +.Lztrmm_kernel_L4_M4_END: subs counterI, counterI, #1 - bne ztrmm_kernel_L4_M4_20 + bne .Lztrmm_kernel_L4_M4_20 -ztrmm_kernel_L4_M2_BEGIN: +.Lztrmm_kernel_L4_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ztrmm_kernel_L4_END + ble .Lztrmm_kernel_L4_END tst counterI, #2 // counterI = counterI / 2 - ble ztrmm_kernel_L4_M1_BEGIN + ble .Lztrmm_kernel_L4_M1_BEGIN -ztrmm_kernel_L4_M2_20: +.Lztrmm_kernel_L4_M2_20: INIT2x4 @@ -1260,9 +1260,9 @@ ztrmm_kernel_L4_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ztrmm_kernel_L4_M2_40 + ble .Lztrmm_kernel_L4_M2_40 -ztrmm_kernel_L4_M2_22: +.Lztrmm_kernel_L4_M2_22: KERNEL2x4_SUB KERNEL2x4_SUB @@ -1275,22 +1275,22 @@ ztrmm_kernel_L4_M2_22: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L4_M2_22 + bgt .Lztrmm_kernel_L4_M2_22 -ztrmm_kernel_L4_M2_40: +.Lztrmm_kernel_L4_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L4_M2_100 + ble .Lztrmm_kernel_L4_M2_100 -ztrmm_kernel_L4_M2_42: +.Lztrmm_kernel_L4_M2_42: KERNEL2x4_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L4_M2_42 + bgt .Lztrmm_kernel_L4_M2_42 -ztrmm_kernel_L4_M2_100: +.Lztrmm_kernel_L4_M2_100: SAVE2x4 @@ -1310,15 +1310,15 @@ ztrmm_kernel_L4_M2_100: add tempOffset, tempOffset, #2 #endif -ztrmm_kernel_L4_M2_END: +.Lztrmm_kernel_L4_M2_END: -ztrmm_kernel_L4_M1_BEGIN: +.Lztrmm_kernel_L4_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ztrmm_kernel_L4_END + ble .Lztrmm_kernel_L4_END -ztrmm_kernel_L4_M1_20: +.Lztrmm_kernel_L4_M1_20: INIT1x4 @@ -1342,9 +1342,9 @@ ztrmm_kernel_L4_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ztrmm_kernel_L4_M1_40 + ble .Lztrmm_kernel_L4_M1_40 -ztrmm_kernel_L4_M1_22: +.Lztrmm_kernel_L4_M1_22: KERNEL1x4_SUB KERNEL1x4_SUB KERNEL1x4_SUB @@ -1356,22 +1356,22 @@ ztrmm_kernel_L4_M1_22: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L4_M1_22 + bgt .Lztrmm_kernel_L4_M1_22 -ztrmm_kernel_L4_M1_40: +.Lztrmm_kernel_L4_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L4_M1_100 + ble .Lztrmm_kernel_L4_M1_100 -ztrmm_kernel_L4_M1_42: +.Lztrmm_kernel_L4_M1_42: KERNEL1x4_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L4_M1_42 + bgt .Lztrmm_kernel_L4_M1_42 -ztrmm_kernel_L4_M1_100: +.Lztrmm_kernel_L4_M1_100: SAVE1x4 @@ -1392,7 +1392,7 @@ ztrmm_kernel_L4_M1_100: #endif -ztrmm_kernel_L4_END: +.Lztrmm_kernel_L4_END: lsl temp, origK, #6 add origPB, origPB, temp // B = B + K * 4 * 8 * 2 @@ -1402,19 +1402,19 @@ ztrmm_kernel_L4_END: #endif subs counterJ, counterJ , #1 // j-- - bgt ztrmm_kernel_L4_BEGIN + bgt .Lztrmm_kernel_L4_BEGIN /******************************************************************************/ -ztrmm_kernel_L2_BEGIN: // less than 2 left in N direction +.Lztrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov counterJ , origN tst counterJ , #3 - ble ztrmm_kernel_L999 // error, N was less than 4? + ble .Lztrmm_kernel_L999 // error, N was less than 4? tst counterJ , #2 - ble ztrmm_kernel_L1_BEGIN + ble .Lztrmm_kernel_L1_BEGIN mov pCRow0, pC // pCRow0 = pC @@ -1426,14 +1426,14 @@ ztrmm_kernel_L2_BEGIN: // less than 2 left in N direction mov pA, origPA // pA = A -ztrmm_kernel_L2_M4_BEGIN: +.Lztrmm_kernel_L2_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI,#0 - ble ztrmm_kernel_L2_M2_BEGIN + ble .Lztrmm_kernel_L2_M2_BEGIN -ztrmm_kernel_L2_M4_20: +.Lztrmm_kernel_L2_M4_20: INIT4x2 @@ -1457,10 +1457,10 @@ ztrmm_kernel_L2_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble ztrmm_kernel_L2_M4_40 + ble .Lztrmm_kernel_L2_M4_40 .align 5 -ztrmm_kernel_L2_M4_22: +.Lztrmm_kernel_L2_M4_22: KERNEL4x2_SUB KERNEL4x2_SUB KERNEL4x2_SUB @@ -1472,22 +1472,22 @@ ztrmm_kernel_L2_M4_22: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L2_M4_22 + bgt .Lztrmm_kernel_L2_M4_22 -ztrmm_kernel_L2_M4_40: +.Lztrmm_kernel_L2_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L2_M4_100 + ble .Lztrmm_kernel_L2_M4_100 -ztrmm_kernel_L2_M4_42: +.Lztrmm_kernel_L2_M4_42: KERNEL4x2_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L2_M4_42 + bgt .Lztrmm_kernel_L2_M4_42 -ztrmm_kernel_L2_M4_100: +.Lztrmm_kernel_L2_M4_100: SAVE4x2 @@ -1507,22 +1507,22 @@ ztrmm_kernel_L2_M4_100: add tempOffset, tempOffset, #4 #endif -ztrmm_kernel_L2_M4_END: +.Lztrmm_kernel_L2_M4_END: subs counterI, counterI, #1 - bgt ztrmm_kernel_L2_M4_20 + bgt .Lztrmm_kernel_L2_M4_20 -ztrmm_kernel_L2_M2_BEGIN: +.Lztrmm_kernel_L2_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ztrmm_kernel_L2_END + ble .Lztrmm_kernel_L2_END tst counterI, #2 // counterI = counterI / 2 - ble ztrmm_kernel_L2_M1_BEGIN + ble .Lztrmm_kernel_L2_M1_BEGIN -ztrmm_kernel_L2_M2_20: +.Lztrmm_kernel_L2_M2_20: INIT2x2 @@ -1546,9 +1546,9 @@ ztrmm_kernel_L2_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL,#0 - ble ztrmm_kernel_L2_M2_40 + ble .Lztrmm_kernel_L2_M2_40 -ztrmm_kernel_L2_M2_22: +.Lztrmm_kernel_L2_M2_22: KERNEL2x2_SUB KERNEL2x2_SUB @@ -1561,22 +1561,22 @@ ztrmm_kernel_L2_M2_22: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L2_M2_22 + bgt .Lztrmm_kernel_L2_M2_22 -ztrmm_kernel_L2_M2_40: +.Lztrmm_kernel_L2_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L2_M2_100 + ble .Lztrmm_kernel_L2_M2_100 -ztrmm_kernel_L2_M2_42: +.Lztrmm_kernel_L2_M2_42: KERNEL2x2_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L2_M2_42 + bgt .Lztrmm_kernel_L2_M2_42 -ztrmm_kernel_L2_M2_100: +.Lztrmm_kernel_L2_M2_100: SAVE2x2 @@ -1596,15 +1596,15 @@ ztrmm_kernel_L2_M2_100: add tempOffset, tempOffset, #2 #endif -ztrmm_kernel_L2_M2_END: +.Lztrmm_kernel_L2_M2_END: -ztrmm_kernel_L2_M1_BEGIN: +.Lztrmm_kernel_L2_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ztrmm_kernel_L2_END + ble .Lztrmm_kernel_L2_END -ztrmm_kernel_L2_M1_20: +.Lztrmm_kernel_L2_M1_20: INIT1x2 @@ -1628,9 +1628,9 @@ ztrmm_kernel_L2_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL, #0 - ble ztrmm_kernel_L2_M1_40 + ble .Lztrmm_kernel_L2_M1_40 -ztrmm_kernel_L2_M1_22: +.Lztrmm_kernel_L2_M1_22: KERNEL1x2_SUB KERNEL1x2_SUB KERNEL1x2_SUB @@ -1642,22 +1642,22 @@ ztrmm_kernel_L2_M1_22: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L2_M1_22 + bgt .Lztrmm_kernel_L2_M1_22 -ztrmm_kernel_L2_M1_40: +.Lztrmm_kernel_L2_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L2_M1_100 + ble .Lztrmm_kernel_L2_M1_100 -ztrmm_kernel_L2_M1_42: +.Lztrmm_kernel_L2_M1_42: KERNEL1x2_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L2_M1_42 + bgt .Lztrmm_kernel_L2_M1_42 -ztrmm_kernel_L2_M1_100: +.Lztrmm_kernel_L2_M1_100: SAVE1x2 @@ -1678,7 +1678,7 @@ ztrmm_kernel_L2_M1_100: #endif -ztrmm_kernel_L2_END: +.Lztrmm_kernel_L2_END: #if !defined(LEFT) add tempOffset, tempOffset, #2 #endif @@ -1688,11 +1688,11 @@ ztrmm_kernel_L2_END: /******************************************************************************/ -ztrmm_kernel_L1_BEGIN: +.Lztrmm_kernel_L1_BEGIN: mov counterJ , origN tst counterJ , #1 - ble ztrmm_kernel_L999 // done + ble .Lztrmm_kernel_L999 // done mov pCRow0, pC // pCRow0 = C @@ -1706,14 +1706,14 @@ ztrmm_kernel_L1_BEGIN: -ztrmm_kernel_L1_M4_BEGIN: +.Lztrmm_kernel_L1_M4_BEGIN: mov counterI, origM asr counterI, counterI, #2 // counterI = counterI / 4 cmp counterI, #0 - ble ztrmm_kernel_L1_M2_BEGIN + ble .Lztrmm_kernel_L1_M2_BEGIN -ztrmm_kernel_L1_M4_20: +.Lztrmm_kernel_L1_M4_20: INIT4x1 @@ -1737,10 +1737,10 @@ ztrmm_kernel_L1_M4_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ztrmm_kernel_L1_M4_40 + ble .Lztrmm_kernel_L1_M4_40 .align 5 -ztrmm_kernel_L1_M4_22: +.Lztrmm_kernel_L1_M4_22: KERNEL4x1_SUB KERNEL4x1_SUB KERNEL4x1_SUB @@ -1752,22 +1752,22 @@ ztrmm_kernel_L1_M4_22: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L1_M4_22 + bgt .Lztrmm_kernel_L1_M4_22 -ztrmm_kernel_L1_M4_40: +.Lztrmm_kernel_L1_M4_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L1_M4_100 + ble .Lztrmm_kernel_L1_M4_100 -ztrmm_kernel_L1_M4_42: +.Lztrmm_kernel_L1_M4_42: KERNEL4x1_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L1_M4_42 + bgt .Lztrmm_kernel_L1_M4_42 -ztrmm_kernel_L1_M4_100: +.Lztrmm_kernel_L1_M4_100: SAVE4x1 @@ -1787,22 +1787,22 @@ ztrmm_kernel_L1_M4_100: add tempOffset, tempOffset, #4 #endif -ztrmm_kernel_L1_M4_END: +.Lztrmm_kernel_L1_M4_END: subs counterI, counterI, #1 - bgt ztrmm_kernel_L1_M4_20 + bgt .Lztrmm_kernel_L1_M4_20 -ztrmm_kernel_L1_M2_BEGIN: +.Lztrmm_kernel_L1_M2_BEGIN: mov counterI, origM tst counterI , #3 - ble ztrmm_kernel_L1_END + ble .Lztrmm_kernel_L1_END tst counterI, #2 // counterI = counterI / 2 - ble ztrmm_kernel_L1_M1_BEGIN + ble .Lztrmm_kernel_L1_M1_BEGIN -ztrmm_kernel_L1_M2_20: +.Lztrmm_kernel_L1_M2_20: INIT2x1 @@ -1826,9 +1826,9 @@ ztrmm_kernel_L1_M2_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ztrmm_kernel_L1_M2_40 + ble .Lztrmm_kernel_L1_M2_40 -ztrmm_kernel_L1_M2_22: +.Lztrmm_kernel_L1_M2_22: KERNEL2x1_SUB KERNEL2x1_SUB @@ -1841,22 +1841,22 @@ ztrmm_kernel_L1_M2_22: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L1_M2_22 + bgt .Lztrmm_kernel_L1_M2_22 -ztrmm_kernel_L1_M2_40: +.Lztrmm_kernel_L1_M2_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L1_M2_100 + ble .Lztrmm_kernel_L1_M2_100 -ztrmm_kernel_L1_M2_42: +.Lztrmm_kernel_L1_M2_42: KERNEL2x1_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L1_M2_42 + bgt .Lztrmm_kernel_L1_M2_42 -ztrmm_kernel_L1_M2_100: +.Lztrmm_kernel_L1_M2_100: SAVE2x1 @@ -1876,15 +1876,15 @@ ztrmm_kernel_L1_M2_100: add tempOffset, tempOffset, #2 #endif -ztrmm_kernel_L1_M2_END: +.Lztrmm_kernel_L1_M2_END: -ztrmm_kernel_L1_M1_BEGIN: +.Lztrmm_kernel_L1_M1_BEGIN: tst counterI, #1 // counterI = counterI % 2 - ble ztrmm_kernel_L1_END + ble .Lztrmm_kernel_L1_END -ztrmm_kernel_L1_M1_20: +.Lztrmm_kernel_L1_M1_20: INIT1x1 @@ -1908,9 +1908,9 @@ ztrmm_kernel_L1_M1_20: asr counterL , tempK, #3 // counterL = counterL / 8 cmp counterL , #0 - ble ztrmm_kernel_L1_M1_40 + ble .Lztrmm_kernel_L1_M1_40 -ztrmm_kernel_L1_M1_22: +.Lztrmm_kernel_L1_M1_22: KERNEL1x1_SUB KERNEL1x1_SUB KERNEL1x1_SUB @@ -1922,30 +1922,30 @@ ztrmm_kernel_L1_M1_22: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L1_M1_22 + bgt .Lztrmm_kernel_L1_M1_22 -ztrmm_kernel_L1_M1_40: +.Lztrmm_kernel_L1_M1_40: ands counterL , tempK, #7 // counterL = counterL % 8 - ble ztrmm_kernel_L1_M1_100 + ble .Lztrmm_kernel_L1_M1_100 -ztrmm_kernel_L1_M1_42: +.Lztrmm_kernel_L1_M1_42: KERNEL1x1_SUB subs counterL, counterL, #1 - bgt ztrmm_kernel_L1_M1_42 + bgt .Lztrmm_kernel_L1_M1_42 -ztrmm_kernel_L1_M1_100: +.Lztrmm_kernel_L1_M1_100: SAVE1x1 -ztrmm_kernel_L1_END: +.Lztrmm_kernel_L1_END: -ztrmm_kernel_L999: +.Lztrmm_kernel_L999: mov x0, #0 // set return value ldp d8, d9, [sp, #(0 * 16)] ldp d10, d11, [sp, #(1 * 16)] From a07807caac631234250a7c1b487812b62265df0e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 25 Oct 2017 16:45:41 +0200 Subject: [PATCH 038/122] Eliminate loop code when called as/from dsdot --- kernel/x86_64/sdot_microk_haswell-2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/x86_64/sdot_microk_haswell-2.c b/kernel/x86_64/sdot_microk_haswell-2.c index 4051f9c1b..3248c408c 100644 --- a/kernel/x86_64/sdot_microk_haswell-2.c +++ b/kernel/x86_64/sdot_microk_haswell-2.c @@ -53,9 +53,11 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "vfmadd231ps 64(%3,%0,4), %%ymm14, %%ymm6 \n\t" // 2 * y "vfmadd231ps 96(%3,%0,4), %%ymm15, %%ymm7 \n\t" // 2 * y +#ifndef DSDOT "addq $32 , %0 \n\t" "subq $32 , %1 \n\t" "jnz 1b \n\t" +#endif "vextractf128 $1 , %%ymm4 , %%xmm12 \n\t" "vextractf128 $1 , %%ymm5 , %%xmm13 \n\t" From 66ac898f6441f0cb334f76d0c5603c37962bf368 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 5 Nov 2017 15:42:33 +0100 Subject: [PATCH 039/122] Change prototypes of all complex functions to use void* Change prototypes of complex functions to use void pointers like the other implementations of CBLAS --- cblas.h | 168 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/cblas.h b/cblas.h index d6949e10c..56730a3c9 100644 --- a/cblas.h +++ b/cblas.h @@ -57,45 +57,45 @@ double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_ float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy); -openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); -openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); -openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy); -openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy); +openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); +openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); +openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); +openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); -void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret); -void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret); -void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret); -void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret); +void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret); +void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret); +void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret); +void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret); float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); -float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); -double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); +float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); +double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX); double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX); -float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX); -double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX); +float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX); +double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX); CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); -CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); -CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); +CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); +CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); -void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); -void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); +void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); +void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); -void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); -void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); +void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); +void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); -void cblas_cswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); -void cblas_zswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); +void cblas_cswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); +void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s); void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s); @@ -111,59 +111,59 @@ void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2, void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX); void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX); -void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, float *X, OPENBLAS_CONST blasint incX); -void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, double *X, OPENBLAS_CONST blasint incX); -void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX); -void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX); +void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX); +void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX); +void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, void *X, OPENBLAS_CONST blasint incX); +void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, void *X, OPENBLAS_CONST blasint incX); void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy); void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy); void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda); void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda); -void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda); -void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda); -void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda); -void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda); +void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); +void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); +void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); +void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); -void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); -void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); +void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); +void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); -void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); -void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); +void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); +void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda); void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda); -void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda); -void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda); +void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda); +void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda); void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda); void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda); -void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, - OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda); -void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, - OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda); +void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, + OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); +void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, + OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY); void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY); void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, - OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY); + OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, - OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY); + OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY); @@ -176,45 +176,45 @@ void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX); void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX); void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX); void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX); void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, - OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX); + OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY); void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY); -void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, - OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY); -void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, - OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY); +void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, + OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); +void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, + OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap, @@ -225,36 +225,36 @@ void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap); void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap); -void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A); -void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,OPENBLAS_CONST blasint incX, double *A); +void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A); +void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X,OPENBLAS_CONST blasint incX, void *A); void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A); void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A); -void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *Ap); -void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *Ap); +void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap); +void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap); void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *Ap, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *Ap, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, @@ -262,60 +262,60 @@ void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, - OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb); void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb); void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, - OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb); + OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, - OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb); + OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb); void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb); void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, - OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb); + OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, - OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb); + OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST float alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST double alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, - OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_xerbla(blasint p, char *rout, char *form, ...); @@ -325,9 +325,9 @@ void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy); -void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy); +void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); -void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy); +void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb); From 2c222f1faafcee5c6439e36e507e05a6c6619e62 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 5 Nov 2017 15:53:14 +0100 Subject: [PATCH 040/122] Modify complex CBLAS functions to take void pointers Modify complex CBLAS functions to take void pointers instead of float or double arguments (to bring the prototypes in line with netlib and other implementations' cblas.h) --- interface/asum.c | 6 +++++- interface/copy.c | 6 ++++++ interface/gemm.c | 17 +++++++++++------ interface/imax.c | 6 +++++- interface/nrm2.c | 5 +++++ interface/symm.c | 17 +++++++++++------ interface/syr2k.c | 27 ++++++++++++++++++++++----- interface/syrk.c | 21 +++++++++++++++++++-- interface/tpmv.c | 9 ++++++++- interface/trsm.c | 11 ++++++++--- interface/zaxpby.c | 6 +++++- interface/zaxpy.c | 9 +++++++-- interface/zdot.c | 6 ++++-- interface/zgbmv.c | 16 +++++++++++----- interface/zgemv.c | 17 +++++++++++------ interface/zger.c | 13 +++++++++---- interface/zhbmv.c | 16 +++++++++++----- interface/zhemv.c | 10 ++++++++-- interface/zher.c | 5 ++++- interface/zher2.c | 7 ++++++- interface/zhpmv.c | 16 +++++++++++----- interface/zhpr.c | 7 +++++-- interface/zhpr2.c | 13 +++++++++---- interface/zscal.c | 8 +++++--- interface/zswap.c | 5 +++-- interface/ztbmv.c | 5 ++++- interface/ztbsv.c | 5 ++++- interface/ztpmv.c | 5 ++++- interface/ztpsv.c | 5 ++++- interface/ztrmv.c | 5 ++++- interface/ztrsv.c | 5 ++++- 31 files changed, 233 insertions(+), 76 deletions(-) diff --git a/interface/asum.c b/interface/asum.c index 139398940..bd83c88e0 100644 --- a/interface/asum.c +++ b/interface/asum.c @@ -68,8 +68,12 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ } #else - +#ifdef COMPLEX +FLOAT CNAME(blasint n, void *vx, blasint incx){ + FLOAT *x = (FLOAT*) vx; +#else FLOAT CNAME(blasint n, FLOAT *x, blasint incx){ +#endif FLOAT ret; diff --git a/interface/copy.c b/interface/copy.c index 3fb2182a9..20b5aec0a 100644 --- a/interface/copy.c +++ b/interface/copy.c @@ -54,7 +54,13 @@ void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ #else +#ifdef COMPLEX +void CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy){ +FLOAT *x = (FLOAT*) vx; +FLOAT *y = (FLOAT*) vy; +#else void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ +#endif PRINT_DEBUG_CNAME; diff --git a/interface/gemm.c b/interface/gemm.c index 7253b0500..8baf3fbec 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -220,17 +220,22 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS blasint m, blasint n, blasint k, #ifndef COMPLEX FLOAT alpha, -#else - FLOAT *alpha, -#endif FLOAT *a, blasint lda, FLOAT *b, blasint ldb, -#ifndef COMPLEX FLOAT beta, + FLOAT *c, blasint ldc) { #else - FLOAT *beta, + void *valpha, + void *va, blasint lda, + void *vb, blasint ldb, + void *vbeta, + void *vc, blasint ldc) { + FLOAT *alpha = (FLOAT*) valpha; + FLOAT *beta = (FLOAT*) vbeta; + FLOAT *a = (FLOAT*) va; + FLOAT *b = (FLOAT*) vb; + FLOAT *c = (FLOAT*) vc; #endif - FLOAT *c, blasint ldc) { blas_arg_t args; int transa, transb; diff --git a/interface/imax.c b/interface/imax.c index 4378f1e22..456460d98 100644 --- a/interface/imax.c +++ b/interface/imax.c @@ -146,8 +146,12 @@ blasint NAME(blasint *N, FLOAT *x, blasint *INCX){ } #else - +#ifdef COMPLEX +CBLAS_INDEX CNAME(blasint n, void *vx, blasint incx){ + FLOAT *x = (FLOAT*) vx; +#else CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){ +#endif CBLAS_INDEX ret; diff --git a/interface/nrm2.c b/interface/nrm2.c index cb4c8f6f4..dc8c08e9a 100644 --- a/interface/nrm2.c +++ b/interface/nrm2.c @@ -69,7 +69,12 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ #else +#ifdef COMPLEX +FLOAT CNAME(blasint n, void *vx, blasint incx){ + FLOAT *x = (FLOAT*) vx; +#else FLOAT CNAME(blasint n, FLOAT *x, blasint incx){ +#endif FLOAT ret; diff --git a/interface/symm.c b/interface/symm.c index 3210d371a..0e29a5f48 100644 --- a/interface/symm.c +++ b/interface/symm.c @@ -228,17 +228,22 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint m, blasint n, #ifndef COMPLEX FLOAT alpha, -#else - FLOAT *alpha, -#endif FLOAT *a, blasint lda, FLOAT *b, blasint ldb, -#ifndef COMPLEX FLOAT beta, + FLOAT *c, blasint ldc) { #else - FLOAT *beta, + void *valpha, + void *va, blasint lda, + void *vb, blasint ldb, + void *vbeta, + void *vc, blasint ldc) { + FLOAT *alpha = (FLOAT*) valpha; + FLOAT *beta = (FLOAT*) vbeta; + FLOAT *a = (FLOAT*) va; + FLOAT *b = (FLOAT*) vb; + FLOAT *c = (FLOAT*) vc; #endif - FLOAT *c, blasint ldc) { blas_arg_t args; int side, uplo; diff --git a/interface/syr2k.c b/interface/syr2k.c index bfa5d8be4..a72330c0b 100644 --- a/interface/syr2k.c +++ b/interface/syr2k.c @@ -185,17 +185,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr blasint n, blasint k, #ifndef COMPLEX FLOAT alpha, -#else - FLOAT *alpha, -#endif FLOAT *a, blasint lda, FLOAT *b, blasint ldb, +#else + void *valpha, + void *va, blasint lda, + void *vb, blasint ldb, +#endif #if !defined(COMPLEX) || defined(HEMM) FLOAT beta, #else - FLOAT *beta, + void *vbeta, +#endif +#ifndef COMPLEX + FLOAT *c, +#else + void *vc, +#endif + blasint ldc) { + +#ifdef COMPLEX + FLOAT* alpha = (FLOAT*) valpha; +#if !defined(HEMM) + FLOAT* beta = (FLOAT*) vbeta; +#endif + FLOAT* a = (FLOAT*) va; + FLOAT* b = (FLOAT*) vb; + FLOAT* c = (FLOAT*) vc; #endif - FLOAT *c, blasint ldc) { blas_arg_t args; int uplo, trans; diff --git a/interface/syrk.c b/interface/syrk.c index f8c697033..7dc87e044 100644 --- a/interface/syrk.c +++ b/interface/syrk.c @@ -188,15 +188,32 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr #if !defined(COMPLEX) || defined(HEMM) FLOAT alpha, #else - FLOAT *alpha, + void *valpha, #endif +#if !defined(COMPLEX) FLOAT *a, blasint lda, +#else + void *va, blasint lda, +#endif #if !defined(COMPLEX) || defined(HEMM) FLOAT beta, #else - FLOAT *beta, + void *vbeta, #endif +#if !defined(COMPLEX) FLOAT *c, blasint ldc) { +#else + void *vc, blasint ldc) { +#endif + +#ifdef COMPLEX +#if !defined(HEMM) + FLOAT* alpha = (FLOAT*) valpha; + FLOAT* beta = (FLOAT*) vbeta; +#endif + FLOAT* a = (FLOAT*) va; + FLOAT* c = (FLOAT*) vc; +#endif blas_arg_t args; int uplo, trans; diff --git a/interface/tpmv.c b/interface/tpmv.c index edf010492..262af2285 100644 --- a/interface/tpmv.c +++ b/interface/tpmv.c @@ -135,10 +135,17 @@ void NAME(char *UPLO, char *TRANS, char *DIAG, } #else - +#ifndef COMPLEX void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint n, FLOAT *a, FLOAT *x, blasint incx) { +#else +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + blasint n, void *va, void *vx, blasint incx) { + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; +#endif int trans, uplo, unit; blasint info; diff --git a/interface/trsm.c b/interface/trsm.c index 3d4aed282..60c49795d 100644 --- a/interface/trsm.c +++ b/interface/trsm.c @@ -210,11 +210,16 @@ void CNAME(enum CBLAS_ORDER order, blasint m, blasint n, #ifndef COMPLEX FLOAT alpha, -#else - FLOAT *alpha, -#endif FLOAT *a, blasint lda, FLOAT *b, blasint ldb) { +#else + void *valpha, + void *va, blasint lda, + void *vb, blasint ldb) { + FLOAT *alpha = (FLOAT*) valpha; + FLOAT *a = (FLOAT*) va; + FLOAT *b = (FLOAT*) vb; +#endif blas_arg_t args; int side, uplo, trans, unit; diff --git a/interface/zaxpby.c b/interface/zaxpby.c index 1abb24de9..3a4db7403 100644 --- a/interface/zaxpby.c +++ b/interface/zaxpby.c @@ -48,8 +48,12 @@ void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT #else -void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *y, blasint incy) +void CNAME(blasint n, void *VALPHA, void *vx, blasint incx, void *VBETA, void *vy, blasint incy) { + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* BETA = (FLOAT*) VBETA; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; #endif diff --git a/interface/zaxpy.c b/interface/zaxpy.c index daa12bafb..fbb830ffb 100644 --- a/interface/zaxpy.c +++ b/interface/zaxpy.c @@ -51,9 +51,14 @@ void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint * blasint incy = *INCY; #else - +#ifdef COMPLEX +void CNAME(blasint n, void *VALPHA, void *vx, blasint incx, void *vy, blasint incy){ +FLOAT *ALPHA = (FLOAT*) VALPHA; +FLOAT *x = (FLOAT*) vx; +FLOAT *y = (FLOAT*) vy; +#else void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ - +#endif #endif FLOAT alpha_r = *(ALPHA + 0); diff --git a/interface/zdot.c b/interface/zdot.c index cd956b075..40f425cc1 100644 --- a/interface/zdot.c +++ b/interface/zdot.c @@ -148,13 +148,15 @@ OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasin #else #ifdef FORCE_USE_STACK -void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ +void CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ #else -OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ +OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy){ OPENBLAS_COMPLEX_FLOAT ret; OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); #endif + FLOAT *x = (FLOAT*) vx; + FLOAT *y = (FLOAT*) vy; PRINT_DEBUG_CNAME; diff --git a/interface/zgbmv.c b/interface/zgbmv.c index a18cede1c..a04be2fbf 100644 --- a/interface/zgbmv.c +++ b/interface/zgbmv.c @@ -149,11 +149,17 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, blasint ku, blasint kl, - FLOAT *ALPHA, - FLOAT *a, blasint lda, - FLOAT *x, blasint incx, - FLOAT *BETA, - FLOAT *y, blasint incy){ + void *VALPHA, + void *va, blasint lda, + void *vx, blasint incx, + void *VBETA, + void *vy, blasint incy){ + + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* BETA = (FLOAT*) VBETA; + FLOAT* a = (FLOAT*) va; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; diff --git a/interface/zgemv.c b/interface/zgemv.c index e5ba3757c..0c75564f0 100644 --- a/interface/zgemv.c +++ b/interface/zgemv.c @@ -134,12 +134,17 @@ void NAME(char *TRANS, blasint *M, blasint *N, void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, - FLOAT *ALPHA, - FLOAT *a, blasint lda, - FLOAT *x, blasint incx, - FLOAT *BETA, - FLOAT *y, blasint incy){ - + void *VALPHA, + void *va, blasint lda, + void *vx, blasint incx, + void *VBETA, + void *vy, blasint incy){ + + FLOAT *ALPHA = (FLOAT*) VALPHA; + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; + FLOAT *BETA = (FLOAT*) VBETA; + FLOAT *y = (FLOAT*) vy; FLOAT *buffer; blasint lenx, leny; int trans, buffer_size; diff --git a/interface/zger.c b/interface/zger.c index db72b4e4c..1360c5873 100644 --- a/interface/zger.c +++ b/interface/zger.c @@ -141,10 +141,15 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, void CNAME(enum CBLAS_ORDER order, blasint m, blasint n, - FLOAT *Alpha, - FLOAT *x, blasint incx, - FLOAT *y, blasint incy, - FLOAT *a, blasint lda) { + void *VAlpha, + void *vx, blasint incx, + void *vy, blasint incy, + void *va, blasint lda) { + + FLOAT* Alpha = (FLOAT*) VAlpha; + FLOAT* a = (FLOAT*) va; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; FLOAT alpha_r = Alpha[0]; FLOAT alpha_i = Alpha[1]; diff --git a/interface/zhbmv.c b/interface/zhbmv.c index 8a16bbe28..9ad1b53a1 100644 --- a/interface/zhbmv.c +++ b/interface/zhbmv.c @@ -125,11 +125,17 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, blasint k, - FLOAT *ALPHA, - FLOAT *a, blasint lda, - FLOAT *x, blasint incx, - FLOAT *BETA, - FLOAT *y, blasint incy){ + void *VALPHA, + void *va, blasint lda, + void *vx, blasint incx, + void *VBETA, + void *vy, blasint incy){ + + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* BETA = (FLOAT*) VBETA; + FLOAT* a = (FLOAT*) va; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; diff --git a/interface/zhemv.c b/interface/zhemv.c index 35d29baea..2aee880dc 100644 --- a/interface/zhemv.c +++ b/interface/zhemv.c @@ -108,8 +108,14 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA, #else -void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA, - FLOAT *a, blasint lda, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *y, blasint incy) { +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, void *VALPHA, + void *va, blasint lda, void *vx, blasint incx, void *VBETA, void *vy, blasint incy) { + + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* BETA = (FLOAT*) VBETA; + FLOAT* a = (FLOAT*) va; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; diff --git a/interface/zher.c b/interface/zher.c index 2e4f0cb33..0d24984e6 100644 --- a/interface/zher.c +++ b/interface/zher.c @@ -113,7 +113,10 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, #else -void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) { +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, void *vx, blasint incx, void *va, blasint lda) { + + FLOAT* a = (FLOAT*) va; + FLOAT* x = (FLOAT*) vx; FLOAT *buffer; int uplo; diff --git a/interface/zher2.c b/interface/zher2.c index 2717c57b3..1cae633ce 100644 --- a/interface/zher2.c +++ b/interface/zher2.c @@ -116,7 +116,12 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, #else -void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT *a, blasint lda) { +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, void *VALPHA, void *vx, blasint incx, void *vy, blasint incy, void *va, blasint lda) { + + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* a = (FLOAT*) va; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; diff --git a/interface/zhpmv.c b/interface/zhpmv.c index bab6e5531..b72a6d670 100644 --- a/interface/zhpmv.c +++ b/interface/zhpmv.c @@ -119,11 +119,17 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, - FLOAT *ALPHA, - FLOAT *a, - FLOAT *x, blasint incx, - FLOAT *BETA, - FLOAT *y, blasint incy){ + void *VALPHA, + void *va, + void *vx, blasint incx, + void *VBETA, + void *vy, blasint incy){ + + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* BETA = (FLOAT*) VBETA; + FLOAT* a = (FLOAT*) va; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; diff --git a/interface/zhpr.c b/interface/zhpr.c index 5159ba7e1..10507a71f 100644 --- a/interface/zhpr.c +++ b/interface/zhpr.c @@ -115,8 +115,11 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, - FLOAT *x, blasint incx, - FLOAT *a) { + void *vx, blasint incx, + void *va) { + + FLOAT* x = (FLOAT*) vx; + FLOAT* a = (FLOAT*) va; FLOAT *buffer; int uplo; diff --git a/interface/zhpr2.c b/interface/zhpr2.c index 1712e5d52..c9bfb44b0 100644 --- a/interface/zhpr2.c +++ b/interface/zhpr2.c @@ -117,10 +117,15 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, - FLOAT *ALPHA, - FLOAT *x, blasint incx, - FLOAT *y, blasint incy, - FLOAT *a) { + void *VALPHA, + void *vx, blasint incx, + void *vy, blasint incy, + void *va) { + + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* x = (FLOAT*) vx; + FLOAT* y = (FLOAT*) vy; + FLOAT* a = (FLOAT*) va; FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; diff --git a/interface/zscal.c b/interface/zscal.c index 507d649bf..633b6ecf5 100644 --- a/interface/zscal.c +++ b/interface/zscal.c @@ -58,11 +58,13 @@ void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX){ #else #ifndef SSCAL -void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx){ +void CNAME(blasint n, void *VALPHA, void *vx, blasint incx){ - FLOAT *alpha=ALPHA; + FLOAT *x = (FLOAT*) vx; + FLOAT *alpha=(FLOAT*)VALPHA; #else -void CNAME(blasint n, FLOAT alpha_r, FLOAT *x, blasint incx){ +void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){ + FLOAT *x = (FLOAT*) vx; FLOAT alpha[2] = {alpha_r, ZERO}; #endif diff --git a/interface/zswap.c b/interface/zswap.c index fc62f7363..5308cbe90 100644 --- a/interface/zswap.c +++ b/interface/zswap.c @@ -52,8 +52,9 @@ void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ #else -void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ - +void CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy){ +FLOAT *x = (FLOAT*)vx; +FLOAT *y = (FLOAT*)vy; #endif #ifdef SMP diff --git a/interface/ztbmv.c b/interface/ztbmv.c index 0b6243125..d56620c5b 100644 --- a/interface/ztbmv.c +++ b/interface/ztbmv.c @@ -155,7 +155,10 @@ void NAME(char *UPLO, char *TRANS, char *DIAG, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint n, blasint k, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { + blasint n, blasint k, void *va, blasint lda, void *vx, blasint incx) { + + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; int trans, uplo, unit; blasint info; diff --git a/interface/ztbsv.c b/interface/ztbsv.c index 8afd2afe7..7e144ce75 100644 --- a/interface/ztbsv.c +++ b/interface/ztbsv.c @@ -131,8 +131,11 @@ void NAME(char *UPLO, char *TRANS, char *DIAG, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint n, blasint k, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { + blasint n, blasint k, void *va, blasint lda, void *vx, blasint incx) { + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; + int trans, uplo, unit; blasint info; FLOAT *buffer; diff --git a/interface/ztpmv.c b/interface/ztpmv.c index f9dfa75fb..3791d1602 100644 --- a/interface/ztpmv.c +++ b/interface/ztpmv.c @@ -150,8 +150,11 @@ void NAME(char *UPLO, char *TRANS, char *DIAG, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint n, FLOAT *a, FLOAT *x, blasint incx) { + blasint n, void *va, void *vx, blasint incx) { + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; + int trans, uplo, unit; blasint info; FLOAT *buffer; diff --git a/interface/ztpsv.c b/interface/ztpsv.c index c63e4d033..fa706b565 100644 --- a/interface/ztpsv.c +++ b/interface/ztpsv.c @@ -126,8 +126,11 @@ void NAME(char *UPLO, char *TRANS, char *DIAG, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint n, FLOAT *a, FLOAT *x, blasint incx) { + blasint n, void *va, void *vx, blasint incx) { + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; + int trans, uplo, unit; blasint info; FLOAT *buffer; diff --git a/interface/ztrmv.c b/interface/ztrmv.c index 1721afc1c..4c47e9e91 100644 --- a/interface/ztrmv.c +++ b/interface/ztrmv.c @@ -152,8 +152,11 @@ void NAME(char *UPLO, char *TRANS, char *DIAG, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { + blasint n, void *va, blasint lda, void *vx, blasint incx) { + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; + int trans, uplo, unit, buffer_size; blasint info; FLOAT *buffer; diff --git a/interface/ztrsv.c b/interface/ztrsv.c index ceac1727f..cbb7bba13 100644 --- a/interface/ztrsv.c +++ b/interface/ztrsv.c @@ -130,8 +130,11 @@ void NAME(char *UPLO, char *TRANS, char *DIAG, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { + blasint n, void *va, blasint lda, void *vx, blasint incx) { + FLOAT *a = (FLOAT*) va; + FLOAT *x = (FLOAT*) vx; + int trans, uplo, unit; blasint info; FLOAT *buffer; From 632fc75d7758a831f3b529c6e9fa4cf1e7dd5433 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 6 Nov 2017 14:39:12 -0600 Subject: [PATCH 041/122] Allow using compilers other than gfortran in conjunction with MSVC or clang-cl. --- cmake/f_check.cmake | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index 4848553d9..f877fc3e1 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -20,12 +20,6 @@ # NEEDBUNDERSCORE # NEED2UNDERSCORES -if (MSVC) - # had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) -endif () - if (NOT NO_LAPACK) enable_language(Fortran) else() From 61587b0670b93b209a483f9d0823e2b7106bb783 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 6 Nov 2017 14:41:02 -0600 Subject: [PATCH 042/122] Update lapack.cmake with additional routines from LAPACK version 3.7.0. --- cmake/lapack.cmake | 818 +++++++++++++++++++++++++-------------------- 1 file changed, 458 insertions(+), 360 deletions(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index e6cd5373d..a4ac00eff 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -1,387 +1,485 @@ # Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. -set(ALLAUX - ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f - ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f - ../INSTALL/ilaver.f ../INSTALL/slamch.f -) +set(ALLAUX ilaenv.f ieeeck.f lsamen.f iparmq.f iparam2stage.F + ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f + ../INSTALL/ilaver.f xerbla_array.f + ../INSTALL/slamch.f) set(SCLAUX - sbdsdc.f - sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f - slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f - slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f - slagts.f slamrg.f slanst.f - slapy2.f slapy3.f slarnv.f - slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f - slarrk.f slarrr.f slaneg.f - slartg.f slaruv.f slas2.f slascl.f - slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f - slasd7.f slasd8.f slasda.f slasdq.f slasdt.f - slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f - slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f - ssteqr.f ssterf.f slaisnan.f sisnan.f - slartgp.f slartgs.f - ../INSTALL/second_${TIMER}.f -) + sbdsdc.f + sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f + slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f + slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f + slagts.f slamrg.f slanst.f + slapy2.f slapy3.f slarnv.f + slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f + slarrk.f slarrr.f slaneg.f + slartg.f slaruv.f slas2.f slascl.f + slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f + slasd7.f slasd8.f slasda.f slasdq.f slasdt.f + slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f + slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f + ssteqr.f ssterf.f slaisnan.f sisnan.f + slartgp.f slartgs.f + ../INSTALL/second_${TIMER}.f) set(DZLAUX - dbdsdc.f dbdsvdx.f - dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f - dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f - dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f - dlagts.f dlamrg.f dlanst.f - dlapy2.f dlapy3.f dlarnv.f - dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f - dlarrk.f dlarrr.f dlaneg.f - dlartg.f dlaruv.f dlas2.f dlascl.f - dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f - dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f - dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f - dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f - dsteqr.f dsterf.f dlaisnan.f disnan.f - dlartgp.f dlartgs.f - ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f - dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f - dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f - dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f - dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f - dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f - dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f - dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F - dsytrd_sy2sb.f dlarfy.f -) + dbdsdc.f + dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f + dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f + dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f + dlagts.f dlamrg.f dlanst.f + dlapy2.f dlapy3.f dlarnv.f + dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f + dlarrk.f dlarrr.f dlaneg.f + dlartg.f dlaruv.f dlas2.f dlascl.f + dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f + dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f + dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f + dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f + dsteqr.f dsterf.f dlaisnan.f disnan.f + dlartgp.f dlartgs.f + ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f) set(SLASRC - sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f - sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f - sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f - DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f - sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f - sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f - sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f - sgetc2.f sgetri.f sgetrf2.f - sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f - sggglm.f sgghrd.f sgglse.f sggqrf.f - sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f - sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f - shsein.f shseqr.f slabrd.f slacon.f slacn2.f - slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f - DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f - slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f - slansy.f slantb.f slantp.f slantr.f slanv2.f - slapll.f slapmt.f - slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f - slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f - slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f - slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f - slarrv.f slartv.f - slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f - slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f - sopgtr.f sopmtr.f sorg2l.f sorg2r.f - sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f - sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f - sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f - sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f - spbstf.f spbsv.f spbsvx.f - spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f - sposvx.f spstrf.f spstf2.f - sppcon.f sppequ.f - spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f - spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f - ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f - ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f - sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f - ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f - sstevx.f - ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f - ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f - ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f - ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f - ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f - ssytri_rook.f ssycon_rook.f ssysv_rook.f - stbcon.f - stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f - stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f - stptrs.f - strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f - strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f - slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f - stfttr.f stpttf.f stpttr.f strttf.f strttp.f - sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f - sgeequb.f ssyequb.f spoequb.f sgbequb.f - sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f - sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f - sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f - stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f - sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f - slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f - ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f - ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f - ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f - ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f - ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f -) + sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f + sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f + sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f + sgehd2.f sgehrd.f sgelq2.f sgelqf.f + sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f + sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f + sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f + sgetrf2.f sgetri.f + sggbak.f sggbal.f + sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f + sggglm.f sgghrd.f sgghd3.f sgglse.f sggqrf.f + sggrqf.f sggsvd3.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f + sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f + shsein.f shseqr.f slabrd.f slacon.f slacn2.f + slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f + slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f + slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f + slansy.f slantb.f slantp.f slantr.f slanv2.f + slapll.f slapmt.f + slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f + slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f + slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f + slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f + slarrv.f slartv.f + slarz.f slarzb.f slarzt.f slasy2.f + slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f + slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f + sopgtr.f sopmtr.f sorg2l.f sorg2r.f + sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f + sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f + sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f + sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f + spbstf.f spbsv.f spbsvx.f + spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f + sposvx.f spotrf2.f spotri.f spstrf.f spstf2.f + sppcon.f sppequ.f + spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f + spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f + ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f + ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f + sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f + ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f + sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f + ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f + ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f + ssyswapr.f ssytrs.f ssytrs2.f + ssyconv.f ssyconvf.f ssyconvf_rook.f + ssysv_aa.f ssytrf_aa.f ssytrs_aa.f + ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f + ssytri_rook.f ssycon_rook.f ssysv_rook.f + ssytf2_rk.f ssytrf_rk.f ssytrs_3.f + ssytri_3.f ssytri_3x.f ssycon_3.f ssysv_rk.f + ssysv_aa.f ssytrf_aa.f ssytrs_aa.f + stbcon.f + stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f + stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f + stptrs.f + strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f + strtrs.f stzrzf.f sstemr.f + slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f + stfttr.f stpttf.f stpttr.f strttf.f strttp.f + sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f + sgeequb.f ssyequb.f spoequb.f sgbequb.f + sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f + sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f + sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f + stpqrt.f stpqrt2.f stpmqrt.f stprfb.f + sgelqt.f sgelqt3.f sgemlqt.f + sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f + sgelq.f slaswlq.f slamswlq.f sgemlq.f + stplqt.f stplqt2.f stpmlqt.f + ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f + ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f + ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f) -set(DSLASRC spotrs.f spotrf2.f) +set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f + sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f + sla_syrfsx_extended.f sla_syamv.f sla_syrcond.f sla_syrpvgrw.f + sposvxx.f sporfsx.f sla_porfsx_extended.f sla_porcond.f + sla_porpvgrw.f sgbsvxx.f sgbrfsx.f sla_gbrfsx_extended.f + sla_gbamv.f sla_gbrcond.f sla_gbrpvgrw.f sla_lin_berr.f slarscl2.f + slascl2.f sla_wwaddw.f) set(CLASRC - cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f - cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f - cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f - DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f - cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f - DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f - cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f - cgesvx.f cgetc2.f cgetri.f - cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f - cgghrd.f cgglse.f cggqrf.f cggrqf.f - DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f - cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f - chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f - checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f - chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f - chetf2.f chetrd.f - chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f - chetrs.f chetrs2.f - chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f - chgeqz.f chpcon.f chpev.f chpevd.f - chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f - chpsvx.f - chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f - clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f - claed0.f claed7.f claed8.f - claein.f claesy.f claev2.f clags2.f clagtm.f - clahef.f clahef_rook.f clahqr.f - DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f - clanhb.f clanhe.f - clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f - clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f - claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f - claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f - claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f - clarf.f clarfb.f clarfg.f clarft.f clarfgp.f - clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f - clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f - clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f - DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f - cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f - cposv.f cposvx.f cpstrf.f cpstf2.f - cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f - cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f - crot.f cspcon.f csprfs.f cspsv.f - cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f - cstegr.f cstein.f csteqr.f - csycon.f - csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f - csyswapr.f csytrs.f csytrs2.f csyconv.f - csytf2_rook.f csytrf_rook.f csytrs_rook.f - csytri_rook.f csycon_rook.f csysv_rook.f - ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f - ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f - ctprfs.f ctptri.f - ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f - ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f - cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f - cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f - cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f - cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f - chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f - ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f - cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f - cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f - cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f - cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f - ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f - cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f - clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f - chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f - csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f - chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f - chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f - chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f - chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f -) + cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f + cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f + cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f + cgehd2.f cgehrd.f cgelq2.f cgelqf.f + cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f + cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f + cgesc2.f cgesdd.f cgesvd.f cgesvdx.f + cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f + cgesvx.f cgetc2.f cgetrf2.f + cgetri.f + cggbak.f cggbal.f + cgges.f cgges3.f cggesx.f cggev.f cggev3.f cggevx.f + cggglm.f cgghrd.f cgghd3.f cgglse.f cggqrf.f cggrqf.f + cggsvd3.f cggsvp3.f + cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f + chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f + checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f + chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f + chetf2.f chetrd.f + chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f + chetrs.f chetrs2.f + chetf2_rook.f chetrf_rook.f chetri_rook.f + chetrs_rook.f checon_rook.f chesv_rook.f + chetf2_rk.f chetrf_rk.f chetri_3.f chetri_3x.f + chetrs_3.f checon_3.f chesv_rk.f + chesv_aa.f chetrf_aa.f chetrs_aa.f + chgeqz.f chpcon.f chpev.f chpevd.f + chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f + chpsvx.f + chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f + clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f + claed0.f claed7.f claed8.f + claein.f claesy.f claev2.f clags2.f clagtm.f + clahef.f clahef_rook.f clahef_rk.f clahef_aa.f clahqr.f + clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f + clanhb.f clanhe.f + clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f + clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f + claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f + claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f + claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f + clarf.f clarfb.f clarfg.f clarfgp.f clarft.f + clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f + clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f + clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f + clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f + cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f + cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f + cposv.f cposvx.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f + cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f + cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f + crot.f cspcon.f csprfs.f cspsv.f + cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f + cstegr.f cstein.f csteqr.f csycon.f + csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f + csytri2.f csytri2x.f csyswapr.f + csytrs.f csytrs2.f + csyconv.f csyconvf.f csyconvf_rook.f + csytf2_rook.f csytrf_rook.f csytrs_rook.f + csytri_rook.f csycon_rook.f csysv_rook.f + csytf2_rk.f csytrf_rk.f csytrf_aa.f csytrs_3.f csytrs_aa.f + csytri_3.f csytri_3x.f csycon_3.f csysv_rk.f csysv_aa.f + ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f + ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f + ctprfs.f ctptri.f + ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f + ctrsyl.f ctrtrs.f ctzrzf.f cung2l.f cung2r.f + cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f + cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f cunm22.f + cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f + cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f + chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f + ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f + cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f + cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f + cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f + cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f + ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f + cgelqt.f cgelqt3.f cgemlqt.f + cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f + cgelq.f claswlq.f clamswlq.f cgemlq.f + ctplqt.f ctplqt2.f ctpmlqt.f + chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f + cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f + chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f) -set(ZCLASRC cpotrs.f) +set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f + cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f + csysvxx.f csyrfsx.f cla_syrfsx_extended.f cla_syamv.f + cla_syrcond_c.f cla_syrcond_x.f cla_syrpvgrw.f + cposvxx.f cporfsx.f cla_porfsx_extended.f + cla_porcond_c.f cla_porcond_x.f cla_porpvgrw.f + cgbsvxx.f cgbrfsx.f cla_gbrfsx_extended.f cla_gbamv.f + cla_gbrcond_c.f cla_gbrcond_x.f cla_gbrpvgrw.f + chesvxx.f cherfsx.f cla_herfsx_extended.f cla_heamv.f + cla_hercond_c.f cla_hercond_x.f cla_herpvgrw.f + cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f) set(DLASRC - dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f - dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f - dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f - DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f - dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f - dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f - dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f - dgetc2.f dgetri.f dgetrf2.f - dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f - dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f - dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f - dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f - dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f - dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f - DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f - dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f - dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f - dlapll.f dlapmt.f - dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f - dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f - dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f - dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f - dlargv.f dlarrv.f dlartv.f - dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f - dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f - dopgtr.f dopmtr.f dorg2l.f dorg2r.f - dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f - dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f - dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f - dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f - dpbstf.f dpbsv.f dpbsvx.f - dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f - dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f - dppcon.f dppequ.f - dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f - dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f - dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f - dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f - dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f - dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f - dstevx.f - dsycon.f dsyev.f dsyevd.f dsyevr.f - dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f - dsysv.f dsysvx.f - dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f - dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f - dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f - dsytri_rook.f dsycon_rook.f dsysv_rook.f - dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f - dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f - dtptrs.f - dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f - dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f - dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f - dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f - dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f - dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f - dgeequb.f dsyequb.f dpoequb.f dgbequb.f - dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f - dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f - dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f - dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f - dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f - dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f - dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f - dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f - dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f - dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f - dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f -) + dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f + dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f + dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f + dgehd2.f dgehrd.f dgelq2.f dgelqf.f + dgels.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f + dgeqp3.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f + dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f dgetc2.f + dgetrf2.f dgetri.f + dggbak.f dggbal.f + dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f + dggglm.f dgghrd.f dgghd3.f dgglse.f dggqrf.f + dggrqf.f dggsvd3.f dggsvp3.f dgtcon.f dgtrfs.f dgtsv.f + dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f + dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f + dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f + dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f + dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f + dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f + dlapll.f dlapmt.f + dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f + dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f + dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f + dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f + dlargv.f dlarrv.f dlartv.f + dlarz.f dlarzb.f dlarzt.f dlasy2.f + dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f + dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f + dopgtr.f dopmtr.f dorg2l.f dorg2r.f + dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f + dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f + dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f + dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f + dpbstf.f dpbsv.f dpbsvx.f + dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f + dposvx.f dpotrf2.f dpotri.f dpotrs.f dpstrf.f dpstf2.f + dppcon.f dppequ.f + dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f + dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f + dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f + dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f + dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f + dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f + dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f + dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f + dsysv.f dsysvx.f + dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f + dsytri2.f dsytri2x.f dsyswapr.f + dsyconv.f dsyconvf.f dsyconvf_rook.f + dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f + dsytri_rook.f dsycon_rook.f dsysv_rook.f + dsytf2_rk.f dsytrf_rk.f dsytrs_3.f + dsytri_3.f dsytri_3x.f dsycon_3.f dsysv_rk.f + dsysv_aa.f dsytrf_aa.f dsytrs_aa.f + dtbcon.f + dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f + dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f + dtptrs.f + dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f + dtrtrs.f dtzrzf.f dstemr.f + dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f + dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f + dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f + dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f + dgeequb.f dsyequb.f dpoequb.f dgbequb.f + dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f + dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f + dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f + dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f + dgelqt.f dgelqt3.f dgemlqt.f + dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f + dgelq.f dlaswlq.f dlamswlq.f dgemlq.f + dtplqt.f dtplqt2.f dtpmlqt.f + dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f + dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f + dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f) + +set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f + dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f + dla_syrfsx_extended.f dla_syamv.f dla_syrcond.f dla_syrpvgrw.f + dposvxx.f dporfsx.f dla_porfsx_extended.f dla_porcond.f + dla_porpvgrw.f dgbsvxx.f dgbrfsx.f dla_gbrfsx_extended.f + dla_gbamv.f dla_gbrcond.f dla_gbrpvgrw.f dla_lin_berr.f dlarscl2.f + dlascl2.f dla_wwaddw.f) set(ZLASRC - zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f - zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f - zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f - DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f - zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f - DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f - zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f - zgetri.f zgetrf2.f - zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f - zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f - DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f - zgsvj0.f zgsvj1.f - zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f - zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f - zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f - zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f - zhetf2.f zhetrd.f - zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f - zhetrs.f zhetrs2.f - zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f - zhgeqz.f zhpcon.f zhpev.f zhpevd.f - zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f - zhpsvx.f - zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f - zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f - zlaed0.f zlaed7.f zlaed8.f - zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f - zlahef.f zlahef_rook.f zlahqr.f - DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f - zlangt.f zlanhb.f - zlanhe.f - zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f - zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f - zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f - zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f - zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f - zlarcm.f zlarf.f zlarfb.f - zlarfg.f zlarft.f zlarfgp.f - zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f - zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f - zlassq.f zlasyf.f zlasyf_rook.f zlasyf_aa.f - zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f - zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f - zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f - zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f - zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f - zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f - zrot.f zspcon.f zsprfs.f zspsv.f - zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f - zstegr.f zstein.f zsteqr.f - zsycon.f zsysv_aa.f - zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f - zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f - zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f - zsytri_rook.f zsycon_rook.f zsysv_rook.f - ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f - ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f - ztprfs.f ztptri.f - ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f - ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f - zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f - zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f - zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f - zunmtr.f zupgtr.f - zupmtr.f izmax1.f dzsum1.f zstemr.f - zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f - zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f - ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f - zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f - zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f - zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f - zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f - ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f - zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f - zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f - zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f - zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f - zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f - zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f - zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f - zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f - zlarfy.f -) + zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f + zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f + zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f + zgehd2.f zgehrd.f zgelq2.f zgelqf.f + zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f + zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f + zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvx.f + zgesvj.f zgejsv.f zgsvj0.f zgsvj1.f + zgetc2.f zgetrf2.f + zgetri.f + zggbak.f zggbal.f + zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f + zggglm.f zgghrd.f zgghd3.f zgglse.f zggqrf.f zggrqf.f + zggsvd3.f zggsvp3.f + zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f + zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f + zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f + zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f + zhetf2.f zhetrd.f + zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f + zhetrs.f zhetrs2.f + zhetf2_rook.f zhetrf_rook.f zhetri_rook.f + zhetrs_rook.f zhecon_rook.f zhesv_rook.f + zhetf2_rk.f zhetrf_rk.f zhetri_3.f zhetri_3x.f + zhetrs_3.f zhecon_3.f zhesv_rk.f + zhesv_aa.f zhetrf_aa.f zhetrs_aa.f + zhgeqz.f zhpcon.f zhpev.f zhpevd.f + zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f + zhpsvx.f + zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f + zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f + zlaed0.f zlaed7.f zlaed8.f + zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f + zlahef.f zlahef_rook.f zlahef_rk.f zlahef_aa.f zlahqr.f + zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f + zlangt.f zlanhb.f + zlanhe.f + zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f + zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f + zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f + zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f + zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f + zlarcm.f zlarf.f zlarfb.f + zlarfg.f zlarfgp.f zlarft.f + zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f + zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f + zlassq.f zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f + zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f + zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f + zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f + zposv.f zposvx.f zpotrf2.f zpotri.f zpotrs.f zpstrf.f zpstf2.f + zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f + zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f + zrot.f zspcon.f zsprfs.f zspsv.f + zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f + zstegr.f zstein.f zsteqr.f zsycon.f + zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f + zsytri2.f zsytri2x.f zsyswapr.f + zsytrs.f zsytrs2.f + zsyconv.f zsyconvf.f zsyconvf_rook.f + zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f + zsytri_rook.f zsycon_rook.f zsysv_rook.f + zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrs_3.f + zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f + ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f + ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f + ztprfs.f ztptri.f + ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f + ztrsyl.f ztrtrs.f ztzrzf.f zung2l.f + zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f + zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f zunm22.f + zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f + zunmtr.f zupgtr.f + zupmtr.f izmax1.f dzsum1.f zstemr.f + zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f + zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f + ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f + zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f + zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f + zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f + zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f + ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f + ztplqt.f ztplqt2.f ztpmlqt.f + zgelqt.f zgelqt3.f zgemlqt.f + zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f + zgelq.f zlaswlq.f zlamswlq.f zgemlq.f + zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f + zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f + zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f) + +set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f + zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f + zla_syrfsx_extended.f zla_syamv.f zla_syrcond_c.f zla_syrcond_x.f + zla_syrpvgrw.f zposvxx.f zporfsx.f zla_porfsx_extended.f + zla_porcond_c.f zla_porcond_x.f zla_porpvgrw.f zgbsvxx.f zgbrfsx.f + zla_gbrfsx_extended.f zla_gbamv.f zla_gbrcond_c.f zla_gbrcond_x.f + zla_gbrpvgrw.f zhesvxx.f zherfsx.f zla_herfsx_extended.f + zla_heamv.f zla_hercond_c.f zla_hercond_x.f zla_herpvgrw.f + zla_lin_berr.f zlarscl2.f zlascl2.f zla_wwaddw.f) + -set(LA_REL_SRC ${ALLAUX}) -if (BUILD_SINGLE) - list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX}) -endif () +if(USE_XBLAS) + set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC}) +endif() -if (BUILD_DOUBLE) - list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX}) -endif () +list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f + DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f + DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f) +list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f + DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f + DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f) +list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f + DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f + DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f) +list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f + DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f + DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f) +message(STATUS "Building deprecated routines") + +set(DSLASRC spotrs.f) + +set(ZCLASRC cpotrs.f) -if (BUILD_COMPLEX) - list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX}) -endif () +set(SCATGEN slatm1.f slaran.f slarnd.f) -if (BUILD_COMPLEX16) - list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX}) -endif () +set(SMATGEN slatms.f slatme.f slatmr.f slatmt.f + slagge.f slagsy.f slakf2.f slarge.f slaror.f slarot.f slatm2.f + slatm3.f slatm5.f slatm6.f slatm7.f slahilb.f) + +set(CMATGEN clatms.f clatme.f clatmr.f clatmt.f + clagge.f claghe.f clagsy.f clakf2.f clarge.f claror.f clarot.f + clatm1.f clarnd.f clatm2.f clatm3.f clatm5.f clatm6.f clahilb.f slatm7.f) + +set(DZATGEN dlatm1.f dlaran.f dlarnd.f) + +set(DMATGEN dlatms.f dlatme.f dlatmr.f dlatmt.f + dlagge.f dlagsy.f dlakf2.f dlarge.f dlaror.f dlarot.f dlatm2.f + dlatm3.f dlatm5.f dlatm6.f dlatm7.f dlahilb.f) + +set(ZMATGEN zlatms.f zlatme.f zlatmr.f zlatmt.f + zlagge.f zlaghe.f zlagsy.f zlakf2.f zlarge.f zlaror.f zlarot.f + zlatm1.f zlarnd.f zlatm2.f zlatm3.f zlatm5.f zlatm6.f zlahilb.f dlatm7.f) + +if(BUILD_SINGLE) + set(LA_REL_SRC ${SLASRC} ${DSLASRC} ${ALLAUX} ${SCLAUX}) + set(LA_GEN_SRC ${SMATGEN} ${SCATGEN}) + message(STATUS "Building Single Precision") +endif() +if(BUILD_DOUBLE) + set(LA_REL_SRC ${LA_REL_SRC} ${DLASRC} ${DSLASRC} ${ALLAUX} ${DZLAUX}) + set(LA_GEN_SRC ${LA_GEN_SRC} ${DMATGEN} ${DZATGEN}) + message(STATUS "Building Double Precision") +endif() +if(BUILD_COMPLEX) + set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX}) + SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN}) + message(STATUS "Building Complex Precision") +endif() +if(BUILD_COMPLEX16) + set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX}) + SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN}) + message(STATUS "Building Double Complex Precision") +endif() # add lapack-netlib folder to the sources set(LA_SOURCES "") foreach (LA_FILE ${LA_REL_SRC}) list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}") endforeach () +foreach (LA_FILE ${LA_GEN_SRC}) + list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/TESTING/MATGEN/${LA_FILE}") +endforeach () set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") From 505dc08635aa90c5485b5e8e16243493751d9abe Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 6 Nov 2017 14:43:33 -0600 Subject: [PATCH 043/122] Update lapacke.cmake with routines added in LAPACK 3.7.0. --- cmake/lapacke.cmake | 177 ++++++++++++++++++++++---------------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index fd5aee134..93e2824a1 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -1,5 +1,5 @@ -set(C_SRC +set(CSRC lapacke_cbbcsd.c lapacke_cbbcsd_work.c lapacke_cbdsqr.c @@ -1839,6 +1839,12 @@ set(ZSRC lapacke_zhbevd_work.c lapacke_zhbevx.c lapacke_zhbevx_work.c + lapacke_zhbev_2stage.c + lapacke_zhbev_2stage_work.c + lapacke_zhbevd_2stage.c + lapacke_zhbevd_2stage_work.c + lapacke_zhbevx_2stage.c + lapacke_zhbevx_2stage_work.c lapacke_zhbgst.c lapacke_zhbgst_work.c lapacke_zhbgv.c @@ -2263,104 +2269,91 @@ set(ZSRC ) set(SRCX - lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c - lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c - lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c - lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c - lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c - lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c - lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c - lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c - lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c - lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c - lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c + lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c + lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c + lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c + lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c + lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c + lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c + lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c + lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c + lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c + lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c + lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c lapacke_chesvxx_work.c lapacke_dgbsvxx_work.c lapacke_dsysvxx_work.c lapacke_sposvxx_work.c lapacke_zgesvxx_work.c lapacke_zsysvxx_work.c ) -# FILE PARTS OF TMGLIB +# FILE PARTS OF TMGLIB set(MATGEN - lapacke_clatms.c - lapacke_clatms_work.c - lapacke_dlatms.c - lapacke_dlatms_work.c - lapacke_slatms.c - lapacke_slatms_work.c - lapacke_zlatms.c - lapacke_zlatms_work.c - lapacke_clagge.c - lapacke_clagge_work.c - lapacke_dlagge.c - lapacke_dlagge_work.c - lapacke_slagge.c - lapacke_slagge_work.c - lapacke_zlagge.c - lapacke_zlagge_work.c - lapacke_claghe.c - lapacke_claghe_work.c - lapacke_zlaghe.c - lapacke_zlaghe_work.c - lapacke_clagsy.c - lapacke_clagsy_work.c - lapacke_dlagsy.c - lapacke_dlagsy_work.c - lapacke_slagsy.c - lapacke_slagsy_work.c - lapacke_zlagsy.c + lapacke_clatms.c + lapacke_clatms_work.c + lapacke_dlatms.c + lapacke_dlatms_work.c + lapacke_slatms.c + lapacke_slatms_work.c + lapacke_zlatms.c + lapacke_zlatms_work.c + lapacke_clagge.c + lapacke_clagge_work.c + lapacke_dlagge.c + lapacke_dlagge_work.c + lapacke_slagge.c + lapacke_slagge_work.c + lapacke_zlagge.c + lapacke_zlagge_work.c + lapacke_claghe.c + lapacke_claghe_work.c + lapacke_zlaghe.c + lapacke_zlaghe_work.c + lapacke_clagsy.c + lapacke_clagsy_work.c + lapacke_dlagsy.c + lapacke_dlagsy_work.c + lapacke_slagsy.c + lapacke_slagsy_work.c + lapacke_zlagsy.c lapacke_zlagsy_work.c ) set(Utils_SRC -lapacke_cgb_nancheck.c lapacke_dpf_nancheck.c lapacke_ssy_trans.c -lapacke_cgb_trans.c lapacke_dpf_trans.c lapacke_stb_nancheck.c -lapacke_cge_nancheck.c lapacke_dpo_nancheck.c lapacke_stb_trans.c -lapacke_cge_trans.c lapacke_dpo_trans.c lapacke_stf_nancheck.c -lapacke_cgg_nancheck.c lapacke_dpp_nancheck.c lapacke_stf_trans.c -lapacke_cgg_trans.c lapacke_dpp_trans.c lapacke_stp_nancheck.c -lapacke_cgt_nancheck.c lapacke_dpt_nancheck.c lapacke_stp_trans.c -lapacke_chb_nancheck.c lapacke_dsb_nancheck.c lapacke_str_nancheck.c -lapacke_chb_trans.c lapacke_dsb_trans.c lapacke_str_trans.c -lapacke_che_nancheck.c lapacke_dsp_nancheck.c lapacke_xerbla.c -lapacke_che_trans.c lapacke_dsp_trans.c lapacke_zgb_nancheck.c -lapacke_chp_nancheck.c lapacke_dst_nancheck.c lapacke_zgb_trans.c -lapacke_chp_trans.c lapacke_dsy_nancheck.c lapacke_zge_nancheck.c -lapacke_chs_nancheck.c lapacke_dsy_trans.c lapacke_zge_trans.c -lapacke_chs_trans.c lapacke_dtb_nancheck.c lapacke_zgg_nancheck.c -lapacke_c_nancheck.c lapacke_dtb_trans.c lapacke_zgg_trans.c -lapacke_cpb_nancheck.c lapacke_dtf_nancheck.c lapacke_zgt_nancheck.c -lapacke_cpb_trans.c lapacke_dtf_trans.c lapacke_zhb_nancheck.c -lapacke_cpf_nancheck.c lapacke_dtp_nancheck.c lapacke_zhb_trans.c -lapacke_cpf_trans.c lapacke_dtp_trans.c lapacke_zhe_nancheck.c -lapacke_cpo_nancheck.c lapacke_dtr_nancheck.c lapacke_zhe_trans.c -lapacke_cpo_trans.c lapacke_dtr_trans.c lapacke_zhp_nancheck.c -lapacke_cpp_nancheck.c lapacke_lsame.c lapacke_zhp_trans.c -lapacke_cpp_trans.c lapacke_make_complex_double.c lapacke_zhs_nancheck.c -lapacke_cpt_nancheck.c lapacke_make_complex_float.c lapacke_zhs_trans.c -lapacke_csp_nancheck.c lapacke_sgb_nancheck.c lapacke_z_nancheck.c -lapacke_csp_trans.c lapacke_sgb_trans.c lapacke_zpb_nancheck.c -lapacke_cst_nancheck.c lapacke_sge_nancheck.c lapacke_zpb_trans.c -lapacke_csy_nancheck.c lapacke_sge_trans.c lapacke_zpf_nancheck.c -lapacke_csy_trans.c lapacke_sgg_nancheck.c lapacke_zpf_trans.c -lapacke_ctb_nancheck.c lapacke_sgg_trans.c lapacke_zpo_nancheck.c -lapacke_ctb_trans.c lapacke_sgt_nancheck.c lapacke_zpo_trans.c -lapacke_ctf_nancheck.c lapacke_shs_nancheck.c lapacke_zpp_nancheck.c -lapacke_ctf_trans.c lapacke_shs_trans.c lapacke_zpp_trans.c -lapacke_ctp_nancheck.c lapacke_s_nancheck.c lapacke_zpt_nancheck.c -lapacke_ctp_trans.c lapacke_spb_nancheck.c lapacke_zsp_nancheck.c -lapacke_ctr_nancheck.c lapacke_spb_trans.c lapacke_zsp_trans.c -lapacke_ctr_trans.c lapacke_spf_nancheck.c lapacke_zst_nancheck.c -lapacke_dgb_nancheck.c lapacke_spf_trans.c lapacke_zsy_nancheck.c -lapacke_dgb_trans.c lapacke_spo_nancheck.c lapacke_zsy_trans.c -lapacke_dge_nancheck.c lapacke_spo_trans.c lapacke_ztb_nancheck.c -lapacke_dge_trans.c lapacke_spp_nancheck.c lapacke_ztb_trans.c -lapacke_dgg_nancheck.c lapacke_spp_trans.c lapacke_ztf_nancheck.c -lapacke_dgg_trans.c lapacke_spt_nancheck.c lapacke_ztf_trans.c -lapacke_dgt_nancheck.c lapacke_ssb_nancheck.c lapacke_ztp_nancheck.c -lapacke_dhs_nancheck.c lapacke_ssb_trans.c lapacke_ztp_trans.c -lapacke_dhs_trans.c lapacke_ssp_nancheck.c lapacke_ztr_nancheck.c -lapacke_d_nancheck.c lapacke_ssp_trans.c lapacke_ztr_trans.c -lapacke_dpb_nancheck.c lapacke_sst_nancheck.c -lapacke_dpb_trans.c lapacke_ssy_nancheck.c + lapacke_c_nancheck.c lapacke_ctr_trans.c lapacke_make_complex_float.c lapacke_zgb_nancheck.c + lapacke_cgb_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_zgb_trans.c + lapacke_cgb_trans.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zge_nancheck.c + lapacke_cge_nancheck.c lapacke_dgb_trans.c lapacke_sgb_trans.c lapacke_zge_trans.c + lapacke_cge_trans.c lapacke_dge_nancheck.c lapacke_sge_nancheck.c lapacke_zgg_nancheck.c + lapacke_cgg_nancheck.c lapacke_dge_trans.c lapacke_sge_trans.c lapacke_zgg_trans.c + lapacke_cgg_trans.c lapacke_dgg_nancheck.c lapacke_sgg_nancheck.c lapacke_zgt_nancheck.c + lapacke_cgt_nancheck.c lapacke_dgg_trans.c lapacke_sgg_trans.c lapacke_zhb_nancheck.c + lapacke_chb_nancheck.c lapacke_dgt_nancheck.c lapacke_sgt_nancheck.c lapacke_zhb_trans.c + lapacke_chb_trans.c lapacke_dhs_nancheck.c lapacke_shs_nancheck.c lapacke_zhe_nancheck.c + lapacke_che_nancheck.c lapacke_dhs_trans.c lapacke_shs_trans.c lapacke_zhe_trans.c + lapacke_che_trans.c lapacke_dpb_nancheck.c lapacke_spb_nancheck.c lapacke_zhp_nancheck.c + lapacke_chp_nancheck.c lapacke_dpb_trans.c lapacke_spb_trans.c lapacke_zhp_trans.c + lapacke_chp_trans.c lapacke_dpf_nancheck.c lapacke_spf_nancheck.c lapacke_zhs_nancheck.c + lapacke_chs_nancheck.c lapacke_dpf_trans.c lapacke_spf_trans.c lapacke_zhs_trans.c + lapacke_chs_trans.c lapacke_dpo_nancheck.c lapacke_spo_nancheck.c lapacke_zpb_nancheck.c + lapacke_cpb_nancheck.c lapacke_dpo_trans.c lapacke_spo_trans.c lapacke_zpb_trans.c + lapacke_cpb_trans.c lapacke_dpp_nancheck.c lapacke_spp_nancheck.c lapacke_zpf_nancheck.c + lapacke_cpf_nancheck.c lapacke_dpp_trans.c lapacke_spp_trans.c lapacke_zpf_trans.c + lapacke_cpf_trans.c lapacke_dpt_nancheck.c lapacke_spt_nancheck.c lapacke_zpo_nancheck.c + lapacke_cpo_nancheck.c lapacke_dsb_nancheck.c lapacke_ssb_nancheck.c lapacke_zpo_trans.c + lapacke_cpo_trans.c lapacke_dsb_trans.c lapacke_ssb_trans.c lapacke_zpp_nancheck.c + lapacke_cpp_nancheck.c lapacke_dsp_nancheck.c lapacke_ssp_nancheck.c lapacke_zpp_trans.c + lapacke_cpp_trans.c lapacke_dsp_trans.c lapacke_ssp_trans.c lapacke_zpt_nancheck.c + lapacke_cpt_nancheck.c lapacke_dst_nancheck.c lapacke_sst_nancheck.c lapacke_zsp_nancheck.c + lapacke_csp_nancheck.c lapacke_dsy_nancheck.c lapacke_ssy_nancheck.c lapacke_zsp_trans.c + lapacke_csp_trans.c lapacke_dsy_trans.c lapacke_ssy_trans.c lapacke_zst_nancheck.c + lapacke_cst_nancheck.c lapacke_dtb_nancheck.c lapacke_stb_nancheck.c lapacke_zsy_nancheck.c + lapacke_csy_nancheck.c lapacke_dtb_trans.c lapacke_stb_trans.c lapacke_zsy_trans.c + lapacke_csy_trans.c lapacke_dtf_nancheck.c lapacke_stf_nancheck.c lapacke_ztb_nancheck.c + lapacke_ctb_nancheck.c lapacke_dtf_trans.c lapacke_stf_trans.c lapacke_ztb_trans.c + lapacke_ctb_trans.c lapacke_dtp_nancheck.c lapacke_stp_nancheck.c lapacke_ztf_nancheck.c + lapacke_ctf_nancheck.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ztf_trans.c + lapacke_ctf_trans.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztp_nancheck.c + lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c + lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c + lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c ) set(LAPACKE_REL_SRC "") @@ -2380,6 +2373,10 @@ if (BUILD_COMPLEX16) list(APPEND LAPACKE_REL_SRC ${ZSRC}) endif () +if (BUILD_MATGEN) + list(APPEND LAPACKE_REL_SRC ${MATGEN}) +endif () + # add lapack-netlib folder to the sources set(LAPACKE_SOURCES "") foreach (LAE_FILE ${LAPACKE_REL_SRC}) From 72956e8950bfcf304be701174d8dedf8aefb7270 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Mon, 6 Nov 2017 14:47:27 -0600 Subject: [PATCH 044/122] Build MATGEN LAPACK routines by default when building with CMake. --- CMakeLists.txt | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fdfb3ffa..6c52b2501 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,10 @@ if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_ set(BUILD_COMPLEX16 true) endif () +if (NOT DEFINED BUILD_MATGEN) + set(BUILD_MATGEN true) +endif() + set(FLOAT_TYPES "") if (BUILD_SINGLE) message(STATUS "Building Single Precision") @@ -163,7 +167,7 @@ set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES}) string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG ) - + set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} ) set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} ) set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} ) @@ -194,7 +198,7 @@ if (NOT MSVC AND NOT NOFORTRAN) endif() endif() -set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES +set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION} SOVERSION ${OpenBLAS_MAJOR_VERSION} ) @@ -206,7 +210,7 @@ install(TARGETS ${OpenBLAS_LIBNAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) - + message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}") set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h) @@ -250,7 +254,7 @@ if(NOT NO_LAPACKE) add_dependencies( ${OpenBLAS_LIBNAME} genlapacke) FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h") install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - + ADD_CUSTOM_TARGET(genlapacke COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h" ) From 3cfc64404a42897fdd60d6fc753deb05aa9f60b2 Mon Sep 17 00:00:00 2001 From: xoviat Date: Mon, 6 Nov 2017 15:05:20 -0600 Subject: [PATCH 045/122] [appveyor] fixes --- appveyor.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 1c0474d2d..4eaae5eed 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,23 +24,32 @@ skip_commits: message: /\[av skip\]/ environment: + global: + CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 matrix: - COMPILER: clang-cl - DYNAMIC_ARCH: ON + WITH_FORTRAN: yes - COMPILER: clang-cl + DYNAMIC_ARCH: ON + WITH_FORTRAN: no - COMPILER: cl install: - - if [%COMPILER%]==[clang-cl] call C:\Miniconda36-x64\Scripts\activate.bat + - if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force - - if [%COMPILER%]==[clang-cl] conda install --yes clangdev ninja cmake + - if [%WITH_FORTRAN%]==[yes] conda config --add channels isuruf/label/flang --force + - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev ninja cmake + - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flangdev clangdev cmake - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 + - if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" + - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" before_build: - echo Running cmake... - cd c:\projects\OpenBLAS - if [%COMPILER%]==[cl] cmake -G "Visual Studio 12 Win64" . - - if [%COMPILER%]==[clang-cl] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl . + - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl . + - if [%WITH_FORTRAN%]==[yes] cmake -G "NMake Makefiles" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 . - if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON . build_script: From 92683142907ca9f5807cdd93688d928ffeaf87f3 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Mon, 6 Nov 2017 19:00:23 -0600 Subject: [PATCH 046/122] Fix gensymbol script --- exports/gensymbol | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exports/gensymbol b/exports/gensymbol index 89c6e8320..f1983d458 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -3411,13 +3411,17 @@ ); +use File::Spec; +use File::Basename; +my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib"); + if ($ARGV[8] == 1) { #ONLY_CBLAS=1 @underscore_objs = (@misc_underscore_objs); } elsif ($ARGV[5] == 1) { #NO_LAPACK=1 @underscore_objs = (@blasobjs, @misc_underscore_objs); -} elsif (-d "../lapack-netlib") { +} elsif (-d $dirname) { if ($ARGV[7] == 0) { # NEED2UNDERSCORES=0 # Don't need 2 underscores From d8576826c477f078814103fffe4538cedb6d8895 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 9 Nov 2017 17:31:44 +0100 Subject: [PATCH 047/122] Output an error message when shmat() fails Observed in #1351 with SELinux as the likely culprit. Without the message, the user saw a segfault with no apparent reason --- driver/others/init.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index 6efd351ac..855dca929 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -78,6 +78,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include +#include #include #include #include @@ -659,8 +661,9 @@ static void open_shmem(void) { exit(1); } - if (shmid != -1) common = (shm_t *)shmat(shmid, NULL, 0); - + if (shmid != -1) { + if ( (common = shmat(shmid, NULL, 0)) == (void*)-1) perror ("Attaching shared memory segment"); + } #ifdef DEBUG fprintf(stderr, "Shared Memory id = %x Address = %p\n", shmid, common); #endif From 307305aeb566e76cc297d1fc5ed8885b0d7a4f64 Mon Sep 17 00:00:00 2001 From: xoviat Date: Thu, 9 Nov 2017 15:10:02 -0600 Subject: [PATCH 048/122] [appeyor] use flang from conda-forge This flang will be updated in the future. We leave cmake because it's not yet released with fortran support --- appveyor.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 4eaae5eed..41ec9a6ef 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -37,9 +37,12 @@ environment: install: - if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force + - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev + + - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang - if [%WITH_FORTRAN%]==[yes] conda config --add channels isuruf/label/flang --force - - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev ninja cmake - - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flangdev clangdev cmake + + - if [%COMPILER%]==[clang-cl] conda install --yes --quiet ninja cmake - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" From 2a6fef9a55f5760be2d2aac280764284d9637ff5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 9 Nov 2017 23:16:13 +0100 Subject: [PATCH 049/122] Try to handle shmget or shmat failing also replaces one verbatim sched_yield with the YIELDING macro for consistency as suggested in #1351 --- driver/others/init.c | 51 +++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index 6efd351ac..962794bc9 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -78,6 +78,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include +#include #include #include #include @@ -629,7 +631,7 @@ static inline int is_dead(int id) { return shmctl(id, IPC_STAT, &ds); } -static void open_shmem(void) { +static int open_shmem(void) { int try = 0; @@ -655,29 +657,42 @@ static void open_shmem(void) { } while ((try < 10) && (shmid == -1)); if (shmid == -1) { - fprintf(stderr, "GotoBLAS : Can't open shared memory. Terminated.\n"); - exit(1); + perror ("Obtaining shared memory segment failed in open_shmem"); + return (1); } - if (shmid != -1) common = (shm_t *)shmat(shmid, NULL, 0); - + if (shmid != -1) { + if ( (common = shmat(shmid, NULL, 0)) == (void*)-1) { + perror ("Attaching shared memory segment failed in open_shmem"); + return (1); + } + } #ifdef DEBUG fprintf(stderr, "Shared Memory id = %x Address = %p\n", shmid, common); #endif - + return (0); } -static void create_pshmem(void) { +static int create_pshmem(void) { pshmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); - paddr = shmat(pshmid, NULL, 0); - - shmctl(pshmid, IPC_RMID, 0); + if (pshmid == -1) { + perror ("Obtaining shared memory segment failed in create_pshmem"); + return(1); + } + + if ( (paddr = shmat(pshmid, NULL, 0)) == (void*)-1) { + perror ("Attaching shared memory segment failed in create_pshmem"); + return (1); + } + + if (shmctl(pshmid, IPC_RMID, 0) == -1) return (1); #ifdef DEBUG fprintf(stderr, "Private Shared Memory id = %x Address = %p\n", pshmid, paddr); #endif + return(0); } static void local_cpu_map(void) { @@ -805,17 +820,23 @@ void gotoblas_affinity_init(void) { return; } - create_pshmem(); - - open_shmem(); - + if (create_pshmem() != 0) { + disable_mapping = 1; + return; + } + + if (open_shmem() != 0) { + disable_mapping = 1; + return; + } + while ((common -> lock) && (common -> magic != SH_MAGIC)) { if (is_dead(common -> shmid)) { common -> lock = 0; common -> shmid = 0; common -> magic = 0; } else { - sched_yield(); + YIELDING; } } From 2df1e3372d648eaa16eb8c8278138034608e1d00 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 10 Nov 2017 20:02:21 +0100 Subject: [PATCH 050/122] Break out of potentially infinite rescaling loop after 1000 iterations Inf values in the input vector will survive rescaling, causing an infinite loop. The value of 1000 is arbitrarily chosen as a large but finite value with the intention to never interfere with regular calculations. --- lapack-netlib/SRC/clarfg.f | 2 +- lapack-netlib/SRC/clarfgp.f | 2 +- lapack-netlib/SRC/dlarfg.f | 2 +- lapack-netlib/SRC/dlarfgp.f | 2 +- lapack-netlib/SRC/slarfg.f | 2 +- lapack-netlib/SRC/slarfgp.f | 2 +- lapack-netlib/SRC/zlarfg.f | 2 +- lapack-netlib/SRC/zlarfgp.f | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lapack-netlib/SRC/clarfg.f b/lapack-netlib/SRC/clarfg.f index 05a27a283..4c0c5f715 100644 --- a/lapack-netlib/SRC/clarfg.f +++ b/lapack-netlib/SRC/clarfg.f @@ -175,7 +175,7 @@ BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN - IF( ABS( BETA ).LT.SAFMIN ) + IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/clarfgp.f b/lapack-netlib/SRC/clarfgp.f index d5f19b041..75cfd8cc2 100644 --- a/lapack-netlib/SRC/clarfgp.f +++ b/lapack-netlib/SRC/clarfgp.f @@ -197,7 +197,7 @@ BETA = BETA*BIGNUM ALPHI = ALPHI*BIGNUM ALPHR = ALPHR*BIGNUM - IF( ABS( BETA ).LT.SMLNUM ) + IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000 ) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM diff --git a/lapack-netlib/SRC/dlarfg.f b/lapack-netlib/SRC/dlarfg.f index cb177a570..aa5fabc57 100644 --- a/lapack-netlib/SRC/dlarfg.f +++ b/lapack-netlib/SRC/dlarfg.f @@ -170,7 +170,7 @@ CALL DSCAL( N-1, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN - IF( ABS( BETA ).LT.SAFMIN ) + IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000 ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/dlarfgp.f b/lapack-netlib/SRC/dlarfgp.f index c05f837ea..70efabbb8 100644 --- a/lapack-netlib/SRC/dlarfgp.f +++ b/lapack-netlib/SRC/dlarfgp.f @@ -181,7 +181,7 @@ CALL DSCAL( N-1, BIGNUM, X, INCX ) BETA = BETA*BIGNUM ALPHA = ALPHA*BIGNUM - IF( ABS( BETA ).LT.SMLNUM ) + IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM diff --git a/lapack-netlib/SRC/slarfg.f b/lapack-netlib/SRC/slarfg.f index 638b9ab8f..d63c4ac29 100644 --- a/lapack-netlib/SRC/slarfg.f +++ b/lapack-netlib/SRC/slarfg.f @@ -170,7 +170,7 @@ CALL SSCAL( N-1, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN - IF( ABS( BETA ).LT.SAFMIN ) + IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/slarfgp.f b/lapack-netlib/SRC/slarfgp.f index 59038dfce..d63a409a1 100644 --- a/lapack-netlib/SRC/slarfgp.f +++ b/lapack-netlib/SRC/slarfgp.f @@ -181,7 +181,7 @@ CALL SSCAL( N-1, BIGNUM, X, INCX ) BETA = BETA*BIGNUM ALPHA = ALPHA*BIGNUM - IF( ABS( BETA ).LT.SMLNUM ) + IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000 ) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM diff --git a/lapack-netlib/SRC/zlarfg.f b/lapack-netlib/SRC/zlarfg.f index f8a795d54..76ca452f6 100644 --- a/lapack-netlib/SRC/zlarfg.f +++ b/lapack-netlib/SRC/zlarfg.f @@ -175,7 +175,7 @@ BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN - IF( ABS( BETA ).LT.SAFMIN ) + IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/zlarfgp.f b/lapack-netlib/SRC/zlarfgp.f index 54ce6e63f..32e55ea6c 100644 --- a/lapack-netlib/SRC/zlarfgp.f +++ b/lapack-netlib/SRC/zlarfgp.f @@ -197,7 +197,7 @@ BETA = BETA*BIGNUM ALPHI = ALPHI*BIGNUM ALPHR = ALPHR*BIGNUM - IF( ABS( BETA ).LT.SMLNUM ) + IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM From acf3d34bc50f65ddfebb037ab9e377e6236dd960 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Nov 2017 23:23:44 +0100 Subject: [PATCH 051/122] Silence an unused variable warning with a cast l2 cache size is not universally needed to assign default unrolling limits, but neither putting its declaration inside an ifdef nor cloning it into all ifdef sections that need it really makes sense here. --- kernel/setparam-ref.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 9320cb56c..b6c5b54de 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -684,6 +684,9 @@ static void init_parameter(void) { int l2 = get_l2_size(); + (void) l2; /* dirty trick to suppress unused variable warning for targets */ + /* where the GEMM unrolling parameters do not depend on l2 */ + TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; From 65bf0a343c5c7e9661648317959b4c3f33ad0d4c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Nov 2017 23:25:50 +0100 Subject: [PATCH 052/122] Remove unused variable btpr --- kernel/generic/zimatcopy_cnc.c | 2 +- kernel/generic/zimatcopy_rn.c | 2 +- kernel/generic/zimatcopy_rnc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/generic/zimatcopy_cnc.c b/kernel/generic/zimatcopy_cnc.c index 7fffe193a..8e772bd8a 100644 --- a/kernel/generic/zimatcopy_cnc.c +++ b/kernel/generic/zimatcopy_cnc.c @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda) { BLASLONG i,j,ia; - FLOAT *aptr,*bptr; + FLOAT *aptr; FLOAT a0, a1; if ( rows <= 0 ) return(0); diff --git a/kernel/generic/zimatcopy_rn.c b/kernel/generic/zimatcopy_rn.c index 95819bdf6..b3037d3e7 100644 --- a/kernel/generic/zimatcopy_rn.c +++ b/kernel/generic/zimatcopy_rn.c @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda) { BLASLONG i,j,ia; - FLOAT *aptr,*bptr; + FLOAT *aptr; FLOAT a0, a1; if ( rows <= 0 ) return(0); diff --git a/kernel/generic/zimatcopy_rnc.c b/kernel/generic/zimatcopy_rnc.c index c95644987..4c20865a3 100644 --- a/kernel/generic/zimatcopy_rnc.c +++ b/kernel/generic/zimatcopy_rnc.c @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda) { BLASLONG i,j,ia; - FLOAT *aptr,*bptr; + FLOAT *aptr; FLOAT a0, a1; if ( rows <= 0 ) return(0); From 5f402b7759d7a843c78239a1a10be64747524197 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Nov 2017 23:29:42 +0100 Subject: [PATCH 053/122] Remove unused (loop?) variable j from the gemv_n_4 implementations --- kernel/x86_64/cgemv_n_4.c | 1 - kernel/x86_64/dgemv_n_4.c | 1 - kernel/x86_64/sgemv_n_4.c | 1 - kernel/x86_64/zgemv_n_4.c | 1 - 4 files changed, 4 deletions(-) diff --git a/kernel/x86_64/cgemv_n_4.c b/kernel/x86_64/cgemv_n_4.c index d0a2c84e2..14cc9fe09 100644 --- a/kernel/x86_64/cgemv_n_4.c +++ b/kernel/x86_64/cgemv_n_4.c @@ -216,7 +216,6 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r,FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) { BLASLONG i; - BLASLONG j; FLOAT *a_ptr; FLOAT *x_ptr; FLOAT *y_ptr; diff --git a/kernel/x86_64/dgemv_n_4.c b/kernel/x86_64/dgemv_n_4.c index f8234fbc1..a8437a016 100644 --- a/kernel/x86_64/dgemv_n_4.c +++ b/kernel/x86_64/dgemv_n_4.c @@ -204,7 +204,6 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest) int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) { BLASLONG i; - BLASLONG j; FLOAT *a_ptr; FLOAT *x_ptr; FLOAT *y_ptr; diff --git a/kernel/x86_64/sgemv_n_4.c b/kernel/x86_64/sgemv_n_4.c index 7c091c765..60074d3d9 100644 --- a/kernel/x86_64/sgemv_n_4.c +++ b/kernel/x86_64/sgemv_n_4.c @@ -292,7 +292,6 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest) int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) { BLASLONG i; - BLASLONG j; FLOAT *a_ptr; FLOAT *x_ptr; FLOAT *y_ptr; diff --git a/kernel/x86_64/zgemv_n_4.c b/kernel/x86_64/zgemv_n_4.c index 1d0f1e8f7..2d7fd5798 100644 --- a/kernel/x86_64/zgemv_n_4.c +++ b/kernel/x86_64/zgemv_n_4.c @@ -218,7 +218,6 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r,FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) { BLASLONG i; - BLASLONG j; FLOAT *a_ptr; FLOAT *x_ptr; FLOAT *y_ptr; From 8f177621bc7f0829791b19080a77f49f3699fc9e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Nov 2017 23:32:25 +0100 Subject: [PATCH 054/122] Remove unused variables at0...at3 from ?symv_U --- kernel/x86_64/dsymv_U.c | 1 - kernel/x86_64/ssymv_U.c | 1 - 2 files changed, 2 deletions(-) diff --git a/kernel/x86_64/dsymv_U.c b/kernel/x86_64/dsymv_U.c index 226458bf7..61cb77a64 100644 --- a/kernel/x86_64/dsymv_U.c +++ b/kernel/x86_64/dsymv_U.c @@ -164,7 +164,6 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA FLOAT temp2; FLOAT *xp, *yp; FLOAT *a0,*a1,*a2,*a3; - FLOAT at0,at1,at2,at3; FLOAT tmp1[4]; FLOAT tmp2[4]; diff --git a/kernel/x86_64/ssymv_U.c b/kernel/x86_64/ssymv_U.c index 104b29355..691a071f7 100644 --- a/kernel/x86_64/ssymv_U.c +++ b/kernel/x86_64/ssymv_U.c @@ -164,7 +164,6 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA FLOAT temp2; FLOAT *xp, *yp; FLOAT *a0,*a1,*a2,*a3; - FLOAT at0,at1,at2,at3; FLOAT tmp1[4]; FLOAT tmp2[4]; From 3fea849bbf90e50ee2e22c56c4b82205dd85c745 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Nov 2017 23:35:10 +0100 Subject: [PATCH 055/122] Remove unused variables from Haswell dtrmm and Bulldozer dtrsm --- kernel/x86_64/dtrmm_kernel_4x8_haswell.c | 12 ++++++++---- kernel/x86_64/dtrsm_kernel_LN_bulldozer.c | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c index ac8c97d03..70be88f07 100644 --- a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c +++ b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c @@ -167,24 +167,28 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL FLOAT res4_0; FLOAT res4_1; +/* FLOAT res4_2; FLOAT res4_3; - +*/ FLOAT res5_0; FLOAT res5_1; +/* FLOAT res5_2; FLOAT res5_3; - +*/ FLOAT res6_0; FLOAT res6_1; +/* FLOAT res6_2; FLOAT res6_3; - +*/ FLOAT res7_0; FLOAT res7_1; +/* FLOAT res7_2; FLOAT res7_3; - +*/ FLOAT a0; FLOAT a1; diff --git a/kernel/x86_64/dtrsm_kernel_LN_bulldozer.c b/kernel/x86_64/dtrsm_kernel_LN_bulldozer.c index efd8a4972..0a4f9db2b 100644 --- a/kernel/x86_64/dtrsm_kernel_LN_bulldozer.c +++ b/kernel/x86_64/dtrsm_kernel_LN_bulldozer.c @@ -438,7 +438,9 @@ static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, B *(cj + i) = bb; b ++; +/* BLASLONG i1 = i & -4 ; +*/ FLOAT t0,t1,t2,t3; k=0; From 27575d200a1b1efc7b30ea7eb7cbe80a32ae3cd4 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 15 Nov 2017 15:32:38 +0100 Subject: [PATCH 056/122] Eliminate mode variable where not needed --- interface/syrk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/interface/syrk.c b/interface/syrk.c index f8c697033..507bb4490 100644 --- a/interface/syrk.c +++ b/interface/syrk.c @@ -101,6 +101,7 @@ void NAME(char *UPLO, char *TRANS, FLOAT *sa, *sb; #ifdef SMP +#ifdef USE_SIMPLE_THREADED_LEVEL3 #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -118,6 +119,7 @@ void NAME(char *UPLO, char *TRANS, int mode = BLAS_SINGLE | BLAS_COMPLEX; #endif #endif +#endif #endif blasint info; @@ -206,6 +208,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr FLOAT *sa, *sb; #ifdef SMP +#ifdef USE_SIMPLE_THREADED_LEVEL3 #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -223,6 +226,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr int mode = BLAS_SINGLE | BLAS_COMPLEX; #endif #endif +#endif #endif PRINT_DEBUG_CNAME; @@ -323,13 +327,14 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); #ifdef SMP +#ifdef USE_SIMPLE_THREADED_LEVEL3 if (!trans){ mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T); } else { mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N); } - mode |= (uplo << BLAS_UPLO_SHIFT); +#endif args.common = NULL; args.nthreads = num_cpu_avail(3); From 1c9f4422b5158aa9f80c9a5bd9672c4e374da2ba Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 18 Nov 2017 18:56:30 +0100 Subject: [PATCH 057/122] Fix declaration of cblas_Xdotc_sub and cblas_Xdotu_sub last parameter of cblas_(c,z)dotc_sub and cblas_(c,z)dotu_sub should be void* as well --- cblas.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cblas.h b/cblas.h index 56730a3c9..89f78c133 100644 --- a/cblas.h +++ b/cblas.h @@ -62,10 +62,10 @@ openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST voi openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); -void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret); -void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret); -void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret); -void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret); +void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); +void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); +void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); +void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); From 3ce401f51b30605783d2c3782f36b05203db99db Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 18 Nov 2017 18:58:40 +0100 Subject: [PATCH 058/122] Make last parameter of cblas_Xdotc_sub/cblas_Xdotu_sub a void pointer as well --- interface/zdot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/interface/zdot.c b/interface/zdot.c index 40f425cc1..2db2d1efc 100644 --- a/interface/zdot.c +++ b/interface/zdot.c @@ -148,7 +148,8 @@ OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasin #else #ifdef FORCE_USE_STACK -void CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ +void CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy, void *vresult){ +OPENBLAS_COMPLEX_FLOAT *result; #else OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy){ @@ -164,6 +165,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, void *vx, blasint incx, void *vy, blasin #ifdef FORCE_USE_STACK OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); *result = zero; + vresult=(void*)result; // CREAL(*result) = 0.0; // CIMAG(*result) = 0.0; return; @@ -183,8 +185,10 @@ OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, void *vx, blasint incx, void *vy, blasin #ifndef CONJ *result = DOTU_K(n, x, incx, y, incy); + vresult=(void*)result; #else *result = DOTC_K(n, x, incx, y, incy); + vresult=(void*)result; #endif FUNCTION_PROFILE_END(4, 2 * n, 2 * n); From b46e2b57ccd5acab22ac56d8a3fdc75e96738b3e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 18 Nov 2017 20:28:02 +0100 Subject: [PATCH 059/122] Make return parameter of cblas_Xdotc_sub, cblas_Xdotu_sub a void pointer as well --- interface/zdot.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/interface/zdot.c b/interface/zdot.c index 2db2d1efc..af91b96d5 100644 --- a/interface/zdot.c +++ b/interface/zdot.c @@ -148,8 +148,8 @@ OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasin #else #ifdef FORCE_USE_STACK -void CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy, void *vresult){ -OPENBLAS_COMPLEX_FLOAT *result; +void CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy, void* vresult){ +OPENBLAS_COMPLEX_FLOAT *result= (OPENBLAS_COMPLEX_FLOAT*)vresult; #else OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, void *vx, blasint incx, void *vy, blasint incy){ @@ -165,7 +165,6 @@ OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, void *vx, blasint incx, void *vy, blasin #ifdef FORCE_USE_STACK OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); *result = zero; - vresult=(void*)result; // CREAL(*result) = 0.0; // CIMAG(*result) = 0.0; return; @@ -185,10 +184,8 @@ OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, void *vx, blasint incx, void *vy, blasin #ifndef CONJ *result = DOTU_K(n, x, incx, y, incy); - vresult=(void*)result; #else *result = DOTC_K(n, x, incx, y, incy); - vresult=(void*)result; #endif FUNCTION_PROFILE_END(4, 2 * n, 2 * n); From 07e7c36dac30f8f4864d86efad44a4e1b74e8ff3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 18 Nov 2017 23:57:44 +0100 Subject: [PATCH 060/122] Handle shmem init failures in cpu affinity setup code Failures to obtain or attach shared memory segments would lead to an exit without explanation of the exact cause. This change introduces a more verbose error message and tries to make the code continue without setting cpu affinity. Fixes #1351 --- driver/others/init.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index 962794bc9..5fb032fd5 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -26,7 +26,7 @@ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIA DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +kOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************************/ @@ -635,6 +635,8 @@ static int open_shmem(void) { int try = 0; + int err = 0; + do { #if defined(BIGNUMA) @@ -652,18 +654,22 @@ static int open_shmem(void) { #endif } + if (shmid == -1) err = errno; + try ++; } while ((try < 10) && (shmid == -1)); if (shmid == -1) { - perror ("Obtaining shared memory segment failed in open_shmem"); + fprintf (stderr, "Obtaining shared memory segment failed in open_shmem: %s\n",strerror(err)); + fprintf (stderr, "Setting CPU affinity not possible without shared memory access.\n"); return (1); } if (shmid != -1) { if ( (common = shmat(shmid, NULL, 0)) == (void*)-1) { perror ("Attaching shared memory segment failed in open_shmem"); + fprintf (stderr, "Setting CPU affinity not possible without shared memory access.\n"); return (1); } } @@ -679,11 +685,13 @@ static int create_pshmem(void) { if (pshmid == -1) { perror ("Obtaining shared memory segment failed in create_pshmem"); + fprintf (stderr, "Setting CPU affinity not possible without shared memory access.\n"); return(1); } if ( (paddr = shmat(pshmid, NULL, 0)) == (void*)-1) { perror ("Attaching shared memory segment failed in create_pshmem"); + fprintf (stderr, "Setting CPU affinity not possible without shared memory access.\n"); return (1); } From be9e63c0216842e63b776cf08bfeca9960006707 Mon Sep 17 00:00:00 2001 From: xoviat Date: Tue, 21 Nov 2017 18:44:02 -0600 Subject: [PATCH 061/122] [appveyor] use cmake from conda forge --- appveyor.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 41ec9a6ef..bb4e06fcb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -37,12 +37,8 @@ environment: install: - if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force - - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev - + - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev ninja cmake - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang - - if [%WITH_FORTRAN%]==[yes] conda config --add channels isuruf/label/flang --force - - - if [%COMPILER%]==[clang-cl] conda install --yes --quiet ninja cmake - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" From 3be5c3d34342ab25854345158fc851afc2c84566 Mon Sep 17 00:00:00 2001 From: martin Date: Thu, 23 Nov 2017 18:13:35 +0100 Subject: [PATCH 062/122] Update LAPACK to 3.8.0 --- Makefile | 1 + exports/gensymbol | 111 +- lapack-netlib/.gitignore | 37 + lapack-netlib/.travis.yml | 62 + lapack-netlib/BLAS/CMakeLists.txt | 2 +- lapack-netlib/BLAS/Makefile | 22 + lapack-netlib/BLAS/SRC/CMakeLists.txt | 77 +- lapack-netlib/BLAS/SRC/Makefile | 21 +- lapack-netlib/BLAS/SRC/caxpy.f | 43 +- lapack-netlib/BLAS/SRC/ccopy.f | 37 +- lapack-netlib/BLAS/SRC/cdotc.f | 37 +- lapack-netlib/BLAS/SRC/cdotu.f | 37 +- lapack-netlib/BLAS/SRC/cgbmv.f | 6 +- lapack-netlib/BLAS/SRC/cgemm.f | 6 +- lapack-netlib/BLAS/SRC/cgemv.f | 6 +- lapack-netlib/BLAS/SRC/cgerc.f | 6 +- lapack-netlib/BLAS/SRC/cgeru.f | 6 +- lapack-netlib/BLAS/SRC/chbmv.f | 6 +- lapack-netlib/BLAS/SRC/chemm.f | 6 +- lapack-netlib/BLAS/SRC/chemv.f | 6 +- lapack-netlib/BLAS/SRC/cher.f | 4 +- lapack-netlib/BLAS/SRC/cher2.f | 6 +- lapack-netlib/BLAS/SRC/cher2k.f | 6 +- lapack-netlib/BLAS/SRC/cherk.f | 4 +- lapack-netlib/BLAS/SRC/chpmv.f | 6 +- lapack-netlib/BLAS/SRC/chpr.f | 4 +- lapack-netlib/BLAS/SRC/chpr2.f | 6 +- lapack-netlib/BLAS/SRC/crotg.f | 29 +- lapack-netlib/BLAS/SRC/cscal.f | 32 +- lapack-netlib/BLAS/SRC/csscal.f | 32 +- lapack-netlib/BLAS/SRC/cswap.f | 37 +- lapack-netlib/BLAS/SRC/csymm.f | 6 +- lapack-netlib/BLAS/SRC/csyr2k.f | 6 +- lapack-netlib/BLAS/SRC/csyrk.f | 4 +- lapack-netlib/BLAS/SRC/ctbmv.f | 4 +- lapack-netlib/BLAS/SRC/ctbsv.f | 4 +- lapack-netlib/BLAS/SRC/ctpmv.f | 4 +- lapack-netlib/BLAS/SRC/ctpsv.f | 4 +- lapack-netlib/BLAS/SRC/ctrmm.f | 4 +- lapack-netlib/BLAS/SRC/ctrmv.f | 4 +- lapack-netlib/BLAS/SRC/ctrsm.f | 4 +- lapack-netlib/BLAS/SRC/ctrsv.f | 4 +- lapack-netlib/BLAS/SRC/dasum.f | 26 +- lapack-netlib/BLAS/SRC/daxpy.f | 43 +- lapack-netlib/BLAS/SRC/dcabs1.f | 14 +- lapack-netlib/BLAS/SRC/dcopy.f | 39 +- lapack-netlib/BLAS/SRC/ddot.f | 37 +- lapack-netlib/BLAS/SRC/dgbmv.f | 6 +- lapack-netlib/BLAS/SRC/dgemm.f | 6 +- lapack-netlib/BLAS/SRC/dgemv.f | 6 +- lapack-netlib/BLAS/SRC/dger.f | 6 +- lapack-netlib/BLAS/SRC/dnrm2.f | 26 +- lapack-netlib/BLAS/SRC/drot.f | 47 +- lapack-netlib/BLAS/SRC/drotg.f | 29 +- lapack-netlib/BLAS/SRC/drotm.f | 16 +- lapack-netlib/BLAS/SRC/drotmg.f | 10 +- lapack-netlib/BLAS/SRC/dsbmv.f | 6 +- lapack-netlib/BLAS/SRC/dscal.f | 34 +- lapack-netlib/BLAS/SRC/dspmv.f | 6 +- lapack-netlib/BLAS/SRC/dspr.f | 4 +- lapack-netlib/BLAS/SRC/dspr2.f | 6 +- lapack-netlib/BLAS/SRC/dswap.f | 41 +- lapack-netlib/BLAS/SRC/dsymm.f | 6 +- lapack-netlib/BLAS/SRC/dsymv.f | 6 +- lapack-netlib/BLAS/SRC/dsyr.f | 4 +- lapack-netlib/BLAS/SRC/dsyr2.f | 6 +- lapack-netlib/BLAS/SRC/dsyr2k.f | 6 +- lapack-netlib/BLAS/SRC/dsyrk.f | 4 +- lapack-netlib/BLAS/SRC/dtbmv.f | 4 +- lapack-netlib/BLAS/SRC/dtbsv.f | 4 +- lapack-netlib/BLAS/SRC/dtpmv.f | 4 +- lapack-netlib/BLAS/SRC/dtpsv.f | 4 +- lapack-netlib/BLAS/SRC/dtrmm.f | 4 +- lapack-netlib/BLAS/SRC/dtrmv.f | 4 +- lapack-netlib/BLAS/SRC/dtrsm.f | 4 +- lapack-netlib/BLAS/SRC/dtrsv.f | 4 +- lapack-netlib/BLAS/SRC/dzasum.f | 26 +- lapack-netlib/BLAS/SRC/dznrm2.f | 27 +- lapack-netlib/BLAS/SRC/icamax.f | 26 +- lapack-netlib/BLAS/SRC/idamax.f | 26 +- lapack-netlib/BLAS/SRC/isamax.f | 26 +- lapack-netlib/BLAS/SRC/izamax.f | 26 +- lapack-netlib/BLAS/SRC/sasum.f | 26 +- lapack-netlib/BLAS/SRC/saxpy.f | 43 +- lapack-netlib/BLAS/SRC/scabs1.f | 14 +- lapack-netlib/BLAS/SRC/scasum.f | 26 +- lapack-netlib/BLAS/SRC/scnrm2.f | 27 +- lapack-netlib/BLAS/SRC/scopy.f | 37 +- lapack-netlib/BLAS/SRC/sdot.f | 37 +- lapack-netlib/BLAS/SRC/sdsdot.f | 124 +- lapack-netlib/BLAS/SRC/sgbmv.f | 6 +- lapack-netlib/BLAS/SRC/sgemm.f | 6 +- lapack-netlib/BLAS/SRC/sgemv.f | 6 +- lapack-netlib/BLAS/SRC/sger.f | 6 +- lapack-netlib/BLAS/SRC/snrm2.f | 26 +- lapack-netlib/BLAS/SRC/srot.f | 47 +- lapack-netlib/BLAS/SRC/srotg.f | 29 +- lapack-netlib/BLAS/SRC/srotm.f | 16 +- lapack-netlib/BLAS/SRC/srotmg.f | 10 +- lapack-netlib/BLAS/SRC/ssbmv.f | 6 +- lapack-netlib/BLAS/SRC/sscal.f | 34 +- lapack-netlib/BLAS/SRC/sspmv.f | 6 +- lapack-netlib/BLAS/SRC/sspr.f | 4 +- lapack-netlib/BLAS/SRC/sspr2.f | 6 +- lapack-netlib/BLAS/SRC/sswap.f | 39 +- lapack-netlib/BLAS/SRC/ssymm.f | 6 +- lapack-netlib/BLAS/SRC/ssymv.f | 6 +- lapack-netlib/BLAS/SRC/ssyr.f | 4 +- lapack-netlib/BLAS/SRC/ssyr2.f | 6 +- lapack-netlib/BLAS/SRC/ssyr2k.f | 6 +- lapack-netlib/BLAS/SRC/ssyrk.f | 4 +- lapack-netlib/BLAS/SRC/stbmv.f | 4 +- lapack-netlib/BLAS/SRC/stbsv.f | 4 +- lapack-netlib/BLAS/SRC/stpmv.f | 4 +- lapack-netlib/BLAS/SRC/stpsv.f | 4 +- lapack-netlib/BLAS/SRC/strmm.f | 4 +- lapack-netlib/BLAS/SRC/strmv.f | 4 +- lapack-netlib/BLAS/SRC/strsm.f | 4 +- lapack-netlib/BLAS/SRC/strsv.f | 4 +- lapack-netlib/BLAS/SRC/zaxpy.f | 43 +- lapack-netlib/BLAS/SRC/zcopy.f | 37 +- lapack-netlib/BLAS/SRC/zdotc.f | 37 +- lapack-netlib/BLAS/SRC/zdotu.f | 37 +- lapack-netlib/BLAS/SRC/zdscal.f | 32 +- lapack-netlib/BLAS/SRC/zgbmv.f | 6 +- lapack-netlib/BLAS/SRC/zgemm.f | 6 +- lapack-netlib/BLAS/SRC/zgemv.f | 6 +- lapack-netlib/BLAS/SRC/zgerc.f | 6 +- lapack-netlib/BLAS/SRC/zgeru.f | 6 +- lapack-netlib/BLAS/SRC/zhbmv.f | 6 +- lapack-netlib/BLAS/SRC/zhemm.f | 6 +- lapack-netlib/BLAS/SRC/zhemv.f | 6 +- lapack-netlib/BLAS/SRC/zher.f | 4 +- lapack-netlib/BLAS/SRC/zher2.f | 6 +- lapack-netlib/BLAS/SRC/zher2k.f | 6 +- lapack-netlib/BLAS/SRC/zherk.f | 4 +- lapack-netlib/BLAS/SRC/zhpmv.f | 6 +- lapack-netlib/BLAS/SRC/zhpr.f | 4 +- lapack-netlib/BLAS/SRC/zhpr2.f | 6 +- lapack-netlib/BLAS/SRC/zrotg.f | 29 +- lapack-netlib/BLAS/SRC/zscal.f | 32 +- lapack-netlib/BLAS/SRC/zswap.f | 37 +- lapack-netlib/BLAS/SRC/zsymm.f | 6 +- lapack-netlib/BLAS/SRC/zsyr2k.f | 6 +- lapack-netlib/BLAS/SRC/zsyrk.f | 4 +- lapack-netlib/BLAS/SRC/ztbmv.f | 6 +- lapack-netlib/BLAS/SRC/ztbsv.f | 4 +- lapack-netlib/BLAS/SRC/ztpmv.f | 6 +- lapack-netlib/BLAS/SRC/ztpsv.f | 4 +- lapack-netlib/BLAS/SRC/ztrmm.f | 6 +- lapack-netlib/BLAS/SRC/ztrmv.f | 6 +- lapack-netlib/BLAS/SRC/ztrsm.f | 4 +- lapack-netlib/BLAS/SRC/ztrsv.f | 4 +- lapack-netlib/BLAS/TESTING/CMakeLists.txt | 30 +- lapack-netlib/BLAS/TESTING/Makeblat1 | 67 - lapack-netlib/BLAS/TESTING/Makeblat2 | 67 - lapack-netlib/BLAS/TESTING/Makeblat3 | 67 - lapack-netlib/BLAS/TESTING/Makefile | 59 + lapack-netlib/BLAS/{ => TESTING}/cblat2.in | 0 lapack-netlib/BLAS/{ => TESTING}/cblat3.in | 0 lapack-netlib/BLAS/TESTING/dblat1.f | 7 +- lapack-netlib/BLAS/{ => TESTING}/dblat2.in | 0 lapack-netlib/BLAS/{ => TESTING}/dblat3.in | 0 lapack-netlib/BLAS/TESTING/sblat1.f | 4 +- lapack-netlib/BLAS/{ => TESTING}/sblat2.in | 0 lapack-netlib/BLAS/{ => TESTING}/sblat3.in | 0 lapack-netlib/BLAS/{ => TESTING}/zblat2.in | 0 lapack-netlib/BLAS/{ => TESTING}/zblat3.in | 0 lapack-netlib/BLAS/blas.pc.in | 5 +- lapack-netlib/CBLAS/CMakeLists.txt | 8 +- lapack-netlib/CBLAS/Makefile | 44 +- lapack-netlib/CBLAS/cblas.pc.in | 7 +- .../CBLAS/cmake/cblas-config-install.cmake.in | 2 +- lapack-netlib/CBLAS/examples/CMakeLists.txt | 2 +- lapack-netlib/CBLAS/examples/Makefile | 21 +- lapack-netlib/CBLAS/src/CMakeLists.txt | 116 +- lapack-netlib/CBLAS/src/Makefile | 183 +- lapack-netlib/CBLAS/testing/CMakeLists.txt | 30 +- lapack-netlib/CBLAS/testing/Makefile | 120 +- lapack-netlib/CMAKE/FindGcov.cmake | 155 + lapack-netlib/CMAKE/Findcodecov.cmake | 202 + lapack-netlib/CMakeLists.txt | 101 +- lapack-netlib/DOCS/Doxyfile | 2 +- lapack-netlib/DOCS/Doxyfile_man | 2 +- lapack-netlib/DOCS/lawn81.tex | 4 +- lapack-netlib/INSTALL/LAPACK_version.f | 14 +- lapack-netlib/INSTALL/Makefile | 35 +- lapack-netlib/INSTALL/dsecndtst.f | 11 +- lapack-netlib/INSTALL/ilaver.f | 8 +- lapack-netlib/INSTALL/make.inc.ALPHA | 109 +- lapack-netlib/INSTALL/make.inc.HPPA | 109 +- lapack-netlib/INSTALL/make.inc.IRIX64 | 113 +- lapack-netlib/INSTALL/make.inc.O2K | 115 +- lapack-netlib/INSTALL/make.inc.SGI5 | 109 +- lapack-netlib/INSTALL/make.inc.SUN4 | 109 +- lapack-netlib/INSTALL/make.inc.SUN4SOL2 | 117 +- lapack-netlib/INSTALL/make.inc.XLF | 109 +- lapack-netlib/INSTALL/make.inc.gfortran | 113 +- lapack-netlib/INSTALL/make.inc.gfortran_debug | 113 +- lapack-netlib/INSTALL/make.inc.ifort | 107 +- lapack-netlib/INSTALL/make.inc.pgf95 | 107 +- lapack-netlib/INSTALL/make.inc.pghpf | 111 +- lapack-netlib/INSTALL/secondtst.f | 11 +- lapack-netlib/LAPACKE/CMakeLists.txt | 50 +- lapack-netlib/LAPACKE/Makefile | 34 +- .../cmake/lapacke-config-install.cmake.in | 2 +- lapack-netlib/LAPACKE/example/CMakeLists.txt | 8 +- lapack-netlib/LAPACKE/example/Makefile | 17 +- lapack-netlib/LAPACKE/include/lapacke.h | 460 +- lapack-netlib/LAPACKE/lapacke.pc.in | 7 +- lapack-netlib/LAPACKE/src/CMakeLists.txt | 145 +- lapack-netlib/LAPACKE/src/Makefile | 124 +- lapack-netlib/LAPACKE/src/lapacke_cbbcsd.c | 59 +- .../LAPACKE/src/lapacke_cbbcsd_work.c | 168 +- lapack-netlib/LAPACKE/src/lapacke_cbdsqr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_cgbbrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_cgbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgbequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cgbrfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_cgbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgbsvx.c | 44 +- lapack-netlib/LAPACKE/src/lapacke_cgbsvxx.c | 52 +- lapack-netlib/LAPACKE/src/lapacke_cgbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgebak.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgebal.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_cgebrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgecon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgeequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgees.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeesx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeevx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgehrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgejsv.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_cgelq.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_cgelq2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgelqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgels.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgelsd.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cgelss.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cgelsy.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cgemlq.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cgemqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cgemqrt.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_cgeqlf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqp3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqpf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqr.c | 10 +- lapack-netlib/LAPACKE/src/lapacke_cgeqr2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqrfp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqrt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqrt2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgeqrt3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgerfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cgerfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_cgerqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgesdd.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_cgesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgesvd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgesvdx.c | 16 +- .../LAPACKE/src/lapacke_cgesvdx_work.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cgesvj.c | 20 +- .../LAPACKE/src/lapacke_cgesvj_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_cgesvx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_cgesvxx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_cgetf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgetrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgetrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgetri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cgetrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgetsls.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggbak.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cggbal.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cgges.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cgges3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggesx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggev.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggev3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggevx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggglm.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cgghd3.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_cgghrd.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_cgglse.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cggqrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggrqf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggsvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cggsvd3.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cggsvp.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cggsvp3.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cgtcon.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_cgtrfs.c | 56 +- lapack-netlib/LAPACKE/src/lapacke_cgtsv.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cgtsvx.c | 58 +- lapack-netlib/LAPACKE/src/lapacke_cgttrf.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cgttrs.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_chbev.c | 8 +- .../LAPACKE/src/lapacke_chbev_2stage.c | 8 +- .../LAPACKE/src/lapacke_chbev_2stage_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_chbevd.c | 8 +- .../LAPACKE/src/lapacke_chbevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chbevx.c | 30 +- .../LAPACKE/src/lapacke_chbevx_2stage.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_chbgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chbgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chbgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chbgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_chbtrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_checon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_checon_3.c | 23 +- lapack-netlib/LAPACKE/src/lapacke_cheequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cheev.c | 8 +- .../LAPACKE/src/lapacke_cheev_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cheevd.c | 8 +- .../LAPACKE/src/lapacke_cheevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cheevr.c | 30 +- .../LAPACKE/src/lapacke_cheevr_2stage.c | 30 +- .../LAPACKE/src/lapacke_cheevr_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_cheevx.c | 30 +- .../LAPACKE/src/lapacke_cheevx_2stage.c | 30 +- .../LAPACKE/src/lapacke_cheevx_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_chegst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chegv.c | 14 +- .../LAPACKE/src/lapacke_chegv_2stage.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chegvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chegvx.c | 36 +- .../LAPACKE/src/lapacke_chegvx_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_cherfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cherfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_chesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chesv_aa.c | 14 +- .../LAPACKE/src/lapacke_chesv_aa_2stage.c | 90 + .../src/lapacke_chesv_aa_2stage_work.c | 124 + lapack-netlib/LAPACKE/src/lapacke_chesv_rk.c | 19 +- lapack-netlib/LAPACKE/src/lapacke_chesvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_chesvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_cheswapr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chetrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chetrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chetrf_aa.c | 8 +- .../LAPACKE/src/lapacke_chetrf_aa_2stage.c | 86 + .../src/lapacke_chetrf_aa_2stage_work.c | 108 + lapack-netlib/LAPACKE/src/lapacke_chetrf_rk.c | 13 +- .../LAPACKE/src/lapacke_chetrf_rk_work.c | 4 +- .../LAPACKE/src/lapacke_chetrf_rook.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chetri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chetri2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chetri2x.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chetri_3.c | 17 +- lapack-netlib/LAPACKE/src/lapacke_chetrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chetrs2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chetrs_3.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_chetrs_aa.c | 14 +- .../LAPACKE/src/lapacke_chetrs_aa_2stage.c | 66 + .../src/lapacke_chetrs_aa_2stage_work.c | 115 + .../LAPACKE/src/lapacke_chetrs_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chfrk.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_chgeqz.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_chpcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chpev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chpevd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chpevx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_chpgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chpgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chpgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chpgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_chprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_chpsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chpsvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_chptrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_chptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_chsein.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_chseqr.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_clacgv.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clacn2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_clacp2.c | 8 +- .../LAPACKE/src/lapacke_clacp2_work.c | 1 - lapack-netlib/LAPACKE/src/lapacke_clacpy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clacrm.c | 76 + .../LAPACKE/src/lapacke_clacrm_work.c | 110 + lapack-netlib/LAPACKE/src/lapacke_clag2z.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clagge.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_claghe.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clagsy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clange.c | 8 +- .../LAPACKE/src/lapacke_clange_work.c | 44 +- lapack-netlib/LAPACKE/src/lapacke_clanhe.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clansy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clantr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clapmr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clapmt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_clarcm.c | 76 + .../LAPACKE/src/lapacke_clarcm_work.c | 110 + lapack-netlib/LAPACKE/src/lapacke_clarfb.c | 107 +- lapack-netlib/LAPACKE/src/lapacke_clarfg.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_clarft.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_clarfx.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_clascl.c | 124 +- lapack-netlib/LAPACKE/src/lapacke_claset.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_classq.c | 54 + .../LAPACKE/src/lapacke_classq_work.c | 41 + lapack-netlib/LAPACKE/src/lapacke_claswp.c | 28 +- .../LAPACKE/src/lapacke_claswp_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_clatms.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_clauum.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cpbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cpbstf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cpbsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_cpbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cpftrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpftrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cpocon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cpoequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpoequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cporfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cporfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_cposv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cposvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_cposvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_cpotrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpotrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpotri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpotrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cppcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cppequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cppsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cppsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_cpptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cpptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cpstrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cptcon.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cpteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cptrfs.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_cptsv.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cptsvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_cpttrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cpttrs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cspcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_csprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_cspsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cspsvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_csptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cstedc.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cstegr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_cstein.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cstemr.c | 26 +- .../LAPACKE/src/lapacke_cstemr_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_csteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_csycon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_csycon_3.c | 23 +- .../LAPACKE/src/lapacke_csycon_3_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_csyconv.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csyequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csyr.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_csyrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_csyrfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_csysv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_csysv_aa.c | 16 +- .../LAPACKE/src/lapacke_csysv_aa_2stage.c | 89 + .../src/lapacke_csysv_aa_2stage_work.c | 124 + lapack-netlib/LAPACKE/src/lapacke_csysv_rk.c | 19 +- .../LAPACKE/src/lapacke_csysv_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_csysvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_csysvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_csyswapr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csytrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csytrf_aa.c | 8 +- .../LAPACKE/src/lapacke_csytrf_aa_2stage.c | 86 + .../src/lapacke_csytrf_aa_2stage_work.c | 109 + lapack-netlib/LAPACKE/src/lapacke_csytrf_rk.c | 13 +- .../LAPACKE/src/lapacke_csytrf_rook.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csytri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csytri2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csytri2x.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_csytri_3.c | 17 +- lapack-netlib/LAPACKE/src/lapacke_csytrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_csytrs2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_csytrs_3.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_csytrs_aa.c | 14 +- .../LAPACKE/src/lapacke_csytrs_aa_2stage.c | 66 + .../src/lapacke_csytrs_aa_2stage_work.c | 114 + .../LAPACKE/src/lapacke_csytrs_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ctbcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctbrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ctbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ctfsm.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_ctftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctfttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctfttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctgevc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ctgexc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ctgsen.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ctgsja.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_ctgsna.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ctgsyl.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_ctpcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctpmqrt.c | 40 +- lapack-netlib/LAPACKE/src/lapacke_ctpqrt.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ctpqrt2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ctprfb.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_ctprfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ctptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ctpttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctpttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctrcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctrevc.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_ctrexc.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_ctrrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ctrsen.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_ctrsna.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_ctrsyl.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ctrtri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctrtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ctrttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctrttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ctzrzf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_cunbdb.c | 39 +- .../LAPACKE/src/lapacke_cunbdb_work.c | 132 +- lapack-netlib/LAPACKE/src/lapacke_cuncsd.c | 39 +- .../LAPACKE/src/lapacke_cuncsd2by1.c | 19 +- .../LAPACKE/src/lapacke_cuncsd_work.c | 240 +- lapack-netlib/LAPACKE/src/lapacke_cungbr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cunghr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cunglq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cungql.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cungqr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cungrq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cungtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cunmbr.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_cunmhr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cunmlq.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cunmql.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cunmqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cunmrq.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cunmrz.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_cunmtr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_cupgtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_cupmtr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dbbcsd.c | 59 +- .../LAPACKE/src/lapacke_dbbcsd_work.c | 164 +- lapack-netlib/LAPACKE/src/lapacke_dbdsdc.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dbdsqr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dbdsvdx.c | 18 +- .../LAPACKE/src/lapacke_dbdsvdx_work.c | 18 +- lapack-netlib/LAPACKE/src/lapacke_ddisna.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgbbrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dgbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgbequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dgbrfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_dgbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgbsvx.c | 44 +- lapack-netlib/LAPACKE/src/lapacke_dgbsvxx.c | 52 +- lapack-netlib/LAPACKE/src/lapacke_dgbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgebak.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgebal.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_dgebrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgecon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgeequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgees.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeesx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeevx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgehrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgejsv.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_dgelq.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dgelq2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgelqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgels.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgelsd.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgelss.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgelsy.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgemlq.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgemqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dgemqrt.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_dgeqlf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqp3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqpf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqr2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqrfp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqrt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqrt2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgeqrt3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgerfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dgerfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_dgerqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgesdd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgesvd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgesvdx.c | 14 +- .../LAPACKE/src/lapacke_dgesvdx_work.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgesvj.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgesvx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_dgesvxx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_dgetf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgetrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgetrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgetri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dgetrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgetsls.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggbak.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dggbal.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dgges.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dgges3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggesx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggev.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggev3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggevx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggglm.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgghd3.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dgghrd.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dgglse.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dggqrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggrqf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggsvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggsvd3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dggsvp.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dggsvp3.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dgtcon.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_dgtrfs.c | 56 +- lapack-netlib/LAPACKE/src/lapacke_dgtsv.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dgtsvx.c | 58 +- lapack-netlib/LAPACKE/src/lapacke_dgttrf.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dgttrs.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_dhgeqz.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dhsein.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_dhseqr.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dlacn2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dlacpy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dlag2s.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dlagge.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dlagsy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dlange.c | 12 +- .../LAPACKE/src/lapacke_dlange_work.c | 45 +- lapack-netlib/LAPACKE/src/lapacke_dlansy.c | 12 +- .../LAPACKE/src/lapacke_dlansy_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_dlantr.c | 10 +- lapack-netlib/LAPACKE/src/lapacke_dlapmr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dlapmt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dlapy2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dlapy3.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dlarfb.c | 107 +- lapack-netlib/LAPACKE/src/lapacke_dlarfg.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dlarft.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dlarfx.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dlartgp.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dlartgs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dlascl.c | 124 +- lapack-netlib/LAPACKE/src/lapacke_dlaset.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_dlasrt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dlassq.c | 53 + .../LAPACKE/src/lapacke_dlassq_work.c | 41 + lapack-netlib/LAPACKE/src/lapacke_dlaswp.c | 28 +- .../LAPACKE/src/lapacke_dlaswp_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_dlatms.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dlauum.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dopgtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dopmtr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dorbdb.c | 39 +- .../LAPACKE/src/lapacke_dorbdb_work.c | 128 +- lapack-netlib/LAPACKE/src/lapacke_dorcsd.c | 39 +- .../LAPACKE/src/lapacke_dorcsd2by1.c | 23 +- .../LAPACKE/src/lapacke_dorcsd_work.c | 232 +- lapack-netlib/LAPACKE/src/lapacke_dorgbr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dorghr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dorglq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dorgql.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dorgqr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dorgrq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dorgtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dormbr.c | 28 +- lapack-netlib/LAPACKE/src/lapacke_dormhr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dormlq.c | 24 +- .../LAPACKE/src/lapacke_dormlq_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_dormql.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dormqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dormrq.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dormrz.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dormtr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dpbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dpbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dpbstf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dpbsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dpbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dpftrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpftrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dpocon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dpoequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpoequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dporfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dporfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_dposv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dposvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dposvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dpotrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpotrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpotri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpotrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dppcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dppequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dppsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dppsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dpptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dpptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dpstrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dptcon.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dpteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dptrfs.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dptsv.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dptsvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_dpttrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dpttrs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dsbev.c | 8 +- .../LAPACKE/src/lapacke_dsbev_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsbevd.c | 8 +- .../LAPACKE/src/lapacke_dsbevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsbevx.c | 30 +- .../LAPACKE/src/lapacke_dsbevx_2stage.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dsbgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsbgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsbgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsbgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_dsbtrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dsfrk.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dsgesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dspcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dspev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dspevd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dspevx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dspgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dspgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dspgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dspgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_dsposv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dspsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dspsvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dsptrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dstebz.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_dstedc.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dstegr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dstein.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dstemr.c | 26 +- .../LAPACKE/src/lapacke_dstemr_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_dsteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dsterf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dstev.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dstevd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dstevr.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_dstevx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dsycon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsycon_3.c | 23 +- lapack-netlib/LAPACKE/src/lapacke_dsyconv.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsyequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsyev.c | 8 +- .../LAPACKE/src/lapacke_dsyev_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsyevd.c | 8 +- .../LAPACKE/src/lapacke_dsyevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsyevr.c | 30 +- .../LAPACKE/src/lapacke_dsyevr_2stage.c | 30 +- .../LAPACKE/src/lapacke_dsyevr_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_dsyevx.c | 30 +- .../LAPACKE/src/lapacke_dsyevx_2stage.c | 30 +- .../LAPACKE/src/lapacke_dsyevx_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_dsygst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsygv.c | 14 +- .../LAPACKE/src/lapacke_dsygv_2stage.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsygvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsygvx.c | 36 +- .../LAPACKE/src/lapacke_dsygvx_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_dsyrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_dsyrfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_dsysv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsysv_aa.c | 14 +- .../LAPACKE/src/lapacke_dsysv_aa_2stage.c | 88 + .../src/lapacke_dsysv_aa_2stage_work.c | 124 + lapack-netlib/LAPACKE/src/lapacke_dsysv_rk.c | 19 +- .../LAPACKE/src/lapacke_dsysv_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsysvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_dsysvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dsyswapr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsytrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsytrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa.c | 8 +- .../LAPACKE/src/lapacke_dsytrf_aa_2stage.c | 85 + .../src/lapacke_dsytrf_aa_2stage_work.c | 109 + lapack-netlib/LAPACKE/src/lapacke_dsytrf_rk.c | 13 +- .../LAPACKE/src/lapacke_dsytrf_rook.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsytri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsytri2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsytri2x.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dsytri_3.c | 17 +- lapack-netlib/LAPACKE/src/lapacke_dsytrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsytrs2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dsytrs_3.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa.c | 18 +- .../LAPACKE/src/lapacke_dsytrs_aa_2stage.c | 65 + .../src/lapacke_dsytrs_aa_2stage_work.c | 114 + .../LAPACKE/src/lapacke_dsytrs_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dtbcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtbrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dtbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dtfsm.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_dtftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtfttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtfttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtgevc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dtgexc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dtgsen.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dtgsja.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_dtgsna.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_dtgsyl.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dtpcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtpmqrt.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_dtpqrt.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dtpqrt2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dtprfb.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_dtprfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dtptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dtpttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtpttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtrcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtrevc.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_dtrexc.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dtrrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dtrsen.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_dtrsna.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_dtrsyl.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_dtrtri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtrtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_dtrttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtrttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_dtzrzf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_nancheck.c | 60 + lapack-netlib/LAPACKE/src/lapacke_sbbcsd.c | 59 +- .../LAPACKE/src/lapacke_sbbcsd_work.c | 163 +- lapack-netlib/LAPACKE/src/lapacke_sbdsdc.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_sbdsqr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_sbdsvdx.c | 18 +- .../LAPACKE/src/lapacke_sbdsvdx_work.c | 18 +- lapack-netlib/LAPACKE/src/lapacke_sdisna.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgbbrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_sgbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgbequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sgbrfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_sgbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgbsvx.c | 44 +- lapack-netlib/LAPACKE/src/lapacke_sgbsvxx.c | 52 +- lapack-netlib/LAPACKE/src/lapacke_sgbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgebak.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgebal.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_sgebrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgecon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgeequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgees.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeesx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeevx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgehrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgejsv.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_sgelq.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_sgelq2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgelqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgels.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgelsd.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sgelss.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sgelsy.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sgemlq.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sgemqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sgemqrt.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_sgeqlf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqp3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqpf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqr2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqrfp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqrt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqrt2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgeqrt3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgerfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sgerfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_sgerqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgesdd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgesvd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgesvdx.c | 14 +- .../LAPACKE/src/lapacke_sgesvdx_work.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sgesvj.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sgesvx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_sgesvxx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_sgetf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgetrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgetrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgetri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sgetrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgetsls.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggbak.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sggbal.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sgges.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sgges3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggesx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggev.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggev3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggevx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggglm.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sgghd3.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_sgghrd.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_sgglse.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sggqrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggrqf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggsvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggsvd3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sggsvp.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sggsvp3.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sgtcon.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_sgtrfs.c | 56 +- lapack-netlib/LAPACKE/src/lapacke_sgtsv.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sgtsvx.c | 58 +- lapack-netlib/LAPACKE/src/lapacke_sgttrf.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sgttrs.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_shgeqz.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_shsein.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_shseqr.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_slacn2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_slacpy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slag2d.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slagge.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slagsy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slange.c | 12 +- .../LAPACKE/src/lapacke_slange_work.c | 45 +- lapack-netlib/LAPACKE/src/lapacke_slansy.c | 12 +- .../LAPACKE/src/lapacke_slansy_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_slantr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slapmr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slapmt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slapy2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_slapy3.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_slarfb.c | 107 +- lapack-netlib/LAPACKE/src/lapacke_slarfg.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_slarft.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_slarfx.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_slartgp.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_slartgs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_slascl.c | 124 +- lapack-netlib/LAPACKE/src/lapacke_slaset.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_slasrt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_slassq.c | 53 + .../LAPACKE/src/lapacke_slassq_work.c | 41 + lapack-netlib/LAPACKE/src/lapacke_slaswp.c | 28 +- .../LAPACKE/src/lapacke_slaswp_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_slatms.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_slauum.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sopgtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sopmtr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sorbdb.c | 39 +- .../LAPACKE/src/lapacke_sorbdb_work.c | 126 +- lapack-netlib/LAPACKE/src/lapacke_sorcsd.c | 39 +- .../LAPACKE/src/lapacke_sorcsd2by1.c | 23 +- .../LAPACKE/src/lapacke_sorcsd_work.c | 229 +- lapack-netlib/LAPACKE/src/lapacke_sorgbr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sorghr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sorglq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sorgql.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sorgqr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sorgrq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sorgtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sormbr.c | 28 +- lapack-netlib/LAPACKE/src/lapacke_sormhr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sormlq.c | 24 +- .../LAPACKE/src/lapacke_sormlq_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_sormql.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sormqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sormrq.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sormrz.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sormtr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_spbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_spbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_spbstf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_spbsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_spbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_spftrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spftrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_spocon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_spoequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spoequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sporfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sporfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_sposv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sposvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_sposvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_spotrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spotrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spotri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spotrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sppcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sppequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sppsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sppsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_spptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_spptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_spstrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sptcon.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_spteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sptrfs.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_sptsv.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_sptsvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_spttrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_spttrs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ssbev.c | 8 +- .../LAPACKE/src/lapacke_ssbev_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssbevd.c | 8 +- .../LAPACKE/src/lapacke_ssbevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssbevx.c | 30 +- .../LAPACKE/src/lapacke_ssbevx_2stage.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ssbgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssbgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssbgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssbgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_ssbtrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_ssfrk.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_sspcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sspev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sspevd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_sspevx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_sspgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sspgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sspgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sspgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_ssprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_sspsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sspsvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_ssptrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sstebz.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_sstedc.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sstegr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_sstein.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_sstemr.c | 26 +- .../LAPACKE/src/lapacke_sstemr_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_ssteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_ssterf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_sstev.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_sstevd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_sstevr.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_sstevx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_ssycon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssycon_3.c | 23 +- lapack-netlib/LAPACKE/src/lapacke_ssyconv.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssyequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssyev.c | 8 +- .../LAPACKE/src/lapacke_ssyev_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssyevd.c | 8 +- .../LAPACKE/src/lapacke_ssyevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssyevr.c | 30 +- .../LAPACKE/src/lapacke_ssyevr_2stage.c | 30 +- .../LAPACKE/src/lapacke_ssyevr_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_ssyevx.c | 30 +- .../LAPACKE/src/lapacke_ssyevx_2stage.c | 30 +- .../LAPACKE/src/lapacke_ssyevx_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_ssygst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssygv.c | 14 +- .../LAPACKE/src/lapacke_ssygv_2stage.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssygvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssygvx.c | 36 +- .../LAPACKE/src/lapacke_ssygvx_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_ssyrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_ssyrfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_ssysv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssysv_aa.c | 14 +- .../LAPACKE/src/lapacke_ssysv_aa_2stage.c | 89 + .../src/lapacke_ssysv_aa_2stage_work.c | 124 + lapack-netlib/LAPACKE/src/lapacke_ssysv_rk.c | 19 +- .../LAPACKE/src/lapacke_ssysv_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssysvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_ssysvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_ssyswapr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssytrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssytrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa.c | 8 +- .../LAPACKE/src/lapacke_ssytrf_aa_2stage.c | 86 + .../src/lapacke_ssytrf_aa_2stage_work.c | 108 + lapack-netlib/LAPACKE/src/lapacke_ssytrf_rk.c | 13 +- .../LAPACKE/src/lapacke_ssytrf_rook.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssytri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssytri2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssytri2x.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ssytri_3.c | 17 +- lapack-netlib/LAPACKE/src/lapacke_ssytrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssytrs2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ssytrs_3.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa.c | 18 +- .../LAPACKE/src/lapacke_ssytrs_aa_2stage.c | 66 + .../src/lapacke_ssytrs_aa_2stage_work.c | 114 + .../LAPACKE/src/lapacke_ssytrs_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_stbcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stbrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_stbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_stfsm.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_stftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stfttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stfttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stgevc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_stgexc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_stgsen.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_stgsja.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_stgsna.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_stgsyl.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_stpcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stpmqrt.c | 40 +- lapack-netlib/LAPACKE/src/lapacke_stpqrt.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_stpqrt2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_stprfb.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_stprfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_stptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_stpttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stpttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_strcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_strevc.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_strexc.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_strrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_strsen.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_strsna.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_strsyl.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_strtri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_strtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_strttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_strttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_stzrzf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zbbcsd.c | 59 +- .../LAPACKE/src/lapacke_zbbcsd_work.c | 168 +- lapack-netlib/LAPACKE/src/lapacke_zbdsqr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_zcgesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zcposv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgbbrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_zgbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgbequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zgbrfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_zgbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgbsvx.c | 44 +- lapack-netlib/LAPACKE/src/lapacke_zgbsvxx.c | 52 +- lapack-netlib/LAPACKE/src/lapacke_zgbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgebak.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgebal.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_zgebrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgecon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgeequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgees.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeesx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeevx.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgehrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgejsv.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_zgelq.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_zgelq2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgelqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgels.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgelsd.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zgelss.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zgelsy.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zgemlq.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zgemqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zgemqrt.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_zgeqlf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqp3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqpf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqr.c | 10 +- lapack-netlib/LAPACKE/src/lapacke_zgeqr2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqrfp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqrt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqrt2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgeqrt3.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgerfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zgerfsx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_zgerqf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgesdd.c | 12 +- lapack-netlib/LAPACKE/src/lapacke_zgesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgesvd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgesvdx.c | 28 +- .../LAPACKE/src/lapacke_zgesvdx_work.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zgesvj.c | 20 +- .../LAPACKE/src/lapacke_zgesvj_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_zgesvx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_zgesvxx.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_zgetf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgetrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgetrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgetri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zgetrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgetsls.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggbak.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zggbal.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zgges.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zgges3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggesx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggev.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggev3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggevx.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggglm.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zgghd3.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zgghrd.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zgglse.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zggqrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggrqf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggsvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggsvd3.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zggsvp.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zggsvp3.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zgtcon.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_zgtrfs.c | 56 +- lapack-netlib/LAPACKE/src/lapacke_zgtsv.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zgtsvx.c | 58 +- lapack-netlib/LAPACKE/src/lapacke_zgttrf.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zgttrs.c | 32 +- lapack-netlib/LAPACKE/src/lapacke_zhbev.c | 8 +- .../LAPACKE/src/lapacke_zhbev_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhbevd.c | 8 +- .../LAPACKE/src/lapacke_zhbevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhbevx.c | 30 +- .../LAPACKE/src/lapacke_zhbevx_2stage.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zhbgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhbgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhbgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhbgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_zhbtrd.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_zhecon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhecon_3.c | 23 +- lapack-netlib/LAPACKE/src/lapacke_zheequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zheev.c | 8 +- .../LAPACKE/src/lapacke_zheev_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zheevd.c | 8 +- .../LAPACKE/src/lapacke_zheevd_2stage.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zheevr.c | 30 +- .../LAPACKE/src/lapacke_zheevr_2stage.c | 30 +- .../LAPACKE/src/lapacke_zheevr_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_zheevx.c | 30 +- .../LAPACKE/src/lapacke_zheevx_2stage.c | 30 +- .../LAPACKE/src/lapacke_zheevx_work.c | 7 +- lapack-netlib/LAPACKE/src/lapacke_zhegst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhegv.c | 14 +- .../LAPACKE/src/lapacke_zhegv_2stage.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhegvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhegvx.c | 36 +- .../LAPACKE/src/lapacke_zhegvx_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_zherfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zherfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_zhesv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhesv_aa.c | 14 +- .../LAPACKE/src/lapacke_zhesv_aa_2stage.c | 90 + .../src/lapacke_zhesv_aa_2stage_work.c | 124 + lapack-netlib/LAPACKE/src/lapacke_zhesv_rk.c | 19 +- lapack-netlib/LAPACKE/src/lapacke_zhesvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zhesvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_zheswapr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhetrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhetrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa.c | 8 +- .../LAPACKE/src/lapacke_zhetrf_aa_2stage.c | 86 + .../src/lapacke_zhetrf_aa_2stage_work.c | 109 + lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk.c | 13 +- .../LAPACKE/src/lapacke_zhetrf_rk_work.c | 4 +- .../LAPACKE/src/lapacke_zhetrf_rook.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhetri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhetri2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhetri2x.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhetri_3.c | 17 +- lapack-netlib/LAPACKE/src/lapacke_zhetrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhetrs2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhetrs_3.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa.c | 14 +- .../LAPACKE/src/lapacke_zhetrs_aa_2stage.c | 66 + .../src/lapacke_zhetrs_aa_2stage_work.c | 114 + .../LAPACKE/src/lapacke_zhetrs_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhfrk.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zhgeqz.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zhpcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhpev.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhpevd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhpevx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zhpgst.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhpgv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhpgvd.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhpgvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_zhprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zhpsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhpsvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zhptrd.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zhptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zhsein.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zhseqr.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_zlacgv.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlacn2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zlacp2.c | 8 +- .../LAPACKE/src/lapacke_zlacp2_work.c | 1 - lapack-netlib/LAPACKE/src/lapacke_zlacpy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlacrm.c | 76 + .../LAPACKE/src/lapacke_zlacrm_work.c | 110 + lapack-netlib/LAPACKE/src/lapacke_zlag2c.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlagge.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlaghe.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlagsy.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlange.c | 12 +- .../LAPACKE/src/lapacke_zlange_work.c | 46 +- lapack-netlib/LAPACKE/src/lapacke_zlanhe.c | 12 +- .../LAPACKE/src/lapacke_zlanhe_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_zlansy.c | 12 +- .../LAPACKE/src/lapacke_zlansy_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_zlantr.c | 10 +- lapack-netlib/LAPACKE/src/lapacke_zlapmr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlapmt.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zlarcm.c | 76 + .../LAPACKE/src/lapacke_zlarcm_work.c | 110 + lapack-netlib/LAPACKE/src/lapacke_zlarfb.c | 107 +- lapack-netlib/LAPACKE/src/lapacke_zlarfg.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zlarft.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zlarfx.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zlascl.c | 124 +- lapack-netlib/LAPACKE/src/lapacke_zlaset.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zlassq.c | 54 + .../LAPACKE/src/lapacke_zlassq_work.c | 42 + lapack-netlib/LAPACKE/src/lapacke_zlaswp.c | 28 +- .../LAPACKE/src/lapacke_zlaswp_work.c | 2 +- lapack-netlib/LAPACKE/src/lapacke_zlatms.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zlauum.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpbcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zpbequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpbrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zpbstf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpbsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zpbsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zpbtrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zpftrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpftrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zpocon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zpoequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpoequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zporfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zporfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_zposv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zposvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zposvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_zpotrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpotrf2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpotri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpotrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zppcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zppequ.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zppsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zppsvx.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_zpptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zpptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zpstrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zptcon.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zpteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zptrfs.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_zptsv.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zptsvx.c | 36 +- lapack-netlib/LAPACKE/src/lapacke_zpttrf.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zpttrs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zspcon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zsprfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zspsv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zspsvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zsptrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zstedc.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zstegr.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_zstein.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zstemr.c | 26 +- .../LAPACKE/src/lapacke_zstemr_work.c | 4 +- lapack-netlib/LAPACKE/src/lapacke_zsteqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zsycon.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zsycon_3.c | 23 +- lapack-netlib/LAPACKE/src/lapacke_zsyconv.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsyequb.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsyr.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zsyrfs.c | 26 +- lapack-netlib/LAPACKE/src/lapacke_zsyrfsx.c | 42 +- lapack-netlib/LAPACKE/src/lapacke_zsysv.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zsysv_aa.c | 14 +- .../LAPACKE/src/lapacke_zsysv_aa_2stage.c | 89 + .../src/lapacke_zsysv_aa_2stage_work.c | 124 + lapack-netlib/LAPACKE/src/lapacke_zsysv_rk.c | 19 +- .../LAPACKE/src/lapacke_zsysv_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zsysvx.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zsysvxx.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_zsyswapr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsytrf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa.c | 8 +- .../LAPACKE/src/lapacke_zsytrf_aa_2stage.c | 86 + .../src/lapacke_zsytrf_aa_2stage_work.c | 109 + lapack-netlib/LAPACKE/src/lapacke_zsytrf_rk.c | 13 +- .../LAPACKE/src/lapacke_zsytrf_rook.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsytri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsytri2.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsytri2x.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zsytri_3.c | 17 +- lapack-netlib/LAPACKE/src/lapacke_zsytrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zsytrs2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zsytrs_3.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa.c | 14 +- .../LAPACKE/src/lapacke_zsytrs_aa_2stage.c | 65 + .../src/lapacke_zsytrs_aa_2stage_work.c | 114 + .../LAPACKE/src/lapacke_zsytrs_rook.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ztbcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztbrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ztbtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ztfsm.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_ztftri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztfttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztfttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztgevc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ztgexc.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ztgsen.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ztgsja.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_ztgsna.c | 30 +- lapack-netlib/LAPACKE/src/lapacke_ztgsyl.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_ztpcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztpmqrt.c | 38 +- lapack-netlib/LAPACKE/src/lapacke_ztpqrt.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ztpqrt2.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ztprfb.c | 50 +- lapack-netlib/LAPACKE/src/lapacke_ztprfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ztptri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztptrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ztpttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztpttr.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztrcon.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztrevc.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_ztrexc.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_ztrrfs.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ztrsen.c | 16 +- lapack-netlib/LAPACKE/src/lapacke_ztrsna.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_ztrsyl.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_ztrtri.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztrtrs.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_ztrttf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztrttp.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_ztzrzf.c | 8 +- lapack-netlib/LAPACKE/src/lapacke_zunbdb.c | 39 +- .../LAPACKE/src/lapacke_zunbdb_work.c | 134 +- lapack-netlib/LAPACKE/src/lapacke_zuncsd.c | 39 +- .../LAPACKE/src/lapacke_zuncsd2by1.c | 21 +- .../LAPACKE/src/lapacke_zuncsd_work.c | 242 +- lapack-netlib/LAPACKE/src/lapacke_zungbr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zunghr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zunglq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zungql.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zungqr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zungrq.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zungtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zunmbr.c | 24 +- lapack-netlib/LAPACKE/src/lapacke_zunmhr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zunmlq.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zunmql.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zunmqr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zunmrq.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zunmrz.c | 20 +- lapack-netlib/LAPACKE/src/lapacke_zunmtr.c | 22 +- lapack-netlib/LAPACKE/src/lapacke_zupgtr.c | 14 +- lapack-netlib/LAPACKE/src/lapacke_zupmtr.c | 22 +- lapack-netlib/LAPACKE/utils/CMakeLists.txt | 2 +- lapack-netlib/LAPACKE/utils/Makefile | 9 +- .../LAPACKE/utils/lapacke_ctp_trans.c | 2 +- .../LAPACKE/utils/lapacke_dtp_trans.c | 2 +- .../LAPACKE/utils/lapacke_stp_trans.c | 2 +- .../LAPACKE/utils/lapacke_ztp_trans.c | 2 +- lapack-netlib/LICENSE | 6 +- lapack-netlib/Makefile | 197 +- lapack-netlib/README.md | 150 +- lapack-netlib/SRC/CMakeLists.txt | 95 +- lapack-netlib/SRC/Makefile | 20 +- lapack-netlib/SRC/VARIANTS/Makefile | 49 +- lapack-netlib/SRC/VARIANTS/README | 12 +- lapack-netlib/SRC/cbbcsd.f | 4 +- lapack-netlib/SRC/cgebd2.f | 8 +- lapack-netlib/SRC/cgebrd.f | 11 +- lapack-netlib/SRC/cgeevx.f | 20 +- lapack-netlib/SRC/cgejsv.f | 4470 ++++++++-------- lapack-netlib/SRC/cgelqt.f | 20 +- lapack-netlib/SRC/cgelqt3.f | 12 +- lapack-netlib/SRC/cgemlqt.f | 32 +- lapack-netlib/SRC/cgeqrt.f | 8 +- lapack-netlib/SRC/cgesv.f | 8 +- lapack-netlib/SRC/cgesvdx.f | 5 +- lapack-netlib/SRC/cgesvj.f | 12 +- lapack-netlib/SRC/cgetsls.f | 32 +- lapack-netlib/SRC/cggesx.f | 8 +- lapack-netlib/SRC/cgghd3.f | 5 +- lapack-netlib/SRC/cgsvj0.f | 6 +- lapack-netlib/SRC/cgsvj1.f | 6 +- lapack-netlib/SRC/chb2st_kernels.f | 102 +- lapack-netlib/SRC/chbev_2stage.f | 21 +- lapack-netlib/SRC/chbevd_2stage.f | 16 +- lapack-netlib/SRC/chbevx_2stage.f | 15 +- lapack-netlib/SRC/checon_3.f | 8 +- lapack-netlib/SRC/cheequb.f | 4 +- lapack-netlib/SRC/cheev_2stage.f | 18 +- lapack-netlib/SRC/cheevd_2stage.f | 22 +- lapack-netlib/SRC/cheevr_2stage.f | 14 +- lapack-netlib/SRC/cheevx_2stage.f | 18 +- lapack-netlib/SRC/chegv_2stage.f | 20 +- lapack-netlib/SRC/chesv_aa.f | 10 +- lapack-netlib/SRC/chesv_aa_2stage.f | 276 + lapack-netlib/SRC/chetrd_2stage.f | 20 +- lapack-netlib/SRC/chetrd_hb2st.F | 8 +- lapack-netlib/SRC/chetrd_he2hb.f | 14 +- lapack-netlib/SRC/chetrf_aa.f | 34 +- lapack-netlib/SRC/chetrf_aa_2stage.f | 664 +++ lapack-netlib/SRC/chetri2.f | 8 +- lapack-netlib/SRC/chetri_3.f | 10 +- lapack-netlib/SRC/chetri_3x.f | 8 +- lapack-netlib/SRC/chetrs_3.f | 8 +- lapack-netlib/SRC/chetrs_aa.f | 10 +- lapack-netlib/SRC/chetrs_aa_2stage.f | 283 ++ lapack-netlib/SRC/cla_gbrfsx_extended.f | 12 +- lapack-netlib/SRC/cla_heamv.f | 8 +- lapack-netlib/SRC/cla_herfsx_extended.f | 15 +- lapack-netlib/SRC/cla_porfsx_extended.f | 15 +- lapack-netlib/SRC/cla_syamv.f | 8 +- lapack-netlib/SRC/cla_syrfsx_extended.f | 15 +- lapack-netlib/SRC/clabrd.f | 8 +- lapack-netlib/SRC/clahef_aa.f | 76 +- lapack-netlib/SRC/clalsa.f | 9 +- lapack-netlib/SRC/clamswlq.f | 26 +- lapack-netlib/SRC/clamtsqr.f | 16 +- lapack-netlib/SRC/claqr1.f | 14 +- lapack-netlib/SRC/claqr2.f | 30 +- lapack-netlib/SRC/claqr3.f | 26 +- lapack-netlib/SRC/claqr4.f | 7 +- lapack-netlib/SRC/claqr5.f | 48 +- lapack-netlib/SRC/clarfg.f | 8 +- lapack-netlib/SRC/clarfgp.f | 8 +- lapack-netlib/SRC/clarrv.f | 11 +- lapack-netlib/SRC/claswlq.f | 6 +- lapack-netlib/SRC/claswp.f | 16 +- lapack-netlib/SRC/clasyf_aa.f | 131 +- lapack-netlib/SRC/cstegr.f | 4 +- lapack-netlib/SRC/cstemr.f | 4 +- lapack-netlib/SRC/csycon_3.f | 8 +- lapack-netlib/SRC/csyconvf.f | 10 +- lapack-netlib/SRC/csyconvf_rook.f | 10 +- lapack-netlib/SRC/csyequb.f | 8 +- lapack-netlib/SRC/csysv_aa.f | 8 +- lapack-netlib/SRC/csysv_aa_2stage.f | 276 + lapack-netlib/SRC/csytrf_aa.f | 35 +- lapack-netlib/SRC/csytrf_aa_2stage.f | 668 +++ lapack-netlib/SRC/csytri2.f | 8 +- lapack-netlib/SRC/csytri2x.f | 8 +- lapack-netlib/SRC/csytri_3.f | 10 +- lapack-netlib/SRC/csytri_3x.f | 8 +- lapack-netlib/SRC/csytrs_3.f | 8 +- lapack-netlib/SRC/csytrs_aa.f | 10 +- lapack-netlib/SRC/csytrs_aa_2stage.f | 281 ++ lapack-netlib/SRC/ctgex2.f | 12 +- lapack-netlib/SRC/ctgexc.f | 8 +- lapack-netlib/SRC/ctgsen.f | 4 +- lapack-netlib/SRC/ctplqt.f | 22 +- lapack-netlib/SRC/ctplqt2.f | 16 +- lapack-netlib/SRC/ctpmlqt.f | 26 +- lapack-netlib/SRC/ctpmqrt.f | 8 +- lapack-netlib/SRC/ctrevc3.f | 8 +- lapack-netlib/SRC/cunbdb1.f | 4 +- lapack-netlib/SRC/cunbdb2.f | 7 +- lapack-netlib/SRC/cunbdb3.f | 6 +- lapack-netlib/SRC/cunbdb4.f | 7 +- lapack-netlib/SRC/cunbdb5.f | 4 +- lapack-netlib/SRC/cunbdb6.f | 4 +- lapack-netlib/SRC/cuncsd.f | 10 +- lapack-netlib/SRC/cuncsd2by1.f | 4 +- lapack-netlib/SRC/cunm22.f | 6 +- lapack-netlib/SRC/dbbcsd.f | 4 +- lapack-netlib/SRC/dbdsdc.f | 6 +- lapack-netlib/SRC/dbdsqr.f | 15 +- lapack-netlib/SRC/dbdsvdx.f | 6 +- lapack-netlib/SRC/dgebal.f | 10 +- lapack-netlib/SRC/dgebd2.f | 8 +- lapack-netlib/SRC/dgebrd.f | 11 +- lapack-netlib/SRC/dgejsv.f | 6 +- lapack-netlib/SRC/dgelqt.f | 22 +- lapack-netlib/SRC/dgelqt3.f | 12 +- lapack-netlib/SRC/dgelsd.f | 8 +- lapack-netlib/SRC/dgemlqt.f | 22 +- lapack-netlib/SRC/dgeqrt.f | 8 +- lapack-netlib/SRC/dgesc2.f | 8 +- lapack-netlib/SRC/dgesvdx.f | 4 +- lapack-netlib/SRC/dgesvj.f | 10 +- lapack-netlib/SRC/dgetc2.f | 4 +- lapack-netlib/SRC/dgetsls.f | 24 +- lapack-netlib/SRC/dggesx.f | 8 +- lapack-netlib/SRC/dgghd3.f | 5 +- lapack-netlib/SRC/dgsvj0.f | 9 +- lapack-netlib/SRC/dgsvj1.f | 5 +- lapack-netlib/SRC/disnan.f | 10 +- lapack-netlib/SRC/dla_gbamv.f | 8 +- lapack-netlib/SRC/dla_gbrfsx_extended.f | 15 +- lapack-netlib/SRC/dla_geamv.f | 12 +- lapack-netlib/SRC/dla_gerfsx_extended.f | 15 +- lapack-netlib/SRC/dla_porfsx_extended.f | 15 +- lapack-netlib/SRC/dla_syamv.f | 8 +- lapack-netlib/SRC/dla_syrfsx_extended.f | 15 +- lapack-netlib/SRC/dlabrd.f | 8 +- lapack-netlib/SRC/dlaed3.f | 8 +- lapack-netlib/SRC/dlaisnan.f | 10 +- lapack-netlib/SRC/dlalsa.f | 12 +- lapack-netlib/SRC/dlamswlq.f | 12 +- lapack-netlib/SRC/dlamtsqr.f | 6 +- lapack-netlib/SRC/dlapy2.f | 34 +- lapack-netlib/SRC/dlaqr1.f | 14 +- lapack-netlib/SRC/dlaqr2.f | 26 +- lapack-netlib/SRC/dlaqr3.f | 22 +- lapack-netlib/SRC/dlaqr5.f | 50 +- lapack-netlib/SRC/dlarfg.f | 8 +- lapack-netlib/SRC/dlarfgp.f | 8 +- lapack-netlib/SRC/dlarra.f | 14 +- lapack-netlib/SRC/dlarrb.f | 12 +- lapack-netlib/SRC/dlarrc.f | 9 +- lapack-netlib/SRC/dlarrd.f | 8 +- lapack-netlib/SRC/dlarre.f | 11 +- lapack-netlib/SRC/dlarrf.f | 9 +- lapack-netlib/SRC/dlarrj.f | 12 +- lapack-netlib/SRC/dlarrk.f | 13 +- lapack-netlib/SRC/dlarrr.f | 13 +- lapack-netlib/SRC/dlarrv.f | 19 +- lapack-netlib/SRC/dlartgs.f | 9 +- lapack-netlib/SRC/dlasd0.f | 16 +- lapack-netlib/SRC/dlasd2.f | 16 +- lapack-netlib/SRC/dlasd3.f | 15 +- lapack-netlib/SRC/dlasd8.f | 8 +- lapack-netlib/SRC/dlasda.f | 9 +- lapack-netlib/SRC/dlasq4.f | 3 +- lapack-netlib/SRC/dlasq5.f | 8 +- lapack-netlib/SRC/dlaswlq.f | 6 +- lapack-netlib/SRC/dlaswp.f | 16 +- lapack-netlib/SRC/dlasyf_aa.f | 131 +- lapack-netlib/SRC/dorbdb1.f | 4 +- lapack-netlib/SRC/dorbdb2.f | 4 +- lapack-netlib/SRC/dorbdb3.f | 4 +- lapack-netlib/SRC/dorbdb4.f | 4 +- lapack-netlib/SRC/dorbdb5.f | 4 +- lapack-netlib/SRC/dorbdb6.f | 4 +- lapack-netlib/SRC/dorcsd.f | 14 +- lapack-netlib/SRC/dorcsd2by1.f | 2 +- lapack-netlib/SRC/dorm22.f | 6 +- lapack-netlib/SRC/dppsvx.f | 5 +- lapack-netlib/SRC/drscl.f | 8 +- lapack-netlib/SRC/dsb2st_kernels.f | 102 +- lapack-netlib/SRC/dsbev_2stage.f | 19 +- lapack-netlib/SRC/dsbevd_2stage.f | 16 +- lapack-netlib/SRC/dsbevx_2stage.f | 15 +- lapack-netlib/SRC/dsgesv.f | 6 +- lapack-netlib/SRC/dspevd.f | 9 +- lapack-netlib/SRC/dspgv.f | 9 +- lapack-netlib/SRC/dsposv.f | 4 +- lapack-netlib/SRC/dspsvx.f | 5 +- lapack-netlib/SRC/dstedc.f | 9 +- lapack-netlib/SRC/dstegr.f | 4 +- lapack-netlib/SRC/dstemr.f | 4 +- lapack-netlib/SRC/dsycon_3.f | 8 +- lapack-netlib/SRC/dsyconvf.f | 10 +- lapack-netlib/SRC/dsyconvf_rook.f | 10 +- lapack-netlib/SRC/dsyequb.f | 8 +- lapack-netlib/SRC/dsyev_2stage.f | 18 +- lapack-netlib/SRC/dsyevd_2stage.f | 22 +- lapack-netlib/SRC/dsyevr.f | 4 +- lapack-netlib/SRC/dsyevr_2stage.f | 14 +- lapack-netlib/SRC/dsyevx_2stage.f | 18 +- lapack-netlib/SRC/dsygv_2stage.f | 20 +- lapack-netlib/SRC/dsysv_aa.f | 10 +- lapack-netlib/SRC/dsysv_aa_2stage.f | 280 ++ lapack-netlib/SRC/dsytrd_2stage.f | 20 +- lapack-netlib/SRC/dsytrd_sb2st.F | 8 +- lapack-netlib/SRC/dsytrd_sy2sb.f | 14 +- lapack-netlib/SRC/dsytrf_aa.f | 35 +- lapack-netlib/SRC/dsytrf_aa_2stage.f | 647 +++ lapack-netlib/SRC/dsytri2.f | 8 +- lapack-netlib/SRC/dsytri2x.f | 8 +- lapack-netlib/SRC/dsytri_3.f | 10 +- lapack-netlib/SRC/dsytri_3x.f | 8 +- lapack-netlib/SRC/dsytrs_3.f | 8 +- lapack-netlib/SRC/dsytrs_aa.f | 10 +- lapack-netlib/SRC/dsytrs_aa_2stage.f | 281 ++ lapack-netlib/SRC/dtgsen.f | 7 +- lapack-netlib/SRC/dtplqt.f | 22 +- lapack-netlib/SRC/dtplqt2.f | 16 +- lapack-netlib/SRC/dtpmlqt.f | 8 +- lapack-netlib/SRC/dtpmqrt.f | 8 +- lapack-netlib/SRC/dtrevc.f | 9 +- lapack-netlib/SRC/dtrevc3.f | 8 +- lapack-netlib/SRC/dtrsna.f | 8 +- lapack-netlib/SRC/dtrttp.f | 8 +- lapack-netlib/SRC/ilaclr.f | 8 +- lapack-netlib/SRC/ilaenv.f | 40 +- lapack-netlib/SRC/ilaenv2stage.f | 191 + lapack-netlib/SRC/ilaslc.f | 8 +- lapack-netlib/SRC/ilaver.f | 72 - lapack-netlib/SRC/iparam2stage.F | 8 +- lapack-netlib/SRC/iparmq.f | 12 +- lapack-netlib/SRC/sbbcsd.f | 4 +- lapack-netlib/SRC/sbdsdc.f | 6 +- lapack-netlib/SRC/sbdsqr.f | 14 +- lapack-netlib/SRC/sbdsvdx.f | 6 +- lapack-netlib/SRC/sgebd2.f | 8 +- lapack-netlib/SRC/sgebrd.f | 11 +- lapack-netlib/SRC/sgees.f | 8 +- lapack-netlib/SRC/sgeevx.f | 16 +- lapack-netlib/SRC/sgejsv.f | 6 +- lapack-netlib/SRC/sgelqt.f | 22 +- lapack-netlib/SRC/sgelqt3.f | 16 +- lapack-netlib/SRC/sgelsd.f | 8 +- lapack-netlib/SRC/sgemlqt.f | 22 +- lapack-netlib/SRC/sgeqrt.f | 8 +- lapack-netlib/SRC/sgesvdx.f | 4 +- lapack-netlib/SRC/sgesvj.f | 10 +- lapack-netlib/SRC/sgetrf2.f | 4 +- lapack-netlib/SRC/sgetsls.f | 24 +- lapack-netlib/SRC/sggesx.f | 8 +- lapack-netlib/SRC/sgghd3.f | 5 +- lapack-netlib/SRC/sgsvj0.f | 11 +- lapack-netlib/SRC/sgsvj1.f | 11 +- lapack-netlib/SRC/sisnan.f | 10 +- lapack-netlib/SRC/sla_gbamv.f | 8 +- lapack-netlib/SRC/sla_gbrfsx_extended.f | 12 +- lapack-netlib/SRC/sla_geamv.f | 12 +- lapack-netlib/SRC/sla_syamv.f | 8 +- lapack-netlib/SRC/slabrd.f | 8 +- lapack-netlib/SRC/slaed3.f | 8 +- lapack-netlib/SRC/slaisnan.f | 10 +- lapack-netlib/SRC/slalsa.f | 12 +- lapack-netlib/SRC/slamswlq.f | 20 +- lapack-netlib/SRC/slamtsqr.f | 6 +- lapack-netlib/SRC/slapy2.f | 37 +- lapack-netlib/SRC/slaqr1.f | 14 +- lapack-netlib/SRC/slaqr2.f | 32 +- lapack-netlib/SRC/slaqr3.f | 28 +- lapack-netlib/SRC/slaqr5.f | 50 +- lapack-netlib/SRC/slarfg.f | 8 +- lapack-netlib/SRC/slarfgp.f | 8 +- lapack-netlib/SRC/slarra.f | 14 +- lapack-netlib/SRC/slarrb.f | 12 +- lapack-netlib/SRC/slarrc.f | 9 +- lapack-netlib/SRC/slarrd.f | 8 +- lapack-netlib/SRC/slarre.f | 11 +- lapack-netlib/SRC/slarrf.f | 9 +- lapack-netlib/SRC/slarrj.f | 12 +- lapack-netlib/SRC/slarrk.f | 13 +- lapack-netlib/SRC/slarrr.f | 13 +- lapack-netlib/SRC/slarrv.f | 19 +- lapack-netlib/SRC/slartgs.f | 9 +- lapack-netlib/SRC/slasd0.f | 10 +- lapack-netlib/SRC/slasd3.f | 9 +- lapack-netlib/SRC/slasd8.f | 8 +- lapack-netlib/SRC/slasq4.f | 3 +- lapack-netlib/SRC/slaswlq.f | 8 +- lapack-netlib/SRC/slaswp.f | 16 +- lapack-netlib/SRC/slasyf_aa.f | 131 +- lapack-netlib/SRC/sorbdb1.f | 4 +- lapack-netlib/SRC/sorbdb2.f | 4 +- lapack-netlib/SRC/sorbdb3.f | 4 +- lapack-netlib/SRC/sorbdb4.f | 4 +- lapack-netlib/SRC/sorbdb5.f | 4 +- lapack-netlib/SRC/sorbdb6.f | 4 +- lapack-netlib/SRC/sorcsd.f | 14 +- lapack-netlib/SRC/sorcsd2by1.f | 4 +- lapack-netlib/SRC/sorm22.f | 6 +- lapack-netlib/SRC/spotrf2.f | 8 +- lapack-netlib/SRC/sppsvx.f | 5 +- lapack-netlib/SRC/ssb2st_kernels.f | 105 +- lapack-netlib/SRC/ssbev_2stage.f | 19 +- lapack-netlib/SRC/ssbevd_2stage.f | 16 +- lapack-netlib/SRC/ssbevx_2stage.f | 15 +- lapack-netlib/SRC/ssbgvx.f | 6 +- lapack-netlib/SRC/ssfrk.f | 8 +- lapack-netlib/SRC/sspgv.f | 9 +- lapack-netlib/SRC/sspsvx.f | 5 +- lapack-netlib/SRC/sstegr.f | 4 +- lapack-netlib/SRC/sstemr.f | 4 +- lapack-netlib/SRC/ssycon_3.f | 8 +- lapack-netlib/SRC/ssyconvf.f | 10 +- lapack-netlib/SRC/ssyconvf_rook.f | 10 +- lapack-netlib/SRC/ssyequb.f | 8 +- lapack-netlib/SRC/ssyev_2stage.f | 18 +- lapack-netlib/SRC/ssyevd_2stage.f | 22 +- lapack-netlib/SRC/ssyevr_2stage.f | 14 +- lapack-netlib/SRC/ssyevx_2stage.f | 18 +- lapack-netlib/SRC/ssygv_2stage.f | 23 +- lapack-netlib/SRC/ssygvx.f | 4 +- lapack-netlib/SRC/ssysv_aa.f | 8 +- lapack-netlib/SRC/ssysv_aa_2stage.f | 279 + lapack-netlib/SRC/ssytrd_2stage.f | 20 +- lapack-netlib/SRC/ssytrd_sb2st.F | 8 +- lapack-netlib/SRC/ssytrd_sy2sb.f | 14 +- lapack-netlib/SRC/ssytrf_aa.f | 35 +- lapack-netlib/SRC/ssytrf_aa_2stage.f | 647 +++ lapack-netlib/SRC/ssytri2.f | 8 +- lapack-netlib/SRC/ssytri2x.f | 8 +- lapack-netlib/SRC/ssytri_3.f | 10 +- lapack-netlib/SRC/ssytri_3x.f | 8 +- lapack-netlib/SRC/ssytrs_3.f | 8 +- lapack-netlib/SRC/ssytrs_aa.f | 10 +- lapack-netlib/SRC/ssytrs_aa_2stage.f | 281 ++ lapack-netlib/SRC/stfsm.f | 8 +- lapack-netlib/SRC/stgex2.f | 12 +- lapack-netlib/SRC/stgexc.f | 8 +- lapack-netlib/SRC/stplqt.f | 22 +- lapack-netlib/SRC/stplqt2.f | 16 +- lapack-netlib/SRC/stpmlqt.f | 8 +- lapack-netlib/SRC/stpmqrt.f | 8 +- lapack-netlib/SRC/strevc3.f | 8 +- lapack-netlib/SRC/strttp.f | 8 +- lapack-netlib/SRC/zbbcsd.f | 4 +- lapack-netlib/SRC/zcgesv.f | 6 +- lapack-netlib/SRC/zcposv.f | 6 +- lapack-netlib/SRC/zgebal.f | 8 +- lapack-netlib/SRC/zgebd2.f | 8 +- lapack-netlib/SRC/zgebrd.f | 11 +- lapack-netlib/SRC/zgejsv.f | 4474 ++++++++--------- lapack-netlib/SRC/zgelqt.f | 20 +- lapack-netlib/SRC/zgelqt3.f | 12 +- lapack-netlib/SRC/zgelsd.f | 8 +- lapack-netlib/SRC/zgemlqt.f | 34 +- lapack-netlib/SRC/zgeqrt.f | 8 +- lapack-netlib/SRC/zgesc2.f | 8 +- lapack-netlib/SRC/zgesv.f | 8 +- lapack-netlib/SRC/zgesvdx.f | 6 +- lapack-netlib/SRC/zgesvj.f | 12 +- lapack-netlib/SRC/zgetc2.f | 4 +- lapack-netlib/SRC/zgetsls.f | 32 +- lapack-netlib/SRC/zggesx.f | 8 +- lapack-netlib/SRC/zgghd3.f | 5 +- lapack-netlib/SRC/zgsvj0.f | 6 +- lapack-netlib/SRC/zgsvj1.f | 4 +- lapack-netlib/SRC/zhb2st_kernels.f | 102 +- lapack-netlib/SRC/zhbev_2stage.f | 21 +- lapack-netlib/SRC/zhbevd_2stage.f | 16 +- lapack-netlib/SRC/zhbevx_2stage.f | 15 +- lapack-netlib/SRC/zhecon_3.f | 10 +- lapack-netlib/SRC/zhecon_rook.f | 10 +- lapack-netlib/SRC/zheequb.f | 4 +- lapack-netlib/SRC/zheev_2stage.f | 18 +- lapack-netlib/SRC/zheevd_2stage.f | 22 +- lapack-netlib/SRC/zheevr_2stage.f | 14 +- lapack-netlib/SRC/zheevx_2stage.f | 18 +- lapack-netlib/SRC/zhegv_2stage.f | 20 +- lapack-netlib/SRC/zhesv_aa.f | 10 +- lapack-netlib/SRC/zhesv_aa_2stage.f | 284 ++ lapack-netlib/SRC/zhetrd_2stage.f | 20 +- lapack-netlib/SRC/zhetrd_hb2st.F | 8 +- lapack-netlib/SRC/zhetrd_he2hb.f | 14 +- lapack-netlib/SRC/zhetrf_aa.f | 34 +- lapack-netlib/SRC/zhetrf_aa_2stage.f | 663 +++ lapack-netlib/SRC/zhetri2.f | 8 +- lapack-netlib/SRC/zhetri2x.f | 8 +- lapack-netlib/SRC/zhetri_3.f | 10 +- lapack-netlib/SRC/zhetri_3x.f | 8 +- lapack-netlib/SRC/zhetrs_3.f | 8 +- lapack-netlib/SRC/zhetrs_aa.f | 10 +- lapack-netlib/SRC/zhetrs_aa_2stage.f | 283 ++ lapack-netlib/SRC/zhfrk.f | 8 +- lapack-netlib/SRC/zhpevd.f | 9 +- lapack-netlib/SRC/zla_gbamv.f | 8 +- lapack-netlib/SRC/zla_gbrfsx_extended.f | 12 +- lapack-netlib/SRC/zla_geamv.f | 10 +- lapack-netlib/SRC/zla_gerfsx_extended.f | 12 +- lapack-netlib/SRC/zla_heamv.f | 10 +- lapack-netlib/SRC/zla_herfsx_extended.f | 15 +- lapack-netlib/SRC/zla_porfsx_extended.f | 15 +- lapack-netlib/SRC/zla_syamv.f | 10 +- lapack-netlib/SRC/zla_syrfsx_extended.f | 15 +- lapack-netlib/SRC/zlabrd.f | 8 +- lapack-netlib/SRC/zlahef_aa.f | 76 +- lapack-netlib/SRC/zlalsa.f | 9 +- lapack-netlib/SRC/zlalsd.f | 9 +- lapack-netlib/SRC/zlamswlq.f | 28 +- lapack-netlib/SRC/zlamtsqr.f | 16 +- lapack-netlib/SRC/zlaqr1.f | 14 +- lapack-netlib/SRC/zlaqr2.f | 30 +- lapack-netlib/SRC/zlaqr3.f | 26 +- lapack-netlib/SRC/zlaqr5.f | 48 +- lapack-netlib/SRC/zlarfg.f | 8 +- lapack-netlib/SRC/zlarfgp.f | 8 +- lapack-netlib/SRC/zlarrv.f | 9 +- lapack-netlib/SRC/zlaswlq.f | 6 +- lapack-netlib/SRC/zlaswp.f | 16 +- lapack-netlib/SRC/zlasyf_aa.f | 131 +- lapack-netlib/SRC/zlatbs.f | 8 +- lapack-netlib/SRC/zlatps.f | 8 +- lapack-netlib/SRC/zlatrs.f | 8 +- lapack-netlib/SRC/zstedc.f | 9 +- lapack-netlib/SRC/zstegr.f | 4 +- lapack-netlib/SRC/zstemr.f | 4 +- lapack-netlib/SRC/zsycon_3.f | 8 +- lapack-netlib/SRC/zsyconvf.f | 10 +- lapack-netlib/SRC/zsyconvf_rook.f | 10 +- lapack-netlib/SRC/zsyequb.f | 8 +- lapack-netlib/SRC/zsysv_aa.f | 8 +- lapack-netlib/SRC/zsysv_aa_2stage.f | 276 + lapack-netlib/SRC/zsytrf_aa.f | 35 +- lapack-netlib/SRC/zsytrf_aa_2stage.f | 668 +++ lapack-netlib/SRC/zsytri2.f | 8 +- lapack-netlib/SRC/zsytri2x.f | 8 +- lapack-netlib/SRC/zsytri_3.f | 10 +- lapack-netlib/SRC/zsytri_3x.f | 8 +- lapack-netlib/SRC/zsytrs_3.f | 8 +- lapack-netlib/SRC/zsytrs_aa.f | 10 +- lapack-netlib/SRC/zsytrs_aa_2stage.f | 281 ++ lapack-netlib/SRC/ztgex2.f | 12 +- lapack-netlib/SRC/ztgexc.f | 8 +- lapack-netlib/SRC/ztgsen.f | 4 +- lapack-netlib/SRC/ztplqt.f | 22 +- lapack-netlib/SRC/ztplqt2.f | 16 +- lapack-netlib/SRC/ztpmlqt.f | 28 +- lapack-netlib/SRC/ztpmqrt.f | 8 +- lapack-netlib/SRC/ztrevc.f | 8 +- lapack-netlib/SRC/ztrevc3.f | 8 +- lapack-netlib/SRC/ztrsna.f | 9 +- lapack-netlib/SRC/zunbdb1.f | 4 +- lapack-netlib/SRC/zunbdb2.f | 7 +- lapack-netlib/SRC/zunbdb3.f | 6 +- lapack-netlib/SRC/zunbdb4.f | 7 +- lapack-netlib/SRC/zunbdb5.f | 4 +- lapack-netlib/SRC/zunbdb6.f | 4 +- lapack-netlib/SRC/zuncsd.f | 14 +- lapack-netlib/SRC/zuncsd2by1.f | 4 +- lapack-netlib/SRC/zunm22.f | 6 +- lapack-netlib/TESTING/EIG/CMakeLists.txt | 39 +- lapack-netlib/TESTING/EIG/Makefile | 45 +- lapack-netlib/TESTING/EIG/cchkhb2stg.f | 8 +- lapack-netlib/TESTING/EIG/cdrvst2stg.f | 9 +- lapack-netlib/TESTING/EIG/cerrst.f | 14 +- lapack-netlib/TESTING/EIG/cget02.f | 10 +- lapack-netlib/TESTING/EIG/clarhs.f | 10 +- lapack-netlib/TESTING/EIG/dchksb2stg.f | 8 +- lapack-netlib/TESTING/EIG/dget02.f | 8 +- lapack-netlib/TESTING/EIG/dlarhs.f | 10 +- lapack-netlib/TESTING/EIG/ilaenv.f | 53 +- lapack-netlib/TESTING/EIG/schksb2stg.f | 8 +- lapack-netlib/TESTING/EIG/sget02.f | 8 +- lapack-netlib/TESTING/EIG/slarhs.f | 10 +- lapack-netlib/TESTING/EIG/zchkhb2stg.f | 6 +- lapack-netlib/TESTING/EIG/zdrvst2stg.f | 6 +- lapack-netlib/TESTING/EIG/zerrst.f | 6 +- lapack-netlib/TESTING/EIG/zget02.f | 10 +- lapack-netlib/TESTING/EIG/zlarhs.f | 10 +- lapack-netlib/TESTING/LIN/CMakeLists.txt | 146 +- lapack-netlib/TESTING/LIN/Makefile | 155 +- lapack-netlib/TESTING/LIN/cchkaa.f | 69 +- lapack-netlib/TESTING/LIN/cchkhe_aa.f | 98 +- lapack-netlib/TESTING/LIN/cchkhe_aa_2stage.f | 584 +++ lapack-netlib/TESTING/LIN/cchkrfp.f | 7 +- lapack-netlib/TESTING/LIN/cchksy_aa.f | 90 +- lapack-netlib/TESTING/LIN/cchksy_aa_2stage.f | 573 +++ lapack-netlib/TESTING/LIN/cdrvhe_aa.f | 9 +- lapack-netlib/TESTING/LIN/cdrvhe_aa_2stage.f | 490 ++ lapack-netlib/TESTING/LIN/cdrvls.f | 115 +- lapack-netlib/TESTING/LIN/cdrvrf3.f | 8 +- lapack-netlib/TESTING/LIN/cdrvrf4.f | 10 +- lapack-netlib/TESTING/LIN/cdrvsy_aa.f | 13 +- lapack-netlib/TESTING/LIN/cdrvsy_aa_2stage.f | 490 ++ lapack-netlib/TESTING/LIN/cerrhe.f | 74 +- lapack-netlib/TESTING/LIN/cerrsy.f | 63 +- lapack-netlib/TESTING/LIN/cerrvx.f | 68 +- lapack-netlib/TESTING/LIN/chet01_3.f | 6 +- lapack-netlib/TESTING/LIN/chkxer.f | 26 +- lapack-netlib/TESTING/LIN/clahilb.f | 24 +- lapack-netlib/TESTING/LIN/clarhs.f | 8 +- lapack-netlib/TESTING/LIN/csyt01_3.f | 6 +- lapack-netlib/TESTING/LIN/dchkaa.f | 34 +- lapack-netlib/TESTING/LIN/dchklqtp.f | 8 +- lapack-netlib/TESTING/LIN/dchkrfp.f | 7 +- lapack-netlib/TESTING/LIN/dchksy_aa.f | 90 +- lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f | 572 +++ lapack-netlib/TESTING/LIN/ddrvls.f | 99 +- lapack-netlib/TESTING/LIN/ddrvrf3.f | 8 +- lapack-netlib/TESTING/LIN/ddrvsy_aa.f | 11 +- lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f | 490 ++ lapack-netlib/TESTING/LIN/derrsy.f | 64 +- lapack-netlib/TESTING/LIN/derrvx.f | 39 +- lapack-netlib/TESTING/LIN/dlahilb.f | 18 +- lapack-netlib/TESTING/LIN/dsyt01_3.f | 6 +- lapack-netlib/TESTING/LIN/ilaenv.f | 53 +- lapack-netlib/TESTING/LIN/schkaa.f | 34 +- lapack-netlib/TESTING/LIN/schklqtp.f | 8 +- lapack-netlib/TESTING/LIN/schkqrtp.f | 8 +- lapack-netlib/TESTING/LIN/schkrfp.f | 7 +- lapack-netlib/TESTING/LIN/schksy_aa.f | 88 +- lapack-netlib/TESTING/LIN/schksy_aa_2stage.f | 572 +++ lapack-netlib/TESTING/LIN/schksy_rk.f | 10 +- lapack-netlib/TESTING/LIN/sdrvls.f | 101 +- lapack-netlib/TESTING/LIN/sdrvrf3.f | 8 +- lapack-netlib/TESTING/LIN/sdrvsy_aa.f | 11 +- lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f | 490 ++ lapack-netlib/TESTING/LIN/serrsy.f | 66 +- lapack-netlib/TESTING/LIN/serrvx.f | 39 +- lapack-netlib/TESTING/LIN/slahilb.f | 12 +- lapack-netlib/TESTING/LIN/sqrt04.f | 5 +- lapack-netlib/TESTING/LIN/sqrt05.f | 6 +- lapack-netlib/TESTING/LIN/ssyt01_3.f | 6 +- lapack-netlib/TESTING/LIN/ssyt01_aa.f | 8 +- lapack-netlib/TESTING/LIN/zchkaa.f | 83 +- lapack-netlib/TESTING/LIN/zchkhe_aa.f | 89 +- lapack-netlib/TESTING/LIN/zchkhe_aa_2stage.f | 580 +++ lapack-netlib/TESTING/LIN/zchkrfp.f | 7 +- lapack-netlib/TESTING/LIN/zchksy_aa.f | 101 +- lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f | 573 +++ lapack-netlib/TESTING/LIN/zchktsqr.f | 10 +- lapack-netlib/TESTING/LIN/zdrvhe_aa.f | 9 +- lapack-netlib/TESTING/LIN/zdrvhe_aa_2stage.f | 491 ++ lapack-netlib/TESTING/LIN/zdrvls.f | 115 +- lapack-netlib/TESTING/LIN/zdrvrf3.f | 8 +- lapack-netlib/TESTING/LIN/zdrvrf4.f | 10 +- lapack-netlib/TESTING/LIN/zdrvsy_aa.f | 15 +- lapack-netlib/TESTING/LIN/zdrvsy_aa_2stage.f | 492 ++ lapack-netlib/TESTING/LIN/zerrhe.f | 70 +- lapack-netlib/TESTING/LIN/zerrsy.f | 65 +- lapack-netlib/TESTING/LIN/zerrvx.f | 38 +- lapack-netlib/TESTING/LIN/zhet01_3.f | 6 +- lapack-netlib/TESTING/LIN/zlahilb.f | 20 +- lapack-netlib/TESTING/LIN/zlarhs.f | 8 +- lapack-netlib/TESTING/LIN/zsyt01_3.f | 6 +- lapack-netlib/TESTING/MATGEN/CMakeLists.txt | 57 +- lapack-netlib/TESTING/MATGEN/Makefile | 30 +- lapack-netlib/TESTING/MATGEN/clahilb.f | 22 +- lapack-netlib/TESTING/MATGEN/dlahilb.f | 24 +- lapack-netlib/TESTING/MATGEN/slahilb.f | 24 +- lapack-netlib/TESTING/MATGEN/zlahilb.f | 21 +- lapack-netlib/TESTING/Makefile | 430 +- lapack-netlib/TESTING/ctest.in | 3 + lapack-netlib/TESTING/dbal.in | 47 +- lapack-netlib/TESTING/dtest.in | 1 + lapack-netlib/TESTING/stest.in | 1 + lapack-netlib/TESTING/ztest.in | 3 + lapack-netlib/appveyor.yml | 64 + lapack-netlib/lapack.pc.in | 6 +- lapack-netlib/lapack_testing.py | 22 +- lapack-netlib/make.inc.example | 112 +- 2092 files changed, 45689 insertions(+), 23003 deletions(-) create mode 100644 lapack-netlib/.gitignore create mode 100644 lapack-netlib/.travis.yml create mode 100644 lapack-netlib/BLAS/Makefile delete mode 100644 lapack-netlib/BLAS/TESTING/Makeblat1 delete mode 100644 lapack-netlib/BLAS/TESTING/Makeblat2 delete mode 100644 lapack-netlib/BLAS/TESTING/Makeblat3 create mode 100644 lapack-netlib/BLAS/TESTING/Makefile rename lapack-netlib/BLAS/{ => TESTING}/cblat2.in (100%) rename lapack-netlib/BLAS/{ => TESTING}/cblat3.in (100%) rename lapack-netlib/BLAS/{ => TESTING}/dblat2.in (100%) rename lapack-netlib/BLAS/{ => TESTING}/dblat3.in (100%) rename lapack-netlib/BLAS/{ => TESTING}/sblat2.in (100%) rename lapack-netlib/BLAS/{ => TESTING}/sblat3.in (100%) rename lapack-netlib/BLAS/{ => TESTING}/zblat2.in (100%) rename lapack-netlib/BLAS/{ => TESTING}/zblat3.in (100%) create mode 100644 lapack-netlib/CMAKE/FindGcov.cmake create mode 100644 lapack-netlib/CMAKE/Findcodecov.cmake create mode 100644 lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_clacrm.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_clacrm_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_clarcm.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_clarcm_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_classq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_classq_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dlassq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dlassq_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_nancheck.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_slassq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_slassq_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zlacrm.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zlacrm_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zlarcm.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zlarcm_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zlassq.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zlassq_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage_work.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage.c create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage_work.c create mode 100644 lapack-netlib/SRC/chesv_aa_2stage.f create mode 100644 lapack-netlib/SRC/chetrf_aa_2stage.f create mode 100644 lapack-netlib/SRC/chetrs_aa_2stage.f create mode 100644 lapack-netlib/SRC/csysv_aa_2stage.f create mode 100644 lapack-netlib/SRC/csytrf_aa_2stage.f create mode 100644 lapack-netlib/SRC/csytrs_aa_2stage.f create mode 100644 lapack-netlib/SRC/dsysv_aa_2stage.f create mode 100644 lapack-netlib/SRC/dsytrf_aa_2stage.f create mode 100644 lapack-netlib/SRC/dsytrs_aa_2stage.f create mode 100644 lapack-netlib/SRC/ilaenv2stage.f delete mode 100644 lapack-netlib/SRC/ilaver.f create mode 100644 lapack-netlib/SRC/ssysv_aa_2stage.f create mode 100644 lapack-netlib/SRC/ssytrf_aa_2stage.f create mode 100644 lapack-netlib/SRC/ssytrs_aa_2stage.f create mode 100644 lapack-netlib/SRC/zhesv_aa_2stage.f create mode 100644 lapack-netlib/SRC/zhetrf_aa_2stage.f create mode 100644 lapack-netlib/SRC/zhetrs_aa_2stage.f create mode 100644 lapack-netlib/SRC/zsysv_aa_2stage.f create mode 100644 lapack-netlib/SRC/zsytrf_aa_2stage.f create mode 100644 lapack-netlib/SRC/zsytrs_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/cchkhe_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/cchksy_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/cdrvhe_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/cdrvsy_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/schksy_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/zchkhe_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/zdrvhe_aa_2stage.f create mode 100644 lapack-netlib/TESTING/LIN/zdrvsy_aa_2stage.f create mode 100644 lapack-netlib/appveyor.yml diff --git a/Makefile b/Makefile index a62573277..5198f9e2b 100644 --- a/Makefile +++ b/Makefile @@ -270,6 +270,7 @@ endif ifeq ($(BUILD_LAPACK_DEPRECATED), 1) -@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc endif + -@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc endif diff --git a/exports/gensymbol b/exports/gensymbol index f1983d458..b8c4b444b 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -159,7 +159,7 @@ ilaenv, ieeeck, lsamen, iparmq, ilaprec, ilatrans, ilauplo, iladiag, ilaver, slamch, slamc3, - + # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. # excluded: second_$(TIMER) sbdsdc, @@ -815,6 +815,28 @@ sladiv1, dladiv1, iparam2stage, + + # functions added for lapack-3.8.0 + + ilaenv2stage, + ssysv_aa_2stage, + ssytrf_aa_2stage, + ssytrs_aa_2stage, + chesv_aa_2stage, + chetrf_aa_2stage, + chetrs_aa_2stage, + csysv_aa_2stage, + csytrf_aa_2stage, + csytrs_aa_2stage, + dsysv_aa_2stage, + dsytrf_aa_2stage, + dsytrs_aa_2stage, + zhesv_aa_2stage, + zhetrf_aa_2stage, + zhetrs_aa_2stage, + zsysv_aa_2stage, + zsytrf_aa_2stage, + zsytrs_aa_2stage ); @lapack_extendedprecision_objs = ( @@ -3211,9 +3233,14 @@ LAPACKE_zuncsd2by1_work, ## new function from lapack-3.7.0 - + LAPACKE_cgelq, + LAPACKE_cgelq_work, + LAPACKE_cgemlq, + LAPACKE_cgemlq_work, LAPACKE_cgemqr, LAPACKE_cgemqr_work, + LAPACKE_cgeqr, + LAPACKE_cgeqr_work, LAPACKE_cgetsls, LAPACKE_cgetsls_work, LAPACKE_chbev_2stage, @@ -3264,8 +3291,14 @@ LAPACKE_csytrs_aa_work, LAPACKE_csytrs_3, LAPACKE_csytrs_3_work, + LAPACKE_dgelq, + LAPACKE_dgelq_work, + LAPACKE_dgemlq, + LAPACKE_dgemlq_work, LAPACKE_dgemqr, LAPACKE_dgemqr_work, + LAPACKE_dgeqr, + LAPACKE_dgeqr_work, LAPACKE_dgetsls, LAPACKE_dgetsls_work, LAPACKE_dsbev_2stage, @@ -3300,8 +3333,14 @@ LAPACKE_dsytrs_aa_work, LAPACKE_dsytrs_3, LAPACKE_dsytrs_3_work, + LAPACKE_sgelq, + LAPACKE_sgelq_work, + LAPACKE_sgemlq, + LAPACKE_sgemlq_work, LAPACKE_sgemqr, LAPACKE_sgemqr_work, + LAPACKE_sgeqr, + LAPACKE_sgeqr_work, LAPACKE_sgetsls, LAPACKE_sgetsls_work, LAPACKE_ssbev_2stage, @@ -3336,8 +3375,14 @@ LAPACKE_ssytrs_aa_work, LAPACKE_ssytrs_3, LAPACKE_ssytrs_3_work, + LAPACKE_zgelq, + LAPACKE_zgelq_work, + LAPACKE_zgemlq, + LAPACKE_zgemlq_work, LAPACKE_zgemqr, LAPACKE_zgemqr_work, + LAPACKE_zgeqr, + LAPACKE_zgeqr_work, LAPACKE_zgetsls, LAPACKE_zgetsls_work, LAPACKE_zhbev_2stage, @@ -3388,6 +3433,68 @@ LAPACKE_zsytrs_aa_work, LAPACKE_zsytrs_3, LAPACKE_zsytrs_3_work, + + ## new function from lapack-3.8.0 + LAPACKE_chesv_aa_2stage, + LAPACKE_chesv_aa_2stage_work, + LAPACKE_chetrf_aa_2stage, + LAPACKE_chetrf_aa_2stage_work, + LAPACKE_chetrs_aa_2stage, + LAPACKE_chetrs_aa_2stage_work, + LAPACKE_clacrm, + LAPACKE_clacrm_work, + LAPACKE_clarcm, + LAPACKE_clarcm_work, + LAPACKE_classq, + LAPACKE_classq_work, + LAPACKE_csysv_aa_2stage, + LAPACKE_csysv_aa_2stage_work, + LAPACKE_csytrf_aa_2stage, + LAPACKE_csytrf_aa_2stage_work, + LAPACKE_csytrs_aa_2stage, + LAPACKE_csytrs_aa_2stage_work, + LAPACKE_dlassq, + LAPACKE_dlassq_work, + LAPACKE_dsysv_aa_2stage, + LAPACKE_dsysv_aa_2stage_work, + LAPACKE_dsytrf_aa_2stage, + LAPACKE_dsytrf_aa_2stage_work, + LAPACKE_dsytrs_aa_2stage, + LAPACKE_dsytrs_aa_2stage_work, + LAPACKE_get_nancheck, + LAPACKE_set_nancheck, + LAPACKE_slassq, + LAPACKE_slassq_work, + LAPACKE_ssysv_aa_2stage, + LAPACKE_ssysv_aa_2stage_work, + LAPACKE_ssytrf_aa_2stage, + LAPACKE_ssytrf_aa_2stage_work, + LAPACKE_ssytrs_aa_2stage, + LAPACKE_ssytrs_aa_2stage_work, + LAPACKE_zhbev_2stage, + LAPACKE_zhbev_2stage_work, + LAPACKE_zhbevd_2stage, + LAPACKE_zhbevd_2stage_work, + LAPACKE_zhbevx_2stage, + LAPACKE_zhbevx_2stage_work, + LAPACKE_zhesv_aa_2stage, + LAPACKE_zhesv_aa_2stage_work, + LAPACKE_zhetrf_aa_2stage, + LAPACKE_zhetrf_aa_2stage_work, + LAPACKE_zhetrs_aa_2stage, + LAPACKE_zhetrs_aa_2stage_work, + LAPACKE_zlacrm, + LAPACKE_zlacrm_work, + LAPACKE_zlarcm, + LAPACKE_zlarcm_work, + LAPACKE_zlassq, + LAPACKE_zlassq_work, + LAPACKE_zsysv_aa_2stage, + LAPACKE_zsysv_aa_2stage_work, + LAPACKE_zsytrf_aa_2stage, + LAPACKE_zsytrf_aa_2stage_work, + LAPACKE_zsytrs_aa_2stage, + LAPACKE_zsytrs_aa_2stage_work, ); #These function may need 2 underscores. diff --git a/lapack-netlib/.gitignore b/lapack-netlib/.gitignore new file mode 100644 index 000000000..4ac90962e --- /dev/null +++ b/lapack-netlib/.gitignore @@ -0,0 +1,37 @@ +# ignore objects and archives, anywhere in the tree. +*.[oa] + +# test in INSTALL +INSTALL/test* + +# local make.inc +make.inc + +# BLAS testing +BLAS/TESTING/*.out +BLAS/TESTING/x* + +# CBLAS +CBLAS/include/cblas_mangling.h + +# CBLAS testing +CBLAS/testing/*.out +CBLAS/testing/x* + +# CBLAS examples +CBLAS/examples/cblas_ex1 +CBLAS/examples/cblas_ex2 + +# LAPACK testing +TESTING/LIN/xlintst* +TESTING/EIG/xeigtst* +TESTING/*.out +TESTING/*.txt +TESTING/x* + +# LAPACKE example +LAPACKE/example/xexample* + +# SED +SRC/*-e +LAPACKE/src/*-e diff --git a/lapack-netlib/.travis.yml b/lapack-netlib/.travis.yml new file mode 100644 index 000000000..68cfa607a --- /dev/null +++ b/lapack-netlib/.travis.yml @@ -0,0 +1,62 @@ +language: cpp + +addons: + apt: + sources: + - george-edison55-precise-backports # cmake + packages: + - cmake + - cmake-data + - gfortran + +os: + - linux + - osx + +env: + - CMAKE_BUILD_TYPE=Release + - CMAKE_BUILD_TYPE=Coverage + +install: + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; + then + for pkg in gcc cmake; do + if brew list -1 | grep -q "^${pkg}\$"; then + brew outdated $pkg || brew upgrade $pkg; + else + brew install $pkg; + fi + done + fi + +script: + - export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST + - export BRANCH=$(if [ "$TRAVIS_PULL_REQUEST" == "false" ]; then echo $TRAVIS_BRANCH; else echo `curl -s $PR | jq -r .head.ref`; fi) + - echo "TRAVIS_BRANCH=$TRAVIS_BRANCH, PR=$PR, BRANCH=$BRANCH" + - export SRC_DIR=$(pwd) + - export BLD_DIR=${SRC_DIR}/lapack-travis-bld + - export INST_DIR=${SRC_DIR}/../lapack-travis-install + - mkdir -p ${BLD_DIR} + - cd ${BLD_DIR} +# See issue #17 on github dashboard. Once resolved, use -DCBLAS=ON +# - cmake -DCMAKE_INSTALL_PREFIX=${INST_DIR} -DLAPACKE=ON ${SRC_DIR} + - cmake -DBUILDNAME:STRING="travis-${TRAVIS_OS_NAME}-${BRANCH}" + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${INST_DIR} + -DCBLAS:BOOL=ON + -DLAPACKE:BOOL=ON + -DBUILD_TESTING=ON + -DLAPACKE_WITH_TMG:BOOL=ON + ${SRC_DIR} + - ctest -D ExperimentalStart + - ctest -D ExperimentalConfigure + - ctest -D ExperimentalBuild -j2 + - ctest -D ExperimentalTest --schedule-random -j2 --output-on-failure --timeout 100 + - ctest -D ExperimentalSubmit + - make install -j2 + - if [[ "$CMAKE_BUILD_TYPE" == "Coverage" ]]; + then + echo "Coverage"; + make coverage; + bash <(curl -s https://codecov.io/bash) -X gcov; + fi diff --git a/lapack-netlib/BLAS/CMakeLists.txt b/lapack-netlib/BLAS/CMakeLists.txt index 42cd4f619..e122b2b33 100644 --- a/lapack-netlib/BLAS/CMakeLists.txt +++ b/lapack-netlib/BLAS/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(SRC) if(BUILD_TESTING) -add_subdirectory(TESTING) + add_subdirectory(TESTING) endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc @ONLY) install(FILES diff --git a/lapack-netlib/BLAS/Makefile b/lapack-netlib/BLAS/Makefile new file mode 100644 index 000000000..f9c4b534c --- /dev/null +++ b/lapack-netlib/BLAS/Makefile @@ -0,0 +1,22 @@ +include ../make.inc + +all: blas + +blas: + $(MAKE) -C SRC + +blas_testing: blas + $(MAKE) -C TESTING run + +clean: + $(MAKE) -C SRC clean + $(MAKE) -C TESTING clean +cleanobj: + $(MAKE) -C SRC cleanobj + $(MAKE) -C TESTING cleanobj +cleanlib: + $(MAKE) -C SRC cleanlib +cleanexe: + $(MAKE) -C TESTING cleanexe +cleantest: + $(MAKE) -C TESTING cleantest diff --git a/lapack-netlib/BLAS/SRC/CMakeLists.txt b/lapack-netlib/BLAS/SRC/CMakeLists.txt index a9306fc41..41c480432 100644 --- a/lapack-netlib/BLAS/SRC/CMakeLists.txt +++ b/lapack-netlib/BLAS/SRC/CMakeLists.txt @@ -23,39 +23,10 @@ # DBLAS3 -- Double precision real BLAS3 routines # ZBLAS3 -- Double precision complex BLAS3 routines # -# The library can be set up to include routines for any combination -# of the four precisions. To create or add to the library, enter make -# followed by one or more of the precisions desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Note that these commands are not safe for parallel builds. -# -# Alternatively, the commands -# make all -# or -# make -# without any arguments creates a library of all four precisions. -# The name of the library is held in BLASLIB, which is set in the -# top-level make.inc -# -# To remove the object files after the library is created, enter -# make clean -# To force the source files to be recompiled, enter, for example, -# make single FRC=FRC -# -#--------------------------------------------------------------------- -# -# Edward Anderson, University of Tennessee -# March 26, 1990 -# Susan Ostrouchov, Last updated September 30, 1994 -# ejr, May 2006. -# ####################################################################### #--------------------------------------------------------- -# Comment out the next 6 definitions if you already have -# the Level 1 BLAS. +# Level 1 BLAS #--------------------------------------------------------- set(SBLAS1 isamax.f sasum.f saxpy.f scopy.f sdot.f snrm2.f srot.f srotg.f sscal.f sswap.f sdsdot.f srotmg.f srotm.f) @@ -74,15 +45,12 @@ set(CB1AUX isamax.f sasum.f saxpy.f scopy.f snrm2.f sscal.f) set(ZB1AUX idamax.f dasum.f daxpy.f dcopy.f dnrm2.f dscal.f) #--------------------------------------------------------------------- -# The following line defines auxiliary routines needed by both the -# Level 2 and Level 3 BLAS. Comment it out only if you already have -# both the Level 2 and 3 BLAS. +# Auxiliary routines needed by both the Level 2 and Level 3 BLAS #--------------------------------------------------------------------- set(ALLBLAS lsame.f xerbla.f xerbla_array.f) #--------------------------------------------------------- -# Comment out the next 4 definitions if you already have -# the Level 2 BLAS. +# Level 2 BLAS #--------------------------------------------------------- set(SBLAS2 sgemv.f sgbmv.f ssymv.f ssbmv.f sspmv.f strmv.f stbmv.f stpmv.f strsv.f stbsv.f stpsv.f @@ -101,8 +69,7 @@ set(ZBLAS2 zgemv.f zgbmv.f zhemv.f zhbmv.f zhpmv.f zgerc.f zgeru.f zher.f zhpr.f zher2.f zhpr2.f) #--------------------------------------------------------- -# Comment out the next 4 definitions if you already have -# the Level 3 BLAS. +# Level 3 BLAS #--------------------------------------------------------- set(SBLAS3 sgemm.f ssymm.f ssyrk.f ssyr2k.f strmm.f strsm.f) @@ -113,37 +80,27 @@ set(DBLAS3 dgemm.f dsymm.f dsyrk.f dsyr2k.f dtrmm.f dtrsm.f) set(ZBLAS3 zgemm.f zsymm.f zsyrk.f zsyr2k.f ztrmm.f ztrsm.f zhemm.f zherk.f zher2k.f) -# default build all of it -set(ALLOBJ ${SBLAS1} ${SBLAS2} ${SBLAS3} ${DBLAS1} ${DBLAS2} ${DBLAS3} - ${CBLAS1} ${CBLAS2} ${CBLAS3} ${ZBLAS1} - ${ZBLAS2} ${ZBLAS3} ${ALLBLAS}) - -if(BLAS_SINGLE) - set(ALLOBJ ${SBLAS1} ${ALLBLAS} - ${SBLAS2} ${SBLAS3}) + + +set(SOURCES) +if(BUILD_SINGLE) + list(APPEND SOURCES ${SBLAS1} ${ALLBLAS} ${SBLAS2} ${SBLAS3}) endif() -if(BLAS_DOUBLE) - set(ALLOBJ ${DBLAS1} ${ALLBLAS} - ${DBLAS2} ${DBLAS3}) +if(BUILD_DOUBLE) + list(APPEND SOURCES ${DBLAS1} ${ALLBLAS} ${DBLAS2} ${DBLAS3}) endif() -if(BLAS_COMPLEX) - set(ALLOBJ ${BLASLIB} ${CBLAS1} ${CB1AUX} - ${ALLBLAS} ${CBLAS2}) +if(BUILD_COMPLEX) + list(APPEND SOURCES ${CBLAS1} ${CB1AUX} ${ALLBLAS} ${CBLAS2} ${CBLAS3}) endif() -if(BLAS_COMPLEX16) - set(ALLOBJ ${BLASLIB} ${ZBLAS1} ${ZB1AUX} - ${ALLBLAS} ${ZBLAS2} ${ZBLAS3}) +if(BUILD_COMPLEX16) + list(APPEND SOURCES ${ZBLAS1} ${ZB1AUX} ${ALLBLAS} ${ZBLAS2} ${ZBLAS3}) endif() +list(REMOVE_DUPLICATES SOURCES) - -add_library(blas ${ALLOBJ}) -#if(UNIX) -# target_link_libraries(blas m) -#endif() +add_library(blas ${SOURCES}) set_target_properties( blas PROPERTIES VERSION ${LAPACK_VERSION} SOVERSION ${LAPACK_MAJOR_VERSION} ) -target_link_libraries(blas) lapack_install_library(blas) diff --git a/lapack-netlib/BLAS/SRC/Makefile b/lapack-netlib/BLAS/SRC/Makefile index 47a15824c..a436365aa 100644 --- a/lapack-netlib/BLAS/SRC/Makefile +++ b/lapack-netlib/BLAS/SRC/Makefile @@ -42,7 +42,7 @@ include ../../make.inc # top-level make.inc # # To remove the object files after the library is created, enter -# make clean +# make cleanobj # To force the source files to be recompiled, enter, for example, # make single FRC=FRC # @@ -138,34 +138,33 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) $(BLASLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(SBLAS1) $(ALLBLAS) \ - $(SBLAS2) $(SBLAS3) + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \ - $(DBLAS2) $(DBLAS3) + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(CBLAS1) $(CB1AUX) \ - $(ALLBLAS) $(CBLAS2) $(CBLAS3) + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(ZBLAS1) $(ZB1AUX) \ - $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) FRC: @FRC=$(FRC) -clean: +clean: cleanobj cleanlib +cleanobj: rm -f *.o +cleanlib: + #rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas .f.o: $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/BLAS/SRC/caxpy.f b/lapack-netlib/BLAS/SRC/caxpy.f index 7ee77747c..b0c7f786b 100644 --- a/lapack-netlib/BLAS/SRC/caxpy.f +++ b/lapack-netlib/BLAS/SRC/caxpy.f @@ -27,6 +27,43 @@ *> CAXPY constant times a vector plus a vector. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] CA +*> \verbatim +*> CA is COMPLEX +*> On entry, CA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of CX +*> \endverbatim +*> +*> \param[in,out] CY +*> \verbatim +*> CY is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of CY +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +72,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * @@ -51,10 +88,10 @@ * ===================================================================== SUBROUTINE CAXPY(N,CA,CX,INCX,CY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX CA diff --git a/lapack-netlib/BLAS/SRC/ccopy.f b/lapack-netlib/BLAS/SRC/ccopy.f index eeb5f299a..dfcf7e405 100644 --- a/lapack-netlib/BLAS/SRC/ccopy.f +++ b/lapack-netlib/BLAS/SRC/ccopy.f @@ -26,6 +26,37 @@ *> CCOPY copies a vector x to a vector y. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of CX +*> \endverbatim +*> +*> \param[out] CY +*> \verbatim +*> CY is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of CY +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +65,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * @@ -50,10 +81,10 @@ * ===================================================================== SUBROUTINE CCOPY(N,CX,INCX,CY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/cdotc.f b/lapack-netlib/BLAS/SRC/cdotc.f index cd3416980..79901b791 100644 --- a/lapack-netlib/BLAS/SRC/cdotc.f +++ b/lapack-netlib/BLAS/SRC/cdotc.f @@ -28,6 +28,37 @@ *> *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of CX +*> \endverbatim +*> +*> \param[in] CY +*> \verbatim +*> CY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of CY +*> \endverbatim +* * Authors: * ======== * @@ -36,7 +67,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * @@ -52,10 +83,10 @@ * ===================================================================== COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/cdotu.f b/lapack-netlib/BLAS/SRC/cdotu.f index 1e127bc0e..c05252cbc 100644 --- a/lapack-netlib/BLAS/SRC/cdotu.f +++ b/lapack-netlib/BLAS/SRC/cdotu.f @@ -28,6 +28,37 @@ *> *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of CX +*> \endverbatim +*> +*> \param[in] CY +*> \verbatim +*> CY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of CY +*> \endverbatim +* * Authors: * ======== * @@ -36,7 +67,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * @@ -52,10 +83,10 @@ * ===================================================================== COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/cgbmv.f b/lapack-netlib/BLAS/SRC/cgbmv.f index de12852a8..3cf351969 100644 --- a/lapack-netlib/BLAS/SRC/cgbmv.f +++ b/lapack-netlib/BLAS/SRC/cgbmv.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry, the leading ( kl + ku + 1 ) by n part of the *> array A must contain the matrix of coefficients, supplied *> column by column, with the leading diagonal of the matrix in @@ -118,7 +118,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of DIMENSION at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -142,7 +142,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array of DIMENSION at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/cgemm.f b/lapack-netlib/BLAS/SRC/cgemm.f index 018ffad62..ba1d71445 100644 --- a/lapack-netlib/BLAS/SRC/cgemm.f +++ b/lapack-netlib/BLAS/SRC/cgemm.f @@ -97,7 +97,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise @@ -116,7 +116,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is +*> B is COMPLEX array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise @@ -142,7 +142,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX array of DIMENSION ( LDC, n ). +*> C is COMPLEX array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/cgemv.f b/lapack-netlib/BLAS/SRC/cgemv.f index aeb94090c..99bcdcd1a 100644 --- a/lapack-netlib/BLAS/SRC/cgemv.f +++ b/lapack-netlib/BLAS/SRC/cgemv.f @@ -73,7 +73,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> \endverbatim @@ -88,7 +88,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of DIMENSION at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -112,7 +112,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array of DIMENSION at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/cgerc.f b/lapack-netlib/BLAS/SRC/cgerc.f index e730edfde..f3f96a6a4 100644 --- a/lapack-netlib/BLAS/SRC/cgerc.f +++ b/lapack-netlib/BLAS/SRC/cgerc.f @@ -57,7 +57,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( m - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the m *> element vector x. @@ -72,7 +72,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX array of dimension at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -87,7 +87,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. On exit, A is *> overwritten by the updated matrix. diff --git a/lapack-netlib/BLAS/SRC/cgeru.f b/lapack-netlib/BLAS/SRC/cgeru.f index bc7540faa..f8342b5bc 100644 --- a/lapack-netlib/BLAS/SRC/cgeru.f +++ b/lapack-netlib/BLAS/SRC/cgeru.f @@ -57,7 +57,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( m - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the m *> element vector x. @@ -72,7 +72,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX array of dimension at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -87,7 +87,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. On exit, A is *> overwritten by the updated matrix. diff --git a/lapack-netlib/BLAS/SRC/chbmv.f b/lapack-netlib/BLAS/SRC/chbmv.f index 435c8dd2e..e25e6e2a7 100644 --- a/lapack-netlib/BLAS/SRC/chbmv.f +++ b/lapack-netlib/BLAS/SRC/chbmv.f @@ -72,7 +72,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the hermitian matrix, supplied column by @@ -123,7 +123,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of DIMENSION at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the *> vector x. @@ -144,7 +144,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array of DIMENSION at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the *> vector y. On exit, Y is overwritten by the updated vector y. diff --git a/lapack-netlib/BLAS/SRC/chemm.f b/lapack-netlib/BLAS/SRC/chemm.f index 834b209a3..8cf94fa2b 100644 --- a/lapack-netlib/BLAS/SRC/chemm.f +++ b/lapack-netlib/BLAS/SRC/chemm.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> m when SIDE = 'L' or 'l' and is n otherwise. *> Before entry with SIDE = 'L' or 'l', the m by m part of *> the array A must contain the hermitian matrix, such that @@ -124,7 +124,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX array of DIMENSION ( LDB, n ). +*> B is COMPLEX array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B. *> \endverbatim @@ -146,7 +146,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX array of DIMENSION ( LDC, n ). +*> C is COMPLEX array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/chemv.f b/lapack-netlib/BLAS/SRC/chemv.f index 215092979..be0f405c1 100644 --- a/lapack-netlib/BLAS/SRC/chemv.f +++ b/lapack-netlib/BLAS/SRC/chemv.f @@ -65,7 +65,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the hermitian matrix and the strictly @@ -88,7 +88,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -110,7 +110,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array of dimension at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/cher.f b/lapack-netlib/BLAS/SRC/cher.f index 78a4e0b7f..fde0c8551 100644 --- a/lapack-netlib/BLAS/SRC/cher.f +++ b/lapack-netlib/BLAS/SRC/cher.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/cher2.f b/lapack-netlib/BLAS/SRC/cher2.f index fd65f9707..ca12834fc 100644 --- a/lapack-netlib/BLAS/SRC/cher2.f +++ b/lapack-netlib/BLAS/SRC/cher2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX array of dimension at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/cher2k.f b/lapack-netlib/BLAS/SRC/cher2k.f index ace3c5d24..fb9925d5d 100644 --- a/lapack-netlib/BLAS/SRC/cher2k.f +++ b/lapack-netlib/BLAS/SRC/cher2k.f @@ -95,7 +95,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -114,7 +114,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is +*> B is COMPLEX array, dimension ( LDB, kb ), where kb is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array B must contain the matrix B, otherwise @@ -139,7 +139,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX array of DIMENSION ( LDC, n ). +*> C is COMPLEX array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/cherk.f b/lapack-netlib/BLAS/SRC/cherk.f index 1c47e57bb..79f40783c 100644 --- a/lapack-netlib/BLAS/SRC/cherk.f +++ b/lapack-netlib/BLAS/SRC/cherk.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -115,7 +115,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX array of DIMENSION ( LDC, n ). +*> C is COMPLEX array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/chpmv.f b/lapack-netlib/BLAS/SRC/chpmv.f index b182bfb91..bc0026ae6 100644 --- a/lapack-netlib/BLAS/SRC/chpmv.f +++ b/lapack-netlib/BLAS/SRC/chpmv.f @@ -65,7 +65,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is COMPLEX array of DIMENSION at least +*> AP is COMPLEX array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the hermitian matrix @@ -83,7 +83,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -105,7 +105,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array of dimension at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/chpr.f b/lapack-netlib/BLAS/SRC/chpr.f index 6212c0438..25df89497 100644 --- a/lapack-netlib/BLAS/SRC/chpr.f +++ b/lapack-netlib/BLAS/SRC/chpr.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is COMPLEX array of DIMENSION at least +*> AP is COMPLEX array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the hermitian matrix diff --git a/lapack-netlib/BLAS/SRC/chpr2.f b/lapack-netlib/BLAS/SRC/chpr2.f index 3ca388a48..66ef2f290 100644 --- a/lapack-netlib/BLAS/SRC/chpr2.f +++ b/lapack-netlib/BLAS/SRC/chpr2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX array of dimension at least +*> Y is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is COMPLEX array of DIMENSION at least +*> AP is COMPLEX array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the hermitian matrix diff --git a/lapack-netlib/BLAS/SRC/crotg.f b/lapack-netlib/BLAS/SRC/crotg.f index 1cdb662ee..6d06a5bcd 100644 --- a/lapack-netlib/BLAS/SRC/crotg.f +++ b/lapack-netlib/BLAS/SRC/crotg.f @@ -24,6 +24,29 @@ *> CROTG determines a complex Givens rotation. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] CA +*> \verbatim +*> CA is COMPLEX +*> \endverbatim +*> +*> \param[in] CB +*> \verbatim +*> CB is COMPLEX +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is COMPLEX +*> \endverbatim +* * Authors: * ======== * @@ -32,17 +55,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * * ===================================================================== SUBROUTINE CROTG(CA,CB,C,S) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX CA,CB,S diff --git a/lapack-netlib/BLAS/SRC/cscal.f b/lapack-netlib/BLAS/SRC/cscal.f index 1405a977d..b896af93d 100644 --- a/lapack-netlib/BLAS/SRC/cscal.f +++ b/lapack-netlib/BLAS/SRC/cscal.f @@ -27,6 +27,32 @@ *> CSCAL scales a vector by a constant. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] CA +*> \verbatim +*> CA is COMPLEX +*> On entry, CA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of CX +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +61,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * @@ -52,10 +78,10 @@ * ===================================================================== SUBROUTINE CSCAL(N,CA,CX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX CA diff --git a/lapack-netlib/BLAS/SRC/csscal.f b/lapack-netlib/BLAS/SRC/csscal.f index dc02654f1..bc90c0d61 100644 --- a/lapack-netlib/BLAS/SRC/csscal.f +++ b/lapack-netlib/BLAS/SRC/csscal.f @@ -27,6 +27,32 @@ *> CSSCAL scales a complex vector by a real constant. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is REAL +*> On entry, SA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of CX +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +61,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * @@ -52,10 +78,10 @@ * ===================================================================== SUBROUTINE CSSCAL(N,SA,CX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL SA diff --git a/lapack-netlib/BLAS/SRC/cswap.f b/lapack-netlib/BLAS/SRC/cswap.f index 369a294ea..272acdfa4 100644 --- a/lapack-netlib/BLAS/SRC/cswap.f +++ b/lapack-netlib/BLAS/SRC/cswap.f @@ -26,6 +26,37 @@ *> CSWAP interchanges two vectors. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of CX +*> \endverbatim +*> +*> \param[in,out] CY +*> \verbatim +*> CY is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of CY +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +65,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_blas_level1 * @@ -50,10 +81,10 @@ * ===================================================================== SUBROUTINE CSWAP(N,CX,INCX,CY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/csymm.f b/lapack-netlib/BLAS/SRC/csymm.f index 906a57201..8f0526410 100644 --- a/lapack-netlib/BLAS/SRC/csymm.f +++ b/lapack-netlib/BLAS/SRC/csymm.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> m when SIDE = 'L' or 'l' and is n otherwise. *> Before entry with SIDE = 'L' or 'l', the m by m part of *> the array A must contain the symmetric matrix, such that @@ -122,7 +122,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX array of DIMENSION ( LDB, n ). +*> B is COMPLEX array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B. *> \endverbatim @@ -144,7 +144,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX array of DIMENSION ( LDC, n ). +*> C is COMPLEX array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/csyr2k.f b/lapack-netlib/BLAS/SRC/csyr2k.f index 1fdeadc0f..b321d092d 100644 --- a/lapack-netlib/BLAS/SRC/csyr2k.f +++ b/lapack-netlib/BLAS/SRC/csyr2k.f @@ -92,7 +92,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -111,7 +111,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is +*> B is COMPLEX array, dimension ( LDB, kb ), where kb is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array B must contain the matrix B, otherwise @@ -136,7 +136,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX array of DIMENSION ( LDC, n ). +*> C is COMPLEX array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/csyrk.f b/lapack-netlib/BLAS/SRC/csyrk.f index c4494c5a8..c25384ac5 100644 --- a/lapack-netlib/BLAS/SRC/csyrk.f +++ b/lapack-netlib/BLAS/SRC/csyrk.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -115,7 +115,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX array of DIMENSION ( LDC, n ). +*> C is COMPLEX array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/ctbmv.f b/lapack-netlib/BLAS/SRC/ctbmv.f index 1513c1a34..205ab9c4e 100644 --- a/lapack-netlib/BLAS/SRC/ctbmv.f +++ b/lapack-netlib/BLAS/SRC/ctbmv.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ). *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -142,7 +142,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/ctbsv.f b/lapack-netlib/BLAS/SRC/ctbsv.f index f4cc3306f..16050f167 100644 --- a/lapack-netlib/BLAS/SRC/ctbsv.f +++ b/lapack-netlib/BLAS/SRC/ctbsv.f @@ -94,7 +94,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -146,7 +146,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/ctpmv.f b/lapack-netlib/BLAS/SRC/ctpmv.f index 4582acc9f..e69979152 100644 --- a/lapack-netlib/BLAS/SRC/ctpmv.f +++ b/lapack-netlib/BLAS/SRC/ctpmv.f @@ -80,7 +80,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is COMPLEX array of DIMENSION at least +*> AP is COMPLEX array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -98,7 +98,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/ctpsv.f b/lapack-netlib/BLAS/SRC/ctpsv.f index 2fcd19bac..2335ef502 100644 --- a/lapack-netlib/BLAS/SRC/ctpsv.f +++ b/lapack-netlib/BLAS/SRC/ctpsv.f @@ -83,7 +83,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is COMPLEX array of DIMENSION at least +*> AP is COMPLEX array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -101,7 +101,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/ctrmm.f b/lapack-netlib/BLAS/SRC/ctrmm.f index a23fb27c6..6f79d066d 100644 --- a/lapack-netlib/BLAS/SRC/ctrmm.f +++ b/lapack-netlib/BLAS/SRC/ctrmm.f @@ -109,7 +109,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, k ), where k is m +*> A is COMPLEX array, dimension ( LDA, k ), where k is m *> when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k *> upper triangular part of the array A must contain the upper @@ -134,7 +134,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is COMPLEX array of DIMENSION ( LDB, n ). +*> B is COMPLEX array, dimension ( LDB, N ). *> Before entry, the leading m by n part of the array B must *> contain the matrix B, and on exit is overwritten by the *> transformed matrix. diff --git a/lapack-netlib/BLAS/SRC/ctrmv.f b/lapack-netlib/BLAS/SRC/ctrmv.f index 8795e8702..1eec65be1 100644 --- a/lapack-netlib/BLAS/SRC/ctrmv.f +++ b/lapack-netlib/BLAS/SRC/ctrmv.f @@ -80,7 +80,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ). *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -103,7 +103,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/ctrsm.f b/lapack-netlib/BLAS/SRC/ctrsm.f index 7ee5c9470..2c2aff020 100644 --- a/lapack-netlib/BLAS/SRC/ctrsm.f +++ b/lapack-netlib/BLAS/SRC/ctrsm.f @@ -111,7 +111,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, k ), +*> A is COMPLEX array, dimension ( LDA, k ), *> where k is m when SIDE = 'L' or 'l' *> and k is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k @@ -137,7 +137,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is COMPLEX array of DIMENSION ( LDB, n ). +*> B is COMPLEX array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the right-hand side matrix B, and on exit is *> overwritten by the solution matrix X. diff --git a/lapack-netlib/BLAS/SRC/ctrsv.f b/lapack-netlib/BLAS/SRC/ctrsv.f index 7981a21d1..81c08240f 100644 --- a/lapack-netlib/BLAS/SRC/ctrsv.f +++ b/lapack-netlib/BLAS/SRC/ctrsv.f @@ -83,7 +83,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -106,7 +106,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX array of dimension at least +*> X is COMPLEX array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/dasum.f b/lapack-netlib/BLAS/SRC/dasum.f index fd3d91044..fa0d55e3f 100644 --- a/lapack-netlib/BLAS/SRC/dasum.f +++ b/lapack-netlib/BLAS/SRC/dasum.f @@ -26,6 +26,26 @@ *> DASUM takes the sum of the absolute values. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +54,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -51,10 +71,10 @@ * ===================================================================== DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/daxpy.f b/lapack-netlib/BLAS/SRC/daxpy.f index 5203e50cf..464673722 100644 --- a/lapack-netlib/BLAS/SRC/daxpy.f +++ b/lapack-netlib/BLAS/SRC/daxpy.f @@ -28,6 +28,43 @@ *> uses unrolled loops for increments equal to one. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] DA +*> \verbatim +*> DA is DOUBLE PRECISION +*> On entry, DA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX +*> \endverbatim +*> +*> \param[in,out] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of DY +*> \endverbatim +* * Authors: * ======== * @@ -36,7 +73,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -52,10 +89,10 @@ * ===================================================================== SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. DOUBLE PRECISION DA diff --git a/lapack-netlib/BLAS/SRC/dcabs1.f b/lapack-netlib/BLAS/SRC/dcabs1.f index d71fe7af6..95016bdba 100644 --- a/lapack-netlib/BLAS/SRC/dcabs1.f +++ b/lapack-netlib/BLAS/SRC/dcabs1.f @@ -24,6 +24,14 @@ *> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] Z +*> \verbatim +*> Z is COMPLEX*16 +*> \endverbatim +* * Authors: * ======== * @@ -32,17 +40,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * * ===================================================================== DOUBLE PRECISION FUNCTION DCABS1(Z) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX*16 Z diff --git a/lapack-netlib/BLAS/SRC/dcopy.f b/lapack-netlib/BLAS/SRC/dcopy.f index bbc38a75c..d7c350b42 100644 --- a/lapack-netlib/BLAS/SRC/dcopy.f +++ b/lapack-netlib/BLAS/SRC/dcopy.f @@ -24,7 +24,38 @@ *> \verbatim *> *> DCOPY copies a vector, x, to a vector, y. -*> uses unrolled loops for increments equal to one. +*> uses unrolled loops for increments equal to 1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX +*> \endverbatim +*> +*> \param[out] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of DY *> \endverbatim * * Authors: @@ -35,7 +66,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -51,10 +82,10 @@ * ===================================================================== SUBROUTINE DCOPY(N,DX,INCX,DY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/ddot.f b/lapack-netlib/BLAS/SRC/ddot.f index 1aea8240b..0edf2120f 100644 --- a/lapack-netlib/BLAS/SRC/ddot.f +++ b/lapack-netlib/BLAS/SRC/ddot.f @@ -27,6 +27,37 @@ *> uses unrolled loops for increments equal to one. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of DY +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +66,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -51,10 +82,10 @@ * ===================================================================== DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/dgbmv.f b/lapack-netlib/BLAS/SRC/dgbmv.f index 3769e18b0..29fb54343 100644 --- a/lapack-netlib/BLAS/SRC/dgbmv.f +++ b/lapack-netlib/BLAS/SRC/dgbmv.f @@ -85,7 +85,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry, the leading ( kl + ku + 1 ) by n part of the *> array A must contain the matrix of coefficients, supplied *> column by column, with the leading diagonal of the matrix in @@ -116,7 +116,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of DIMENSION at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -140,7 +140,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array of DIMENSION at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/dgemm.f b/lapack-netlib/BLAS/SRC/dgemm.f index 5c5a2ac2b..3a60ca4e7 100644 --- a/lapack-netlib/BLAS/SRC/dgemm.f +++ b/lapack-netlib/BLAS/SRC/dgemm.f @@ -97,7 +97,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise @@ -116,7 +116,7 @@ *> *> \param[in] B *> \verbatim -*> B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +*> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise @@ -142,7 +142,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is DOUBLE PRECISION array of DIMENSION ( LDC, n ). +*> C is DOUBLE PRECISION array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/dgemv.f b/lapack-netlib/BLAS/SRC/dgemv.f index dd14c3509..08e395b1c 100644 --- a/lapack-netlib/BLAS/SRC/dgemv.f +++ b/lapack-netlib/BLAS/SRC/dgemv.f @@ -71,7 +71,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> \endverbatim @@ -86,7 +86,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of DIMENSION at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -110,7 +110,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array of DIMENSION at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/dger.f b/lapack-netlib/BLAS/SRC/dger.f index 289141e8e..bdc8ef434 100644 --- a/lapack-netlib/BLAS/SRC/dger.f +++ b/lapack-netlib/BLAS/SRC/dger.f @@ -57,7 +57,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( m - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the m *> element vector x. @@ -72,7 +72,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is DOUBLE PRECISION array of dimension at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -87,7 +87,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. On exit, A is *> overwritten by the updated matrix. diff --git a/lapack-netlib/BLAS/SRC/dnrm2.f b/lapack-netlib/BLAS/SRC/dnrm2.f index 0d7062fdc..9e069feaa 100644 --- a/lapack-netlib/BLAS/SRC/dnrm2.f +++ b/lapack-netlib/BLAS/SRC/dnrm2.f @@ -29,6 +29,26 @@ *> DNRM2 := sqrt( x'*x ) *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX +*> \endverbatim +* * Authors: * ======== * @@ -37,7 +57,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -54,10 +74,10 @@ * ===================================================================== DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/drot.f b/lapack-netlib/BLAS/SRC/drot.f index baaae5c9f..abc90cd18 100644 --- a/lapack-netlib/BLAS/SRC/drot.f +++ b/lapack-netlib/BLAS/SRC/drot.f @@ -27,6 +27,47 @@ *> DROT applies a plane rotation. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX +*> \endverbatim +*> +*> \param[in,out] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of DY +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +76,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -51,10 +92,10 @@ * ===================================================================== SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. DOUBLE PRECISION C,S diff --git a/lapack-netlib/BLAS/SRC/drotg.f b/lapack-netlib/BLAS/SRC/drotg.f index 85d04cd8f..c030d3c64 100644 --- a/lapack-netlib/BLAS/SRC/drotg.f +++ b/lapack-netlib/BLAS/SRC/drotg.f @@ -23,6 +23,29 @@ *> DROTG construct givens plane rotation. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] DA +*> \verbatim +*> DA is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] DB +*> \verbatim +*> DB is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION +*> \endverbatim +* * Authors: * ======== * @@ -31,7 +54,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -46,10 +69,10 @@ * ===================================================================== SUBROUTINE DROTG(DA,DB,C,S) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. DOUBLE PRECISION C,DA,DB,S diff --git a/lapack-netlib/BLAS/SRC/drotm.f b/lapack-netlib/BLAS/SRC/drotm.f index b006dbd50..fc5d8fcb9 100644 --- a/lapack-netlib/BLAS/SRC/drotm.f +++ b/lapack-netlib/BLAS/SRC/drotm.f @@ -51,8 +51,7 @@ *> *> \param[in,out] DX *> \verbatim -*> DX is DOUBLE PRECISION array, dimension N -*> double precision vector with N elements +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) *> \endverbatim *> *> \param[in] INCX @@ -63,8 +62,7 @@ *> *> \param[in,out] DY *> \verbatim -*> DY is DOUBLE PRECISION array, dimension N -*> double precision vector with N elements +*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) *> \endverbatim *> *> \param[in] INCY @@ -73,9 +71,9 @@ *> storage spacing between elements of DY *> \endverbatim *> -*> \param[in,out] DPARAM +*> \param[in] DPARAM *> \verbatim -*> DPARAM is DOUBLE PRECISION array, dimension 5 +*> DPARAM is DOUBLE PRECISION array, dimension (5) *> DPARAM(1)=DFLAG *> DPARAM(2)=DH11 *> DPARAM(3)=DH21 @@ -91,17 +89,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * * ===================================================================== SUBROUTINE DROTM(N,DX,INCX,DY,INCY,DPARAM) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/drotmg.f b/lapack-netlib/BLAS/SRC/drotmg.f index 1fb025faa..701e9de31 100644 --- a/lapack-netlib/BLAS/SRC/drotmg.f +++ b/lapack-netlib/BLAS/SRC/drotmg.f @@ -65,9 +65,9 @@ *> DY1 is DOUBLE PRECISION *> \endverbatim *> -*> \param[in,out] DPARAM +*> \param[out] DPARAM *> \verbatim -*> DPARAM is DOUBLE PRECISION array, dimension 5 +*> DPARAM is DOUBLE PRECISION array, dimension (5) *> DPARAM(1)=DFLAG *> DPARAM(2)=DH11 *> DPARAM(3)=DH21 @@ -83,17 +83,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * * ===================================================================== SUBROUTINE DROTMG(DD1,DD2,DX1,DY1,DPARAM) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. DOUBLE PRECISION DD1,DD2,DX1,DY1 diff --git a/lapack-netlib/BLAS/SRC/dsbmv.f b/lapack-netlib/BLAS/SRC/dsbmv.f index aea121345..0f7c69406 100644 --- a/lapack-netlib/BLAS/SRC/dsbmv.f +++ b/lapack-netlib/BLAS/SRC/dsbmv.f @@ -72,7 +72,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the symmetric matrix, supplied column by @@ -120,7 +120,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of DIMENSION at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the *> vector x. @@ -141,7 +141,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array of DIMENSION at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the *> vector y. On exit, Y is overwritten by the updated vector y. diff --git a/lapack-netlib/BLAS/SRC/dscal.f b/lapack-netlib/BLAS/SRC/dscal.f index 8bbfec6f3..5f8b0927c 100644 --- a/lapack-netlib/BLAS/SRC/dscal.f +++ b/lapack-netlib/BLAS/SRC/dscal.f @@ -25,7 +25,33 @@ *> \verbatim *> *> DSCAL scales a vector by a constant. -*> uses unrolled loops for increment equal to one. +*> uses unrolled loops for increment equal to 1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] DA +*> \verbatim +*> DA is DOUBLE PRECISION +*> On entry, DA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in,out] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX *> \endverbatim * * Authors: @@ -36,7 +62,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -53,10 +79,10 @@ * ===================================================================== SUBROUTINE DSCAL(N,DA,DX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. DOUBLE PRECISION DA diff --git a/lapack-netlib/BLAS/SRC/dspmv.f b/lapack-netlib/BLAS/SRC/dspmv.f index 72a28fede..6e26c0f4f 100644 --- a/lapack-netlib/BLAS/SRC/dspmv.f +++ b/lapack-netlib/BLAS/SRC/dspmv.f @@ -65,7 +65,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is DOUBLE PRECISION array of DIMENSION at least +*> AP is DOUBLE PRECISION array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the symmetric matrix @@ -81,7 +81,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -103,7 +103,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array of dimension at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/dspr.f b/lapack-netlib/BLAS/SRC/dspr.f index e89f87d4e..f9d709e2d 100644 --- a/lapack-netlib/BLAS/SRC/dspr.f +++ b/lapack-netlib/BLAS/SRC/dspr.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is DOUBLE PRECISION array of DIMENSION at least +*> AP is DOUBLE PRECISION array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the symmetric matrix diff --git a/lapack-netlib/BLAS/SRC/dspr2.f b/lapack-netlib/BLAS/SRC/dspr2.f index 4cd416f57..175d8e84c 100644 --- a/lapack-netlib/BLAS/SRC/dspr2.f +++ b/lapack-netlib/BLAS/SRC/dspr2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is DOUBLE PRECISION array of dimension at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is DOUBLE PRECISION array of DIMENSION at least +*> AP is DOUBLE PRECISION array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the symmetric matrix diff --git a/lapack-netlib/BLAS/SRC/dswap.f b/lapack-netlib/BLAS/SRC/dswap.f index 5bd8f7d29..e09cabcff 100644 --- a/lapack-netlib/BLAS/SRC/dswap.f +++ b/lapack-netlib/BLAS/SRC/dswap.f @@ -23,8 +23,39 @@ *> *> \verbatim *> -*> interchanges two vectors. -*> uses unrolled loops for increments equal one. +*> DSWAP interchanges two vectors. +*> uses unrolled loops for increments equal to 1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of DX +*> \endverbatim +*> +*> \param[in,out] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of DY *> \endverbatim * * Authors: @@ -35,7 +66,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -51,10 +82,10 @@ * ===================================================================== SUBROUTINE DSWAP(N,DX,INCX,DY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/dsymm.f b/lapack-netlib/BLAS/SRC/dsymm.f index 77c797ea6..622d2469f 100644 --- a/lapack-netlib/BLAS/SRC/dsymm.f +++ b/lapack-netlib/BLAS/SRC/dsymm.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is *> m when SIDE = 'L' or 'l' and is n otherwise. *> Before entry with SIDE = 'L' or 'l', the m by m part of *> the array A must contain the symmetric matrix, such that @@ -122,7 +122,7 @@ *> *> \param[in] B *> \verbatim -*> B is DOUBLE PRECISION array of DIMENSION ( LDB, n ). +*> B is DOUBLE PRECISION array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B. *> \endverbatim @@ -144,7 +144,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is DOUBLE PRECISION array of DIMENSION ( LDC, n ). +*> C is DOUBLE PRECISION array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/dsymv.f b/lapack-netlib/BLAS/SRC/dsymv.f index af2dfd2a2..4bf973f10 100644 --- a/lapack-netlib/BLAS/SRC/dsymv.f +++ b/lapack-netlib/BLAS/SRC/dsymv.f @@ -65,7 +65,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the symmetric matrix and the strictly @@ -86,7 +86,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -108,7 +108,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array of dimension at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/dsyr.f b/lapack-netlib/BLAS/SRC/dsyr.f index c998ee821..7fe256fa5 100644 --- a/lapack-netlib/BLAS/SRC/dsyr.f +++ b/lapack-netlib/BLAS/SRC/dsyr.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/dsyr2.f b/lapack-netlib/BLAS/SRC/dsyr2.f index 8bfa5fe0f..8970c4dcf 100644 --- a/lapack-netlib/BLAS/SRC/dsyr2.f +++ b/lapack-netlib/BLAS/SRC/dsyr2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is DOUBLE PRECISION array of dimension at least +*> Y is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/dsyr2k.f b/lapack-netlib/BLAS/SRC/dsyr2k.f index 6dd7ca295..f3a5940c7 100644 --- a/lapack-netlib/BLAS/SRC/dsyr2k.f +++ b/lapack-netlib/BLAS/SRC/dsyr2k.f @@ -95,7 +95,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -114,7 +114,7 @@ *> *> \param[in] B *> \verbatim -*> B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +*> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array B must contain the matrix B, otherwise @@ -139,7 +139,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is DOUBLE PRECISION array of DIMENSION ( LDC, n ). +*> C is DOUBLE PRECISION array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/dsyrk.f b/lapack-netlib/BLAS/SRC/dsyrk.f index bd70dfba0..4be4d8d3c 100644 --- a/lapack-netlib/BLAS/SRC/dsyrk.f +++ b/lapack-netlib/BLAS/SRC/dsyrk.f @@ -92,7 +92,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -117,7 +117,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is DOUBLE PRECISION array of DIMENSION ( LDC, n ). +*> C is DOUBLE PRECISION array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/dtbmv.f b/lapack-netlib/BLAS/SRC/dtbmv.f index 20dd83eac..e27d50f2c 100644 --- a/lapack-netlib/BLAS/SRC/dtbmv.f +++ b/lapack-netlib/BLAS/SRC/dtbmv.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -142,7 +142,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/dtbsv.f b/lapack-netlib/BLAS/SRC/dtbsv.f index ad468288d..d8c6f144c 100644 --- a/lapack-netlib/BLAS/SRC/dtbsv.f +++ b/lapack-netlib/BLAS/SRC/dtbsv.f @@ -94,7 +94,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -146,7 +146,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/dtpmv.f b/lapack-netlib/BLAS/SRC/dtpmv.f index 3b0e62094..bad91f32e 100644 --- a/lapack-netlib/BLAS/SRC/dtpmv.f +++ b/lapack-netlib/BLAS/SRC/dtpmv.f @@ -80,7 +80,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is DOUBLE PRECISION array of DIMENSION at least +*> AP is DOUBLE PRECISION array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -98,7 +98,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/dtpsv.f b/lapack-netlib/BLAS/SRC/dtpsv.f index a5d9faa48..abcd0770c 100644 --- a/lapack-netlib/BLAS/SRC/dtpsv.f +++ b/lapack-netlib/BLAS/SRC/dtpsv.f @@ -83,7 +83,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is DOUBLE PRECISION array of DIMENSION at least +*> AP is DOUBLE PRECISION array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -101,7 +101,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/dtrmm.f b/lapack-netlib/BLAS/SRC/dtrmm.f index e315d5960..0241c4d14 100644 --- a/lapack-netlib/BLAS/SRC/dtrmm.f +++ b/lapack-netlib/BLAS/SRC/dtrmm.f @@ -109,7 +109,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m +*> A is DOUBLE PRECISION array, dimension ( LDA, k ), where k is m *> when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k *> upper triangular part of the array A must contain the upper @@ -134,7 +134,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is DOUBLE PRECISION array of DIMENSION ( LDB, n ). +*> B is DOUBLE PRECISION array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B, and on exit is overwritten by the *> transformed matrix. diff --git a/lapack-netlib/BLAS/SRC/dtrmv.f b/lapack-netlib/BLAS/SRC/dtrmv.f index 83959d064..11c12ac72 100644 --- a/lapack-netlib/BLAS/SRC/dtrmv.f +++ b/lapack-netlib/BLAS/SRC/dtrmv.f @@ -80,7 +80,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -103,7 +103,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/dtrsm.f b/lapack-netlib/BLAS/SRC/dtrsm.f index bc440f068..5a92bcafd 100644 --- a/lapack-netlib/BLAS/SRC/dtrsm.f +++ b/lapack-netlib/BLAS/SRC/dtrsm.f @@ -111,7 +111,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, k ), +*> A is DOUBLE PRECISION array, dimension ( LDA, k ), *> where k is m when SIDE = 'L' or 'l' *> and k is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k @@ -137,7 +137,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is DOUBLE PRECISION array of DIMENSION ( LDB, n ). +*> B is DOUBLE PRECISION array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the right-hand side matrix B, and on exit is *> overwritten by the solution matrix X. diff --git a/lapack-netlib/BLAS/SRC/dtrsv.f b/lapack-netlib/BLAS/SRC/dtrsv.f index cab3fd989..331f1d431 100644 --- a/lapack-netlib/BLAS/SRC/dtrsv.f +++ b/lapack-netlib/BLAS/SRC/dtrsv.f @@ -83,7 +83,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -106,7 +106,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is DOUBLE PRECISION array of dimension at least +*> X is DOUBLE PRECISION array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/dzasum.f b/lapack-netlib/BLAS/SRC/dzasum.f index 9f0d3fd08..f42101820 100644 --- a/lapack-netlib/BLAS/SRC/dzasum.f +++ b/lapack-netlib/BLAS/SRC/dzasum.f @@ -27,6 +27,26 @@ *> returns a single precision result. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +55,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -52,10 +72,10 @@ * ===================================================================== DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/dznrm2.f b/lapack-netlib/BLAS/SRC/dznrm2.f index 3b6bf6132..f868c16e4 100644 --- a/lapack-netlib/BLAS/SRC/dznrm2.f +++ b/lapack-netlib/BLAS/SRC/dznrm2.f @@ -29,6 +29,27 @@ *> DZNRM2 := sqrt( x**H*x ) *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> complex vector with N elements +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of X +*> \endverbatim +* * Authors: * ======== * @@ -37,7 +58,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_blas_level1 * @@ -54,10 +75,10 @@ * ===================================================================== DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/icamax.f b/lapack-netlib/BLAS/SRC/icamax.f index 37035c7af..8057ab095 100644 --- a/lapack-netlib/BLAS/SRC/icamax.f +++ b/lapack-netlib/BLAS/SRC/icamax.f @@ -26,6 +26,26 @@ *> ICAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)| *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +54,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup aux_blas * @@ -51,10 +71,10 @@ * ===================================================================== INTEGER FUNCTION ICAMAX(N,CX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/idamax.f b/lapack-netlib/BLAS/SRC/idamax.f index 958566028..7268534db 100644 --- a/lapack-netlib/BLAS/SRC/idamax.f +++ b/lapack-netlib/BLAS/SRC/idamax.f @@ -26,6 +26,26 @@ *> IDAMAX finds the index of the first element having maximum absolute value. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] DX +*> \verbatim +*> DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +54,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup aux_blas * @@ -51,10 +71,10 @@ * ===================================================================== INTEGER FUNCTION IDAMAX(N,DX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/isamax.f b/lapack-netlib/BLAS/SRC/isamax.f index e73122353..5aaa50e4b 100644 --- a/lapack-netlib/BLAS/SRC/isamax.f +++ b/lapack-netlib/BLAS/SRC/isamax.f @@ -26,6 +26,26 @@ *> ISAMAX finds the index of the first element having maximum absolute value. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +54,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup aux_blas * @@ -51,10 +71,10 @@ * ===================================================================== INTEGER FUNCTION ISAMAX(N,SX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/izamax.f b/lapack-netlib/BLAS/SRC/izamax.f index 2ee9b6643..63d8e97de 100644 --- a/lapack-netlib/BLAS/SRC/izamax.f +++ b/lapack-netlib/BLAS/SRC/izamax.f @@ -26,6 +26,26 @@ *> IZAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)| *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +54,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup aux_blas * @@ -51,10 +71,10 @@ * ===================================================================== INTEGER FUNCTION IZAMAX(N,ZX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/sasum.f b/lapack-netlib/BLAS/SRC/sasum.f index a453ff708..ba363820e 100644 --- a/lapack-netlib/BLAS/SRC/sasum.f +++ b/lapack-netlib/BLAS/SRC/sasum.f @@ -27,6 +27,26 @@ *> uses unrolled loops for increment equal to one. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +55,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -52,10 +72,10 @@ * ===================================================================== REAL FUNCTION SASUM(N,SX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/saxpy.f b/lapack-netlib/BLAS/SRC/saxpy.f index 610dfe795..bf49818d2 100644 --- a/lapack-netlib/BLAS/SRC/saxpy.f +++ b/lapack-netlib/BLAS/SRC/saxpy.f @@ -28,6 +28,43 @@ *> uses unrolled loops for increments equal to one. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is REAL +*> On entry, SA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +*> +*> \param[in,out] SY +*> \verbatim +*> SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of SY +*> \endverbatim +* * Authors: * ======== * @@ -36,7 +73,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -52,10 +89,10 @@ * ===================================================================== SUBROUTINE SAXPY(N,SA,SX,INCX,SY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL SA diff --git a/lapack-netlib/BLAS/SRC/scabs1.f b/lapack-netlib/BLAS/SRC/scabs1.f index b68f76f2f..23036412e 100644 --- a/lapack-netlib/BLAS/SRC/scabs1.f +++ b/lapack-netlib/BLAS/SRC/scabs1.f @@ -23,6 +23,14 @@ *> SCABS1 computes |Re(.)| + |Im(.)| of a complex number *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] Z +*> \verbatim +*> Z is COMPLEX +*> \endverbatim +* * Authors: * ======== * @@ -31,17 +39,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * * ===================================================================== REAL FUNCTION SCABS1(Z) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX Z diff --git a/lapack-netlib/BLAS/SRC/scasum.f b/lapack-netlib/BLAS/SRC/scasum.f index 5fc1a56a5..738b2ef15 100644 --- a/lapack-netlib/BLAS/SRC/scasum.f +++ b/lapack-netlib/BLAS/SRC/scasum.f @@ -27,6 +27,26 @@ *> returns a single precision result. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +55,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -52,10 +72,10 @@ * ===================================================================== REAL FUNCTION SCASUM(N,CX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/scnrm2.f b/lapack-netlib/BLAS/SRC/scnrm2.f index 4f1f03a5f..9fd7c661c 100644 --- a/lapack-netlib/BLAS/SRC/scnrm2.f +++ b/lapack-netlib/BLAS/SRC/scnrm2.f @@ -29,6 +29,27 @@ *> SCNRM2 := sqrt( x**H*x ) *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> complex vector with N elements +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of X +*> \endverbatim +* * Authors: * ======== * @@ -37,7 +58,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -54,10 +75,10 @@ * ===================================================================== REAL FUNCTION SCNRM2(N,X,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/scopy.f b/lapack-netlib/BLAS/SRC/scopy.f index 475579717..8406ba5a5 100644 --- a/lapack-netlib/BLAS/SRC/scopy.f +++ b/lapack-netlib/BLAS/SRC/scopy.f @@ -27,6 +27,37 @@ *> uses unrolled loops for increments equal to 1. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +*> +*> \param[out] SY +*> \verbatim +*> SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of SY +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +66,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -51,10 +82,10 @@ * ===================================================================== SUBROUTINE SCOPY(N,SX,INCX,SY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/sdot.f b/lapack-netlib/BLAS/SRC/sdot.f index 5a54ee249..3d26f8308 100644 --- a/lapack-netlib/BLAS/SRC/sdot.f +++ b/lapack-netlib/BLAS/SRC/sdot.f @@ -27,6 +27,37 @@ *> uses unrolled loops for increments equal to one. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +*> +*> \param[in] SY +*> \verbatim +*> SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of SY +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +66,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -51,10 +82,10 @@ * ===================================================================== REAL FUNCTION SDOT(N,SX,INCX,SY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/sdsdot.f b/lapack-netlib/BLAS/SRC/sdsdot.f index 7ee6ad6bf..a0ec32b6f 100644 --- a/lapack-netlib/BLAS/SRC/sdsdot.f +++ b/lapack-netlib/BLAS/SRC/sdsdot.f @@ -18,9 +18,11 @@ * REAL SX(*),SY(*) * .. * -* PURPOSE -* ======= -* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> * Compute the inner product of two vectors with extended * precision accumulation. * @@ -28,56 +30,78 @@ * SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), * where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is * defined in a similar way using INCY. +*> \endverbatim * -* AUTHOR -* ====== -* Lawson, C. L., (JPL), Hanson, R. J., (SNLA), -* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) -* -* ARGUMENTS -* ========= -* -* N (input) INTEGER -* number of elements in input vector(s) -* -* SB (input) REAL -* single precision scalar to be added to inner product -* -* SX (input) REAL array, dimension (N) -* single precision vector with N elements -* -* INCX (input) INTEGER -* storage spacing between elements of SX -* -* SY (input) REAL array, dimension (N) -* single precision vector with N elements -* -* INCY (input) INTEGER -* storage spacing between elements of SY -* -* SDSDOT (output) REAL -* single precision dot product (SB if N .LE. 0) +* Arguments: +* ========== * -* Further Details -* =============== +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SB +*> \verbatim +*> SB is REAL +*> single precision scalar to be added to inner product +*> \endverbatim +*> +*> \param[in] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> single precision vector with N elements +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +*> +*> \param[in] SY +*> \verbatim +*> SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> single precision vector with N elements +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of SY +*> \endverbatim * -* REFERENCES +* Authors: +* ======== * -* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. -* Krogh, Basic linear algebra subprograms for Fortran -* usage, Algorithm No. 539, Transactions on Mathematical -* Software 5, 3 (September 1979), pp. 308-323. +*> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA), +*> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) * -* REVISION HISTORY (YYMMDD) +*> \ingroup complex_blas_level1 * -* 791001 DATE WRITTEN -* 890531 Changed all specific intrinsics to generic. (WRB) -* 890831 Modified array declarations. (WRB) -* 890831 REVISION DATE from Version 3.2 -* 891214 Prologue converted to Version 4.0 format. (BAB) -* 920310 Corrected definition of LX in DESCRIPTION. (WRB) -* 920501 Reformatted the REFERENCES section. (WRB) -* 070118 Reformat to LAPACK coding style +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> REFERENCES +*> +*> C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. +*> Krogh, Basic linear algebra subprograms for Fortran +*> usage, Algorithm No. 539, Transactions on Mathematical +*> Software 5, 3 (September 1979), pp. 308-323. +*> +*> REVISION HISTORY (YYMMDD) +*> +*> 791001 DATE WRITTEN +*> 890531 Changed all specific intrinsics to generic. (WRB) +*> 890831 Modified array declarations. (WRB) +*> 890831 REVISION DATE from Version 3.2 +*> 891214 Prologue converted to Version 4.0 format. (BAB) +*> 920310 Corrected definition of LX in DESCRIPTION. (WRB) +*> 920501 Reformatted the REFERENCES section. (WRB) +*> 070118 Reformat to LAPACK coding style +*> \endverbatim * * ===================================================================== * @@ -133,17 +157,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * * ===================================================================== REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL SB diff --git a/lapack-netlib/BLAS/SRC/sgbmv.f b/lapack-netlib/BLAS/SRC/sgbmv.f index 92896324e..df13b588f 100644 --- a/lapack-netlib/BLAS/SRC/sgbmv.f +++ b/lapack-netlib/BLAS/SRC/sgbmv.f @@ -85,7 +85,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry, the leading ( kl + ku + 1 ) by n part of the *> array A must contain the matrix of coefficients, supplied *> column by column, with the leading diagonal of the matrix in @@ -116,7 +116,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of DIMENSION at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -140,7 +140,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is REAL array of DIMENSION at least +*> Y is REAL array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/sgemm.f b/lapack-netlib/BLAS/SRC/sgemm.f index d7bdb9c4d..ca2fb175d 100644 --- a/lapack-netlib/BLAS/SRC/sgemm.f +++ b/lapack-netlib/BLAS/SRC/sgemm.f @@ -97,7 +97,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, ka ), where ka is +*> A is REAL array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise @@ -116,7 +116,7 @@ *> *> \param[in] B *> \verbatim -*> B is REAL array of DIMENSION ( LDB, kb ), where kb is +*> B is REAL array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise @@ -142,7 +142,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is REAL array of DIMENSION ( LDC, n ). +*> C is REAL array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/sgemv.f b/lapack-netlib/BLAS/SRC/sgemv.f index 0dfb1fc08..a76913860 100644 --- a/lapack-netlib/BLAS/SRC/sgemv.f +++ b/lapack-netlib/BLAS/SRC/sgemv.f @@ -71,7 +71,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> \endverbatim @@ -86,7 +86,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of DIMENSION at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -110,7 +110,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is REAL array of DIMENSION at least +*> Y is REAL array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/sger.f b/lapack-netlib/BLAS/SRC/sger.f index c2a9958f9..7dbff21d3 100644 --- a/lapack-netlib/BLAS/SRC/sger.f +++ b/lapack-netlib/BLAS/SRC/sger.f @@ -57,7 +57,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( m - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the m *> element vector x. @@ -72,7 +72,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is REAL array of dimension at least +*> Y is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -87,7 +87,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. On exit, A is *> overwritten by the updated matrix. diff --git a/lapack-netlib/BLAS/SRC/snrm2.f b/lapack-netlib/BLAS/SRC/snrm2.f index 7de03d222..b8799f1d1 100644 --- a/lapack-netlib/BLAS/SRC/snrm2.f +++ b/lapack-netlib/BLAS/SRC/snrm2.f @@ -29,6 +29,26 @@ *> SNRM2 := sqrt( x'*x ). *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -37,7 +57,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -54,10 +74,10 @@ * ===================================================================== REAL FUNCTION SNRM2(N,X,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,N diff --git a/lapack-netlib/BLAS/SRC/srot.f b/lapack-netlib/BLAS/SRC/srot.f index fa8e2958f..22c1b8b60 100644 --- a/lapack-netlib/BLAS/SRC/srot.f +++ b/lapack-netlib/BLAS/SRC/srot.f @@ -27,6 +27,47 @@ *> applies a plane rotation. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +*> +*> \param[in,out] SY +*> \verbatim +*> SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of SY +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +76,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -51,10 +92,10 @@ * ===================================================================== SUBROUTINE SROT(N,SX,INCX,SY,INCY,C,S) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL C,S diff --git a/lapack-netlib/BLAS/SRC/srotg.f b/lapack-netlib/BLAS/SRC/srotg.f index b4484fb34..1e2dba222 100644 --- a/lapack-netlib/BLAS/SRC/srotg.f +++ b/lapack-netlib/BLAS/SRC/srotg.f @@ -23,6 +23,29 @@ *> SROTG construct givens plane rotation. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] SA +*> \verbatim +*> SA is REAL +*> \endverbatim +*> +*> \param[in] SB +*> \verbatim +*> SB is REAL +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL +*> \endverbatim +* * Authors: * ======== * @@ -31,7 +54,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -46,10 +69,10 @@ * ===================================================================== SUBROUTINE SROTG(SA,SB,C,S) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL C,S,SA,SB diff --git a/lapack-netlib/BLAS/SRC/srotm.f b/lapack-netlib/BLAS/SRC/srotm.f index c71f7f012..99e028185 100644 --- a/lapack-netlib/BLAS/SRC/srotm.f +++ b/lapack-netlib/BLAS/SRC/srotm.f @@ -52,8 +52,7 @@ *> *> \param[in,out] SX *> \verbatim -*> SX is REAL array, dimension N -*> double precision vector with N elements +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) *> \endverbatim *> *> \param[in] INCX @@ -64,8 +63,7 @@ *> *> \param[in,out] SY *> \verbatim -*> SY is REAL array, dimension N -*> double precision vector with N elements +*> SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) *> \endverbatim *> *> \param[in] INCY @@ -74,9 +72,9 @@ *> storage spacing between elements of SY *> \endverbatim *> -*> \param[in,out] SPARAM +*> \param[in] SPARAM *> \verbatim -*> SPARAM is REAL array, dimension 5 +*> SPARAM is REAL array, dimension (5) *> SPARAM(1)=SFLAG *> SPARAM(2)=SH11 *> SPARAM(3)=SH21 @@ -92,17 +90,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * * ===================================================================== SUBROUTINE SROTM(N,SX,INCX,SY,INCY,SPARAM) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/srotmg.f b/lapack-netlib/BLAS/SRC/srotmg.f index a5077c069..7e7a2c5bf 100644 --- a/lapack-netlib/BLAS/SRC/srotmg.f +++ b/lapack-netlib/BLAS/SRC/srotmg.f @@ -65,9 +65,9 @@ *> SY1 is REAL *> \endverbatim *> -*> \param[in,out] SPARAM +*> \param[out] SPARAM *> \verbatim -*> SPARAM is REAL array, dimension 5 +*> SPARAM is REAL array, dimension (5) *> SPARAM(1)=SFLAG *> SPARAM(2)=SH11 *> SPARAM(3)=SH21 @@ -83,17 +83,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * * ===================================================================== SUBROUTINE SROTMG(SD1,SD2,SX1,SY1,SPARAM) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL SD1,SD2,SX1,SY1 diff --git a/lapack-netlib/BLAS/SRC/ssbmv.f b/lapack-netlib/BLAS/SRC/ssbmv.f index b711d8b06..f0e210052 100644 --- a/lapack-netlib/BLAS/SRC/ssbmv.f +++ b/lapack-netlib/BLAS/SRC/ssbmv.f @@ -72,7 +72,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the symmetric matrix, supplied column by @@ -120,7 +120,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of DIMENSION at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the *> vector x. @@ -141,7 +141,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is REAL array of DIMENSION at least +*> Y is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the *> vector y. On exit, Y is overwritten by the updated vector y. diff --git a/lapack-netlib/BLAS/SRC/sscal.f b/lapack-netlib/BLAS/SRC/sscal.f index 2ffc1a254..14d702178 100644 --- a/lapack-netlib/BLAS/SRC/sscal.f +++ b/lapack-netlib/BLAS/SRC/sscal.f @@ -24,10 +24,36 @@ *> *> \verbatim *> -*> scales a vector by a constant. +*> SSCAL scales a vector by a constant. *> uses unrolled loops for increment equal to 1. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is REAL +*> On entry, SA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in,out] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +* * Authors: * ======== * @@ -36,7 +62,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -53,10 +79,10 @@ * ===================================================================== SUBROUTINE SSCAL(N,SA,SX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL SA diff --git a/lapack-netlib/BLAS/SRC/sspmv.f b/lapack-netlib/BLAS/SRC/sspmv.f index bc8af3d44..39fe2776a 100644 --- a/lapack-netlib/BLAS/SRC/sspmv.f +++ b/lapack-netlib/BLAS/SRC/sspmv.f @@ -65,7 +65,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is REAL array of DIMENSION at least +*> AP is REAL array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the symmetric matrix @@ -81,7 +81,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -103,7 +103,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is REAL array of dimension at least +*> Y is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/sspr.f b/lapack-netlib/BLAS/SRC/sspr.f index 52cb73170..79df3c28b 100644 --- a/lapack-netlib/BLAS/SRC/sspr.f +++ b/lapack-netlib/BLAS/SRC/sspr.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is REAL array of DIMENSION at least +*> AP is REAL array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the symmetric matrix diff --git a/lapack-netlib/BLAS/SRC/sspr2.f b/lapack-netlib/BLAS/SRC/sspr2.f index b4c81187c..da33c6cdc 100644 --- a/lapack-netlib/BLAS/SRC/sspr2.f +++ b/lapack-netlib/BLAS/SRC/sspr2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is REAL array of dimension at least +*> Y is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is REAL array of DIMENSION at least +*> AP is REAL array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the symmetric matrix diff --git a/lapack-netlib/BLAS/SRC/sswap.f b/lapack-netlib/BLAS/SRC/sswap.f index f821a1e70..fee7df20e 100644 --- a/lapack-netlib/BLAS/SRC/sswap.f +++ b/lapack-netlib/BLAS/SRC/sswap.f @@ -23,10 +23,41 @@ *> *> \verbatim *> -*> interchanges two vectors. +*> SSWAP interchanges two vectors. *> uses unrolled loops for increments equal to 1. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] SX +*> \verbatim +*> SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of SX +*> \endverbatim +*> +*> \param[in,out] SY +*> \verbatim +*> SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of SY +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +66,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_blas_level1 * @@ -51,10 +82,10 @@ * ===================================================================== SUBROUTINE SSWAP(N,SX,INCX,SY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/ssymm.f b/lapack-netlib/BLAS/SRC/ssymm.f index d3a193f76..6263c1756 100644 --- a/lapack-netlib/BLAS/SRC/ssymm.f +++ b/lapack-netlib/BLAS/SRC/ssymm.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, ka ), where ka is +*> A is REAL array, dimension ( LDA, ka ), where ka is *> m when SIDE = 'L' or 'l' and is n otherwise. *> Before entry with SIDE = 'L' or 'l', the m by m part of *> the array A must contain the symmetric matrix, such that @@ -122,7 +122,7 @@ *> *> \param[in] B *> \verbatim -*> B is REAL array of DIMENSION ( LDB, n ). +*> B is REAL array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B. *> \endverbatim @@ -144,7 +144,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is REAL array of DIMENSION ( LDC, n ). +*> C is REAL array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/ssymv.f b/lapack-netlib/BLAS/SRC/ssymv.f index a1fa54f10..d3c4c38ca 100644 --- a/lapack-netlib/BLAS/SRC/ssymv.f +++ b/lapack-netlib/BLAS/SRC/ssymv.f @@ -65,7 +65,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the symmetric matrix and the strictly @@ -86,7 +86,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -108,7 +108,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is REAL array of dimension at least +*> Y is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/ssyr.f b/lapack-netlib/BLAS/SRC/ssyr.f index 9d73f8686..bdc39449e 100644 --- a/lapack-netlib/BLAS/SRC/ssyr.f +++ b/lapack-netlib/BLAS/SRC/ssyr.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/ssyr2.f b/lapack-netlib/BLAS/SRC/ssyr2.f index a2a083adc..d2dcf8d72 100644 --- a/lapack-netlib/BLAS/SRC/ssyr2.f +++ b/lapack-netlib/BLAS/SRC/ssyr2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is REAL array of dimension at least +*> Y is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/ssyr2k.f b/lapack-netlib/BLAS/SRC/ssyr2k.f index 4a705f79c..b271fdcd7 100644 --- a/lapack-netlib/BLAS/SRC/ssyr2k.f +++ b/lapack-netlib/BLAS/SRC/ssyr2k.f @@ -95,7 +95,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, ka ), where ka is +*> A is REAL array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -114,7 +114,7 @@ *> *> \param[in] B *> \verbatim -*> B is REAL array of DIMENSION ( LDB, kb ), where kb is +*> B is REAL array, dimension ( LDB, kb ), where kb is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array B must contain the matrix B, otherwise @@ -139,7 +139,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is REAL array of DIMENSION ( LDC, n ). +*> C is REAL array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/ssyrk.f b/lapack-netlib/BLAS/SRC/ssyrk.f index ecb1e4f17..abaddf99d 100644 --- a/lapack-netlib/BLAS/SRC/ssyrk.f +++ b/lapack-netlib/BLAS/SRC/ssyrk.f @@ -92,7 +92,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, ka ), where ka is +*> A is REAL array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -117,7 +117,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is REAL array of DIMENSION ( LDC, n ). +*> C is REAL array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/stbmv.f b/lapack-netlib/BLAS/SRC/stbmv.f index 4323864ec..a714f2059 100644 --- a/lapack-netlib/BLAS/SRC/stbmv.f +++ b/lapack-netlib/BLAS/SRC/stbmv.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -142,7 +142,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/stbsv.f b/lapack-netlib/BLAS/SRC/stbsv.f index 00aaeba67..721b80494 100644 --- a/lapack-netlib/BLAS/SRC/stbsv.f +++ b/lapack-netlib/BLAS/SRC/stbsv.f @@ -94,7 +94,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -146,7 +146,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/stpmv.f b/lapack-netlib/BLAS/SRC/stpmv.f index 765e7f918..833f808bd 100644 --- a/lapack-netlib/BLAS/SRC/stpmv.f +++ b/lapack-netlib/BLAS/SRC/stpmv.f @@ -80,7 +80,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is REAL array of DIMENSION at least +*> AP is REAL array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -98,7 +98,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/stpsv.f b/lapack-netlib/BLAS/SRC/stpsv.f index 5a29776da..fe1f40780 100644 --- a/lapack-netlib/BLAS/SRC/stpsv.f +++ b/lapack-netlib/BLAS/SRC/stpsv.f @@ -83,7 +83,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is REAL array of DIMENSION at least +*> AP is REAL array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -101,7 +101,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/strmm.f b/lapack-netlib/BLAS/SRC/strmm.f index dd2087218..e11330ae3 100644 --- a/lapack-netlib/BLAS/SRC/strmm.f +++ b/lapack-netlib/BLAS/SRC/strmm.f @@ -109,7 +109,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, k ), where k is m +*> A is REAL array, dimension ( LDA, k ), where k is m *> when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k *> upper triangular part of the array A must contain the upper @@ -134,7 +134,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is REAL array of DIMENSION ( LDB, n ). +*> B is REAL array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B, and on exit is overwritten by the *> transformed matrix. diff --git a/lapack-netlib/BLAS/SRC/strmv.f b/lapack-netlib/BLAS/SRC/strmv.f index ba3d7b6a8..e9f681e89 100644 --- a/lapack-netlib/BLAS/SRC/strmv.f +++ b/lapack-netlib/BLAS/SRC/strmv.f @@ -80,7 +80,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -103,7 +103,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/strsm.f b/lapack-netlib/BLAS/SRC/strsm.f index f2927fe3b..aa805f6b6 100644 --- a/lapack-netlib/BLAS/SRC/strsm.f +++ b/lapack-netlib/BLAS/SRC/strsm.f @@ -111,7 +111,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, k ), +*> A is REAL array, dimension ( LDA, k ), *> where k is m when SIDE = 'L' or 'l' *> and k is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k @@ -137,7 +137,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is REAL array of DIMENSION ( LDB, n ). +*> B is REAL array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the right-hand side matrix B, and on exit is *> overwritten by the solution matrix X. diff --git a/lapack-netlib/BLAS/SRC/strsv.f b/lapack-netlib/BLAS/SRC/strsv.f index a31651b9a..d9e41e763 100644 --- a/lapack-netlib/BLAS/SRC/strsv.f +++ b/lapack-netlib/BLAS/SRC/strsv.f @@ -83,7 +83,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -106,7 +106,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is REAL array of dimension at least +*> X is REAL array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/zaxpy.f b/lapack-netlib/BLAS/SRC/zaxpy.f index bca78fb76..d670a2b78 100644 --- a/lapack-netlib/BLAS/SRC/zaxpy.f +++ b/lapack-netlib/BLAS/SRC/zaxpy.f @@ -27,6 +27,43 @@ *> ZAXPY constant times a vector plus a vector. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] ZA +*> \verbatim +*> ZA is COMPLEX*16 +*> On entry, ZA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +*> +*> \param[in,out] ZY +*> \verbatim +*> ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of ZY +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +72,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * @@ -51,10 +88,10 @@ * ===================================================================== SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX*16 ZA diff --git a/lapack-netlib/BLAS/SRC/zcopy.f b/lapack-netlib/BLAS/SRC/zcopy.f index 830548ab6..5106916ac 100644 --- a/lapack-netlib/BLAS/SRC/zcopy.f +++ b/lapack-netlib/BLAS/SRC/zcopy.f @@ -26,6 +26,37 @@ *> ZCOPY copies a vector, x, to a vector, y. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +*> +*> \param[out] ZY +*> \verbatim +*> ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of ZY +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +65,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * @@ -50,10 +81,10 @@ * ===================================================================== SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/zdotc.f b/lapack-netlib/BLAS/SRC/zdotc.f index 70119ec58..36e33aefb 100644 --- a/lapack-netlib/BLAS/SRC/zdotc.f +++ b/lapack-netlib/BLAS/SRC/zdotc.f @@ -28,6 +28,37 @@ *> *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] ZX +*> \verbatim +*> ZX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +*> +*> \param[in] ZY +*> \verbatim +*> ZY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of ZY +*> \endverbatim +* * Authors: * ======== * @@ -36,7 +67,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * @@ -52,10 +83,10 @@ * ===================================================================== COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/zdotu.f b/lapack-netlib/BLAS/SRC/zdotu.f index 318fae24e..366107583 100644 --- a/lapack-netlib/BLAS/SRC/zdotu.f +++ b/lapack-netlib/BLAS/SRC/zdotu.f @@ -28,6 +28,37 @@ *> *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] ZX +*> \verbatim +*> ZX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +*> +*> \param[in] ZY +*> \verbatim +*> ZY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of ZY +*> \endverbatim +* * Authors: * ======== * @@ -36,7 +67,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * @@ -52,10 +83,10 @@ * ===================================================================== COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/zdscal.f b/lapack-netlib/BLAS/SRC/zdscal.f index def90785a..a87a28fad 100644 --- a/lapack-netlib/BLAS/SRC/zdscal.f +++ b/lapack-netlib/BLAS/SRC/zdscal.f @@ -27,6 +27,32 @@ *> ZDSCAL scales a vector by a constant. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] DA +*> \verbatim +*> DA is DOUBLE PRECISION +*> On entry, DA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in,out] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +61,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * @@ -52,10 +78,10 @@ * ===================================================================== SUBROUTINE ZDSCAL(N,DA,ZX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. DOUBLE PRECISION DA diff --git a/lapack-netlib/BLAS/SRC/zgbmv.f b/lapack-netlib/BLAS/SRC/zgbmv.f index f49da2218..7303df879 100644 --- a/lapack-netlib/BLAS/SRC/zgbmv.f +++ b/lapack-netlib/BLAS/SRC/zgbmv.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry, the leading ( kl + ku + 1 ) by n part of the *> array A must contain the matrix of coefficients, supplied *> column by column, with the leading diagonal of the matrix in @@ -118,7 +118,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of DIMENSION at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -142,7 +142,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array of DIMENSION at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/zgemm.f b/lapack-netlib/BLAS/SRC/zgemm.f index a17263210..c3ac7551d 100644 --- a/lapack-netlib/BLAS/SRC/zgemm.f +++ b/lapack-netlib/BLAS/SRC/zgemm.f @@ -97,7 +97,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise @@ -116,7 +116,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +*> B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise @@ -142,7 +142,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/zgemv.f b/lapack-netlib/BLAS/SRC/zgemv.f index 01e44d467..7088d383f 100644 --- a/lapack-netlib/BLAS/SRC/zgemv.f +++ b/lapack-netlib/BLAS/SRC/zgemv.f @@ -73,7 +73,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> \endverbatim @@ -88,7 +88,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of DIMENSION at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -112,7 +112,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array of DIMENSION at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. diff --git a/lapack-netlib/BLAS/SRC/zgerc.f b/lapack-netlib/BLAS/SRC/zgerc.f index cf8e17d35..058dccfc1 100644 --- a/lapack-netlib/BLAS/SRC/zgerc.f +++ b/lapack-netlib/BLAS/SRC/zgerc.f @@ -57,7 +57,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( m - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the m *> element vector x. @@ -72,7 +72,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX*16 array of dimension at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -87,7 +87,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. On exit, A is *> overwritten by the updated matrix. diff --git a/lapack-netlib/BLAS/SRC/zgeru.f b/lapack-netlib/BLAS/SRC/zgeru.f index d191740cc..683a778d5 100644 --- a/lapack-netlib/BLAS/SRC/zgeru.f +++ b/lapack-netlib/BLAS/SRC/zgeru.f @@ -57,7 +57,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( m - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the m *> element vector x. @@ -72,7 +72,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX*16 array of dimension at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -87,7 +87,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. On exit, A is *> overwritten by the updated matrix. diff --git a/lapack-netlib/BLAS/SRC/zhbmv.f b/lapack-netlib/BLAS/SRC/zhbmv.f index 87422152c..19d8f7d45 100644 --- a/lapack-netlib/BLAS/SRC/zhbmv.f +++ b/lapack-netlib/BLAS/SRC/zhbmv.f @@ -72,7 +72,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the hermitian matrix, supplied column by @@ -123,7 +123,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of DIMENSION at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the *> vector x. @@ -144,7 +144,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array of DIMENSION at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the *> vector y. On exit, Y is overwritten by the updated vector y. diff --git a/lapack-netlib/BLAS/SRC/zhemm.f b/lapack-netlib/BLAS/SRC/zhemm.f index 45a5eabd7..d63778b75 100644 --- a/lapack-netlib/BLAS/SRC/zhemm.f +++ b/lapack-netlib/BLAS/SRC/zhemm.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> m when SIDE = 'L' or 'l' and is n otherwise. *> Before entry with SIDE = 'L' or 'l', the m by m part of *> the array A must contain the hermitian matrix, such that @@ -124,7 +124,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX*16 array of DIMENSION ( LDB, n ). +*> B is COMPLEX*16 array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B. *> \endverbatim @@ -146,7 +146,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/zhemv.f b/lapack-netlib/BLAS/SRC/zhemv.f index 37917459a..3ea0753f4 100644 --- a/lapack-netlib/BLAS/SRC/zhemv.f +++ b/lapack-netlib/BLAS/SRC/zhemv.f @@ -65,7 +65,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the hermitian matrix and the strictly @@ -88,7 +88,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -110,7 +110,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array of dimension at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/zher.f b/lapack-netlib/BLAS/SRC/zher.f index f7def7608..5e0c89634 100644 --- a/lapack-netlib/BLAS/SRC/zher.f +++ b/lapack-netlib/BLAS/SRC/zher.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/zher2.f b/lapack-netlib/BLAS/SRC/zher2.f index 94c132c4f..e3a383189 100644 --- a/lapack-netlib/BLAS/SRC/zher2.f +++ b/lapack-netlib/BLAS/SRC/zher2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX*16 array of dimension at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/zher2k.f b/lapack-netlib/BLAS/SRC/zher2k.f index 407e8db53..474c65e57 100644 --- a/lapack-netlib/BLAS/SRC/zher2k.f +++ b/lapack-netlib/BLAS/SRC/zher2k.f @@ -95,7 +95,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -114,7 +114,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +*> B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array B must contain the matrix B, otherwise @@ -140,7 +140,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/zherk.f b/lapack-netlib/BLAS/SRC/zherk.f index d181ca0a8..0d11f227b 100644 --- a/lapack-netlib/BLAS/SRC/zherk.f +++ b/lapack-netlib/BLAS/SRC/zherk.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -115,7 +115,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the hermitian matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/zhpmv.f b/lapack-netlib/BLAS/SRC/zhpmv.f index 0d5d325bf..9bd3ea45a 100644 --- a/lapack-netlib/BLAS/SRC/zhpmv.f +++ b/lapack-netlib/BLAS/SRC/zhpmv.f @@ -65,7 +65,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is COMPLEX*16 array of DIMENSION at least +*> AP is COMPLEX*16 array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the hermitian matrix @@ -83,7 +83,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -105,7 +105,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array of dimension at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. On exit, Y is overwritten by the updated diff --git a/lapack-netlib/BLAS/SRC/zhpr.f b/lapack-netlib/BLAS/SRC/zhpr.f index 70051c8a5..af82dfbd8 100644 --- a/lapack-netlib/BLAS/SRC/zhpr.f +++ b/lapack-netlib/BLAS/SRC/zhpr.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is COMPLEX*16 array of DIMENSION at least +*> AP is COMPLEX*16 array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the hermitian matrix diff --git a/lapack-netlib/BLAS/SRC/zhpr2.f b/lapack-netlib/BLAS/SRC/zhpr2.f index c9fb75853..1b0fd3aac 100644 --- a/lapack-netlib/BLAS/SRC/zhpr2.f +++ b/lapack-netlib/BLAS/SRC/zhpr2.f @@ -65,7 +65,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. @@ -80,7 +80,7 @@ *> *> \param[in] Y *> \verbatim -*> Y is COMPLEX*16 array of dimension at least +*> Y is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCY ) ). *> Before entry, the incremented array Y must contain the n *> element vector y. @@ -95,7 +95,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is COMPLEX*16 array of DIMENSION at least +*> AP is COMPLEX*16 array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular part of the hermitian matrix diff --git a/lapack-netlib/BLAS/SRC/zrotg.f b/lapack-netlib/BLAS/SRC/zrotg.f index e5c406dba..581117d87 100644 --- a/lapack-netlib/BLAS/SRC/zrotg.f +++ b/lapack-netlib/BLAS/SRC/zrotg.f @@ -24,6 +24,29 @@ *> ZROTG determines a double complex Givens rotation. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] CA +*> \verbatim +*> CA is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] CB +*> \verbatim +*> CB is COMPLEX*16 +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is COMPLEX*16 +*> \endverbatim +* * Authors: * ======== * @@ -32,17 +55,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * * ===================================================================== SUBROUTINE ZROTG(CA,CB,C,S) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX*16 CA,CB,S diff --git a/lapack-netlib/BLAS/SRC/zscal.f b/lapack-netlib/BLAS/SRC/zscal.f index ca038aacb..c52f62258 100644 --- a/lapack-netlib/BLAS/SRC/zscal.f +++ b/lapack-netlib/BLAS/SRC/zscal.f @@ -27,6 +27,32 @@ *> ZSCAL scales a vector by a constant. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in] ZA +*> \verbatim +*> ZA is COMPLEX*16 +*> On entry, ZA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in,out] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +* * Authors: * ======== * @@ -35,7 +61,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * @@ -52,10 +78,10 @@ * ===================================================================== SUBROUTINE ZSCAL(N,ZA,ZX,INCX) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. COMPLEX*16 ZA diff --git a/lapack-netlib/BLAS/SRC/zswap.f b/lapack-netlib/BLAS/SRC/zswap.f index 02a5b97e8..6fd2d7fe8 100644 --- a/lapack-netlib/BLAS/SRC/zswap.f +++ b/lapack-netlib/BLAS/SRC/zswap.f @@ -26,6 +26,37 @@ *> ZSWAP interchanges two vectors. *> \endverbatim * +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> number of elements in input vector(s) +*> \endverbatim +*> +*> \param[in,out] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> storage spacing between elements of ZX +*> \endverbatim +*> +*> \param[in,out] ZY +*> \verbatim +*> ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> storage spacing between elements of ZY +*> \endverbatim +* * Authors: * ======== * @@ -34,7 +65,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_blas_level1 * @@ -50,10 +81,10 @@ * ===================================================================== SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY) * -* -- Reference BLAS level1 routine (version 3.7.0) -- +* -- Reference BLAS level1 routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX,INCY,N diff --git a/lapack-netlib/BLAS/SRC/zsymm.f b/lapack-netlib/BLAS/SRC/zsymm.f index 1dc267a7a..bd37934ae 100644 --- a/lapack-netlib/BLAS/SRC/zsymm.f +++ b/lapack-netlib/BLAS/SRC/zsymm.f @@ -87,7 +87,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> m when SIDE = 'L' or 'l' and is n otherwise. *> Before entry with SIDE = 'L' or 'l', the m by m part of *> the array A must contain the symmetric matrix, such that @@ -122,7 +122,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX*16 array of DIMENSION ( LDB, n ). +*> B is COMPLEX*16 array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the matrix B. *> \endverbatim @@ -144,7 +144,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. diff --git a/lapack-netlib/BLAS/SRC/zsyr2k.f b/lapack-netlib/BLAS/SRC/zsyr2k.f index d358ed00f..92bbfeeb5 100644 --- a/lapack-netlib/BLAS/SRC/zsyr2k.f +++ b/lapack-netlib/BLAS/SRC/zsyr2k.f @@ -92,7 +92,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -111,7 +111,7 @@ *> *> \param[in] B *> \verbatim -*> B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +*> B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array B must contain the matrix B, otherwise @@ -136,7 +136,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/zsyrk.f b/lapack-netlib/BLAS/SRC/zsyrk.f index 79591b45e..122539f58 100644 --- a/lapack-netlib/BLAS/SRC/zsyrk.f +++ b/lapack-netlib/BLAS/SRC/zsyrk.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> k when TRANS = 'N' or 'n', and is n otherwise. *> Before entry with TRANS = 'N' or 'n', the leading n by k *> part of the array A must contain the matrix A, otherwise @@ -115,7 +115,7 @@ *> *> \param[in,out] C *> \verbatim -*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array C must contain the upper *> triangular part of the symmetric matrix and the strictly diff --git a/lapack-netlib/BLAS/SRC/ztbmv.f b/lapack-netlib/BLAS/SRC/ztbmv.f index 1e03f2bad..a4d9c2ed1 100644 --- a/lapack-netlib/BLAS/SRC/ztbmv.f +++ b/lapack-netlib/BLAS/SRC/ztbmv.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ). *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -140,9 +140,9 @@ *> ( k + 1 ). *> \endverbatim *> -*> \param[in] X +*> \param[in,out] X *> \verbatim -*> X is (input/output) COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/ztbsv.f b/lapack-netlib/BLAS/SRC/ztbsv.f index 50c4bb42e..eaf850046 100644 --- a/lapack-netlib/BLAS/SRC/ztbsv.f +++ b/lapack-netlib/BLAS/SRC/ztbsv.f @@ -94,7 +94,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) *> by n part of the array A must contain the upper triangular *> band part of the matrix of coefficients, supplied column by @@ -146,7 +146,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/ztpmv.f b/lapack-netlib/BLAS/SRC/ztpmv.f index d9aae4259..65aa2a0ab 100644 --- a/lapack-netlib/BLAS/SRC/ztpmv.f +++ b/lapack-netlib/BLAS/SRC/ztpmv.f @@ -80,7 +80,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is COMPLEX*16 array of DIMENSION at least +*> AP is COMPLEX*16 array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -96,9 +96,9 @@ *> A are not referenced, but are assumed to be unity. *> \endverbatim *> -*> \param[in] X +*> \param[in,out] X *> \verbatim -*> X is (input/output) COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/ztpsv.f b/lapack-netlib/BLAS/SRC/ztpsv.f index 5874fdc43..538888424 100644 --- a/lapack-netlib/BLAS/SRC/ztpsv.f +++ b/lapack-netlib/BLAS/SRC/ztpsv.f @@ -83,7 +83,7 @@ *> *> \param[in] AP *> \verbatim -*> AP is COMPLEX*16 array of DIMENSION at least +*> AP is COMPLEX*16 array, dimension at least *> ( ( n*( n + 1 ) )/2 ). *> Before entry with UPLO = 'U' or 'u', the array AP must *> contain the upper triangular matrix packed sequentially, @@ -101,7 +101,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/SRC/ztrmm.f b/lapack-netlib/BLAS/SRC/ztrmm.f index 229f3322b..0f445f52a 100644 --- a/lapack-netlib/BLAS/SRC/ztrmm.f +++ b/lapack-netlib/BLAS/SRC/ztrmm.f @@ -109,7 +109,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m +*> A is COMPLEX*16 array, dimension ( LDA, k ), where k is m *> when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k *> upper triangular part of the array A must contain the upper @@ -132,9 +132,9 @@ *> then LDA must be at least max( 1, n ). *> \endverbatim *> -*> \param[in] B +*> \param[in,out] B *> \verbatim -*> B is (input/output) COMPLEX*16 array of DIMENSION ( LDB, n ). +*> B is COMPLEX*16 array, dimension ( LDB, N ). *> Before entry, the leading m by n part of the array B must *> contain the matrix B, and on exit is overwritten by the *> transformed matrix. diff --git a/lapack-netlib/BLAS/SRC/ztrmv.f b/lapack-netlib/BLAS/SRC/ztrmv.f index ab9065cf1..52d1ae679 100644 --- a/lapack-netlib/BLAS/SRC/ztrmv.f +++ b/lapack-netlib/BLAS/SRC/ztrmv.f @@ -80,7 +80,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ). *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -101,9 +101,9 @@ *> max( 1, n ). *> \endverbatim *> -*> \param[in] X +*> \param[in,out] X *> \verbatim -*> X is (input/output) COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element vector x. On exit, X is overwritten with the diff --git a/lapack-netlib/BLAS/SRC/ztrsm.f b/lapack-netlib/BLAS/SRC/ztrsm.f index cc1af763d..46a6afc77 100644 --- a/lapack-netlib/BLAS/SRC/ztrsm.f +++ b/lapack-netlib/BLAS/SRC/ztrsm.f @@ -111,7 +111,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, k ), +*> A is COMPLEX*16 array, dimension ( LDA, k ), *> where k is m when SIDE = 'L' or 'l' *> and k is n when SIDE = 'R' or 'r'. *> Before entry with UPLO = 'U' or 'u', the leading k by k @@ -137,7 +137,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is COMPLEX*16 array of DIMENSION ( LDB, n ). +*> B is COMPLEX*16 array, dimension ( LDB, N ) *> Before entry, the leading m by n part of the array B must *> contain the right-hand side matrix B, and on exit is *> overwritten by the solution matrix X. diff --git a/lapack-netlib/BLAS/SRC/ztrsv.f b/lapack-netlib/BLAS/SRC/ztrsv.f index 577b5cae3..ba7aa35c3 100644 --- a/lapack-netlib/BLAS/SRC/ztrsv.f +++ b/lapack-netlib/BLAS/SRC/ztrsv.f @@ -83,7 +83,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, N ) *> Before entry with UPLO = 'U' or 'u', the leading n by n *> upper triangular part of the array A must contain the upper *> triangular matrix and the strictly lower triangular part of @@ -106,7 +106,7 @@ *> *> \param[in,out] X *> \verbatim -*> X is COMPLEX*16 array of dimension at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ). *> Before entry, the incremented array X must contain the n *> element right-hand side vector b. On exit, X is overwritten diff --git a/lapack-netlib/BLAS/TESTING/CMakeLists.txt b/lapack-netlib/BLAS/TESTING/CMakeLists.txt index f88c9a8ac..9b130db0f 100644 --- a/lapack-netlib/BLAS/TESTING/CMakeLists.txt +++ b/lapack-netlib/BLAS/TESTING/CMakeLists.txt @@ -1,34 +1,6 @@ -####################################################################### -# This makefile creates the test programs for the BLAS 1 routines. -# The test files are grouped as follows: -# SBLAT1 -- Single precision real test routines -# CBLAT1 -- Single precision complex test routines -# DBLAT1 -- Double precision real test routines -# ZBLAT1 -- Double precision complex test routines -# -# Test programs can be generated for all or some of the four different -# precisions. To create the test programs, enter make followed by one -# or more of the precisions desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates all four test programs. -# The executable files which are created are called -# ../xblat1s, ../xblat1d, ../xblat1c, and ../xblat1z -# -# To remove the object files after the executable files have been -# created, enter -# make clean -# To force the source files to be recompiled, enter, for example, -# make single FRC=FRC -# -####################################################################### - macro(add_blas_test name src) get_filename_component(baseNAME ${src} NAME_WE) - set(TEST_INPUT "${LAPACK_SOURCE_DIR}/BLAS/${baseNAME}.in") + set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in") add_executable(${name} ${src}) target_link_libraries(${name} blas) if(EXISTS "${TEST_INPUT}") diff --git a/lapack-netlib/BLAS/TESTING/Makeblat1 b/lapack-netlib/BLAS/TESTING/Makeblat1 deleted file mode 100644 index ccd7e3c6e..000000000 --- a/lapack-netlib/BLAS/TESTING/Makeblat1 +++ /dev/null @@ -1,67 +0,0 @@ -include ../../make.inc - -####################################################################### -# This makefile creates the test programs for the BLAS 1 routines. -# The test files are grouped as follows: -# SBLAT1 -- Single precision real test routines -# CBLAT1 -- Single precision complex test routines -# DBLAT1 -- Double precision real test routines -# ZBLAT1 -- Double precision complex test routines -# -# Test programs can be generated for all or some of the four different -# precisions. To create the test programs, enter make followed by one -# or more of the precisions desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates all four test programs. -# The executable files which are created are called -# ../xblat1s, ../xblat1d, ../xblat1c, and ../xblat1z -# -# To remove the object files after the executable files have been -# created, enter -# make clean -# To force the source files to be recompiled, enter, for example, -# make single FRC=FRC -# -####################################################################### - -SBLAT1 = sblat1.o -CBLAT1 = cblat1.o -DBLAT1 = dblat1.o -ZBLAT1 = zblat1.o - -all: single double complex complex16 - -single: ../xblat1s -double: ../xblat1d -complex: ../xblat1c -complex16: ../xblat1z - -../xblat1s: $(SBLAT1) - $(LOADER) $(LOADOPTS) -o $@ $(SBLAT1) $(BLASLIB) - -../xblat1c: $(CBLAT1) - $(LOADER) $(LOADOPTS) -o $@ $(CBLAT1) $(BLASLIB) - -../xblat1d: $(DBLAT1) - $(LOADER) $(LOADOPTS) -o $@ $(DBLAT1) $(BLASLIB) - -../xblat1z: $(ZBLAT1) - $(LOADER) $(LOADOPTS) -o $@ $(ZBLAT1) $(BLASLIB) - -$(SBLAT1): $(FRC) -$(CBLAT1): $(FRC) -$(DBLAT1): $(FRC) -$(ZBLAT1): $(FRC) - -FRC: - @FRC=$(FRC) - -clean: - rm -f *.o - -.f.o: - $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/BLAS/TESTING/Makeblat2 b/lapack-netlib/BLAS/TESTING/Makeblat2 deleted file mode 100644 index 920607889..000000000 --- a/lapack-netlib/BLAS/TESTING/Makeblat2 +++ /dev/null @@ -1,67 +0,0 @@ -include ../../make.inc - -####################################################################### -# This makefile creates the test programs for the BLAS 2 routines. -# The test files are grouped as follows: -# SBLAT2 -- Single precision real test routines -# CBLAT2 -- Single precision complex test routines -# DBLAT2 -- Double precision real test routines -# ZBLAT2 -- Double precision complex test routines -# -# Test programs can be generated for all or some of the four different -# precisions. To create the test programs, enter make followed by one -# or more of the precisions desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates all four test programs. -# The executable files which are created are called -# ../xblat2s, ../xblat2d, ../xblat2c, and ../xblat2z -# -# To remove the object files after the executable files have been -# created, enter -# make clean -# To force the source files to be recompiled, enter, for example, -# make single FRC=FRC -# -####################################################################### - -SBLAT2 = sblat2.o -CBLAT2 = cblat2.o -DBLAT2 = dblat2.o -ZBLAT2 = zblat2.o - -all: single double complex complex16 - -single: ../xblat2s -double: ../xblat2d -complex: ../xblat2c -complex16: ../xblat2z - -../xblat2s: $(SBLAT2) - $(LOADER) $(LOADOPTS) -o $@ $(SBLAT2) $(BLASLIB) - -../xblat2c: $(CBLAT2) - $(LOADER) $(LOADOPTS) -o $@ $(CBLAT2) $(BLASLIB) - -../xblat2d: $(DBLAT2) - $(LOADER) $(LOADOPTS) -o $@ $(DBLAT2) $(BLASLIB) - -../xblat2z: $(ZBLAT2) - $(LOADER) $(LOADOPTS) -o $@ $(ZBLAT2) $(BLASLIB) - -$(SBLAT2): $(FRC) -$(CBLAT2): $(FRC) -$(DBLAT2): $(FRC) -$(ZBLAT2): $(FRC) - -FRC: - @FRC=$(FRC) - -clean: - rm -f *.o - -.f.o: - $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/BLAS/TESTING/Makeblat3 b/lapack-netlib/BLAS/TESTING/Makeblat3 deleted file mode 100644 index e454b3485..000000000 --- a/lapack-netlib/BLAS/TESTING/Makeblat3 +++ /dev/null @@ -1,67 +0,0 @@ -include ../../make.inc - -####################################################################### -# This makefile creates the test programs for the BLAS 3 routines. -# The test files are grouped as follows: -# SBLAT3 -- Single precision real test routines -# CBLAT3 -- Single precision complex test routines -# DBLAT3 -- Double precision real test routines -# ZBLAT3 -- Double precision complex test routines -# -# Test programs can be generated for all or some of the four different -# precisions. To create the test programs, enter make followed by one -# or more of the precisions desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates all four test programs. -# The executable files which are created are called -# ../xblat3s, ../xblat3d, ../xblat3c, and ../xblat3z -# -# To remove the object files after the executable files have been -# created, enter -# make clean -# To force the source files to be recompiled, enter, for example, -# make single FRC=FRC -# -####################################################################### - -SBLAT3 = sblat3.o -CBLAT3 = cblat3.o -DBLAT3 = dblat3.o -ZBLAT3 = zblat3.o - -all: single double complex complex16 - -single: ../xblat3s -double: ../xblat3d -complex: ../xblat3c -complex16: ../xblat3z - -../xblat3s: $(SBLAT3) - $(LOADER) $(LOADOPTS) -o $@ $(SBLAT3) $(BLASLIB) - -../xblat3c: $(CBLAT3) - $(LOADER) $(LOADOPTS) -o $@ $(CBLAT3) $(BLASLIB) - -../xblat3d: $(DBLAT3) - $(LOADER) $(LOADOPTS) -o $@ $(DBLAT3) $(BLASLIB) - -../xblat3z: $(ZBLAT3) - $(LOADER) $(LOADOPTS) -o $@ $(ZBLAT3) $(BLASLIB) - -$(SBLAT3): $(FRC) -$(CBLAT3): $(FRC) -$(DBLAT3): $(FRC) -$(ZBLAT3): $(FRC) - -FRC: - @FRC=$(FRC) - -clean: - rm -f *.o - -.f.o: - $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/BLAS/TESTING/Makefile b/lapack-netlib/BLAS/TESTING/Makefile new file mode 100644 index 000000000..97150b1a3 --- /dev/null +++ b/lapack-netlib/BLAS/TESTING/Makefile @@ -0,0 +1,59 @@ +include ../../make.inc + +all: single double complex complex16 +single: xblat1s xblat2s xblat3s +double: xblat1d xblat2d xblat3d +complex: xblat1c xblat2c xblat3c +complex16: xblat1z xblat2z xblat3z + +xblat1s: sblat1.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat1d: dblat1.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat1c: cblat1.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat1z: zblat1.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ + +xblat2s: sblat2.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat2d: dblat2.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat2c: cblat2.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat2z: zblat2.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ + +xblat3s: sblat3.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat3d: dblat3.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat3c: cblat3.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xblat3z: zblat3.o $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ + +run: all + ./xblat1s > sblat1.out + ./xblat1d > dblat1.out + ./xblat1c > cblat1.out + ./xblat1z > zblat1.out + ./xblat2s < sblat2.in + ./xblat2d < dblat2.in + ./xblat2c < cblat2.in + ./xblat2z < zblat2.in + ./xblat3s < sblat3.in + ./xblat3d < dblat3.in + ./xblat3c < cblat3.in + ./xblat3z < zblat3.in + +clean: cleanobj cleanexe cleantest +cleanobj: + rm -f *.o +cleanexe: + rm -f xblat* +cleantest: + rm -f *.out core + +.f.o: + $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/BLAS/cblat2.in b/lapack-netlib/BLAS/TESTING/cblat2.in similarity index 100% rename from lapack-netlib/BLAS/cblat2.in rename to lapack-netlib/BLAS/TESTING/cblat2.in diff --git a/lapack-netlib/BLAS/cblat3.in b/lapack-netlib/BLAS/TESTING/cblat3.in similarity index 100% rename from lapack-netlib/BLAS/cblat3.in rename to lapack-netlib/BLAS/TESTING/cblat3.in diff --git a/lapack-netlib/BLAS/TESTING/dblat1.f b/lapack-netlib/BLAS/TESTING/dblat1.f index 7f606aa39..f3255fef4 100644 --- a/lapack-netlib/BLAS/TESTING/dblat1.f +++ b/lapack-netlib/BLAS/TESTING/dblat1.f @@ -37,7 +37,7 @@ * ===================================================================== PROGRAM DBLAT1 * -* -- Reference BLAS test routine (version 3.7.0) -- +* -- Reference BLAS test routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -139,7 +139,7 @@ DOUBLE PRECISION DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8), $ DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9) * .. External Subroutines .. - EXTERNAL DROTG, DROTMG, STEST1 + EXTERNAL DROTG, DROTMG, STEST, STEST1 * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. @@ -360,7 +360,8 @@ DOUBLE PRECISION DDOT, DSDOT EXTERNAL DDOT, DSDOT * .. External Subroutines .. - EXTERNAL DAXPY, DCOPY, DROTM, DSWAP, STEST, STEST1 + EXTERNAL DAXPY, DCOPY, DROTM, DSWAP, STEST, STEST1, + $ TESTDSDOT * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. diff --git a/lapack-netlib/BLAS/dblat2.in b/lapack-netlib/BLAS/TESTING/dblat2.in similarity index 100% rename from lapack-netlib/BLAS/dblat2.in rename to lapack-netlib/BLAS/TESTING/dblat2.in diff --git a/lapack-netlib/BLAS/dblat3.in b/lapack-netlib/BLAS/TESTING/dblat3.in similarity index 100% rename from lapack-netlib/BLAS/dblat3.in rename to lapack-netlib/BLAS/TESTING/dblat3.in diff --git a/lapack-netlib/BLAS/TESTING/sblat1.f b/lapack-netlib/BLAS/TESTING/sblat1.f index 3ea607be4..a5c1c6af6 100644 --- a/lapack-netlib/BLAS/TESTING/sblat1.f +++ b/lapack-netlib/BLAS/TESTING/sblat1.f @@ -37,7 +37,7 @@ * ===================================================================== PROGRAM SBLAT1 * -* -- Reference BLAS test routine (version 3.7.0) -- +* -- Reference BLAS test routine (version 3.8.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -139,7 +139,7 @@ REAL DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8), + DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9) * .. External Subroutines .. - EXTERNAL SROTG, SROTMG, STEST1 + EXTERNAL SROTG, SROTMG, STEST, STEST1 * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. diff --git a/lapack-netlib/BLAS/sblat2.in b/lapack-netlib/BLAS/TESTING/sblat2.in similarity index 100% rename from lapack-netlib/BLAS/sblat2.in rename to lapack-netlib/BLAS/TESTING/sblat2.in diff --git a/lapack-netlib/BLAS/sblat3.in b/lapack-netlib/BLAS/TESTING/sblat3.in similarity index 100% rename from lapack-netlib/BLAS/sblat3.in rename to lapack-netlib/BLAS/TESTING/sblat3.in diff --git a/lapack-netlib/BLAS/zblat2.in b/lapack-netlib/BLAS/TESTING/zblat2.in similarity index 100% rename from lapack-netlib/BLAS/zblat2.in rename to lapack-netlib/BLAS/TESTING/zblat2.in diff --git a/lapack-netlib/BLAS/zblat3.in b/lapack-netlib/BLAS/TESTING/zblat3.in similarity index 100% rename from lapack-netlib/BLAS/zblat3.in rename to lapack-netlib/BLAS/TESTING/zblat3.in diff --git a/lapack-netlib/BLAS/blas.pc.in b/lapack-netlib/BLAS/blas.pc.in index 7fd6f1e73..37809773b 100644 --- a/lapack-netlib/BLAS/blas.pc.in +++ b/lapack-netlib/BLAS/blas.pc.in @@ -1,9 +1,8 @@ -prefix=@prefix@ -libdir=@libdir@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: BLAS Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms Version: @LAPACK_VERSION@ URL: http://www.netlib.org/blas/ Libs: -L${libdir} -lblas -Libs.private: -lm diff --git a/lapack-netlib/CBLAS/CMakeLists.txt b/lapack-netlib/CBLAS/CMakeLists.txt index 580864fba..d9fa24530 100644 --- a/lapack-netlib/CBLAS/CMakeLists.txt +++ b/lapack-netlib/CBLAS/CMakeLists.txt @@ -28,7 +28,7 @@ endforeach() endmacro() append_subdir_files(CBLAS_INCLUDE "include") -install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h DESTINATION include) +install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) # -------------------------------------------------- if(BUILD_TESTING) @@ -45,7 +45,7 @@ endif() set(_cblas_config_install_guard_target "") if(ALL_TARGETS) install(EXPORT cblas-targets - DESTINATION ${LIBRARY_DIR}/cmake/cblas-${LAPACK_VERSION}) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) # Choose one of the cblas targets to use as a guard for # cblas-config.cmake to load targets from the install tree. list(GET ALL_TARGETS 0 _cblas_config_install_guard_target) @@ -78,8 +78,8 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake ${LAPACK_BINARY_DIR}/cblas-config-version.cmake - DESTINATION ${LIBRARY_DIR}/cmake/cblas-${LAPACK_VERSION} + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} ) #install(EXPORT cblas-targets -# DESTINATION ${LIBRARY_DIR}/cmake/cblas-${LAPACK_VERSION}) +# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) diff --git a/lapack-netlib/CBLAS/Makefile b/lapack-netlib/CBLAS/Makefile index 5a398f800..513e8fc82 100644 --- a/lapack-netlib/CBLAS/Makefile +++ b/lapack-netlib/CBLAS/Makefile @@ -1,27 +1,31 @@ include ../make.inc -all: - cd include && cp cblas_mangling_with_flags.h.in cblas_mangling.h - cd src && $(MAKE) all +all: cblas +cblas: include/cblas_mangling.h + $(MAKE) -C src -clean: cleanlib +include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in + cp $< $@ -cleanlib: - cd src && $(MAKE) clean - -cleanexe: - cd testing && $(MAKE) cleanexe - -cleanall: clean cleanexe - rm -f $(CBLASLIB) - cd examples && rm -f *.o cblas_ex1 cblas_ex2 +cblas_testing: cblas + $(MAKE) -C testing run -cblas_testing: - cd testing && $(MAKE) all +cblas_example: cblas + $(MAKE) -C examples -runtst: - cd testing && $(MAKE) run - -example: all - cd examples && $(MAKE) all +clean: + $(MAKE) -C src clean + $(MAKE) -C testing clean + $(MAKE) -C examples clean +cleanobj: + $(MAKE) -C src cleanobj + $(MAKE) -C testing cleanobj + $(MAKE) -C examples cleanobj +cleanlib: + $(MAKE) -C src cleanlib +cleanexe: + $(MAKE) -C testing cleanexe + $(MAKE) -C examples cleanexe +cleantest: + $(MAKE) -C testing cleantest diff --git a/lapack-netlib/CBLAS/cblas.pc.in b/lapack-netlib/CBLAS/cblas.pc.in index 4a938fe15..7c95ebbb4 100644 --- a/lapack-netlib/CBLAS/cblas.pc.in +++ b/lapack-netlib/CBLAS/cblas.pc.in @@ -1,9 +1,10 @@ -prefix=@prefix@ -libdir=@libdir@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: CBLAS Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms Version: @LAPACK_VERSION@ URL: http://www.netlib.org/blas/#_cblas Libs: -L${libdir} -lcblas -Requires: blas +Cflags: -I${includedir} +Requires.private: blas diff --git a/lapack-netlib/CBLAS/cmake/cblas-config-install.cmake.in b/lapack-netlib/CBLAS/cmake/cblas-config-install.cmake.in index a5e2183e1..215e28a57 100644 --- a/lapack-netlib/CBLAS/cmake/cblas-config-install.cmake.in +++ b/lapack-netlib/CBLAS/cmake/cblas-config-install.cmake.in @@ -5,7 +5,7 @@ get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) # Load the LAPACK package with which we were built. -set(LAPACK_DIR "${_CBLAS_PREFIX}/@{LIBRARY_DIR@/cmake/lapack-@LAPACK_VERSION@") +set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") find_package(LAPACK NO_MODULE) # Load lapacke targets from the install tree. diff --git a/lapack-netlib/CBLAS/examples/CMakeLists.txt b/lapack-netlib/CBLAS/examples/CMakeLists.txt index a4bab6bee..0241fd164 100644 --- a/lapack-netlib/CBLAS/examples/CMakeLists.txt +++ b/lapack-netlib/CBLAS/examples/CMakeLists.txt @@ -1,7 +1,7 @@ add_executable(xexample1_CBLAS cblas_example1.c) add_executable(xexample2_CBLAS cblas_example2.c) -target_link_libraries(xexample1_CBLAS cblas ${BLAS_LIBRARIES}) +target_link_libraries(xexample1_CBLAS cblas) target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES}) add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS) diff --git a/lapack-netlib/CBLAS/examples/Makefile b/lapack-netlib/CBLAS/examples/Makefile index 1d416a881..664b8bc57 100644 --- a/lapack-netlib/CBLAS/examples/Makefile +++ b/lapack-netlib/CBLAS/examples/Makefile @@ -1,14 +1,17 @@ include ../../make.inc -all: example1 example2 +all: cblas_ex1 cblas_ex2 -example1: - $(CC) $(CFLAGS) -I../include -c cblas_example1.c - $(LOADER) $(LOADOPTS) -o cblas_ex1 cblas_example1.o $(CBLASLIB) $(BLASLIB) +cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -example2: - $(CC) $(CFLAGS) -I../include -c cblas_example2.c - $(LOADER) $(LOADOPTS) -o cblas_ex2 cblas_example2.o $(CBLASLIB) $(BLASLIB) +clean: cleanobj cleanexe +cleanobj: + rm -f *.o +cleanexe: + rm -f cblas_ex1 cblas_ex2 -cleanall: - rm -f *.o cblas_ex1 cblas_ex2 +.c.o: + $(CC) $(CFLAGS) -I../include -c -o $@ $< diff --git a/lapack-netlib/CBLAS/src/CMakeLists.txt b/lapack-netlib/CBLAS/src/CMakeLists.txt index 20f8eb4cb..90e19f818 100644 --- a/lapack-netlib/CBLAS/src/CMakeLists.txt +++ b/lapack-netlib/CBLAS/src/CMakeLists.txt @@ -1,7 +1,6 @@ # This Makefile compiles the CBLAS routines -# -# Error handling routines for level 2 & 3 +# Error handling routines for level 2 & 3 set(ERRHAND cblas_globals.c cblas_xerbla.c xerbla.c) # @@ -12,49 +11,34 @@ set(ERRHAND cblas_globals.c cblas_xerbla.c xerbla.c) # # -# -# All object files for single real precision -# +# Files for level 1 single precision real set(SLEV1 cblas_srotg.c cblas_srotmg.c cblas_srot.c cblas_srotm.c cblas_sswap.c cblas_sscal.c cblas_scopy.c cblas_saxpy.c cblas_sdot.c cblas_sdsdot.c cblas_snrm2.c cblas_sasum.c cblas_isamax.c sdotsub.f sdsdotsub.f snrm2sub.f sasumsub.f isamaxsub.f) -# -# All object files for double real precision -# +# Files for level 1 double precision real set(DLEV1 cblas_drotg.c cblas_drotmg.c cblas_drot.c cblas_drotm.c cblas_dswap.c cblas_dscal.c cblas_dcopy.c cblas_daxpy.c cblas_ddot.c cblas_dsdot.c cblas_dnrm2.c cblas_dasum.c cblas_idamax.c ddotsub.f dsdotsub.f dnrm2sub.f dasumsub.f idamaxsub.f) -# -# All object files for single complex precision -# +# Files for level 1 single precision complex set(CLEV1 cblas_cswap.c cblas_cscal.c cblas_csscal.c cblas_ccopy.c cblas_caxpy.c cblas_cdotu_sub.c cblas_cdotc_sub.c cblas_icamax.c cdotcsub.f cdotusub.f icamaxsub.f) -# -# All object files for double complex precision -# +# Files for level 1 double precision complex set(ZLEV1 cblas_zswap.c cblas_zscal.c cblas_zdscal.c cblas_zcopy.c cblas_zaxpy.c cblas_zdotu_sub.c cblas_zdotc_sub.c cblas_dznrm2.c cblas_dzasum.c cblas_izamax.c zdotcsub.f zdotusub.f dzasumsub.f dznrm2sub.f izamaxsub.f) -# -# Common files for single complex precision -# +# Common files for level 1 single precision set(SCLEV1 cblas_scasum.c scasumsub.f cblas_scnrm2.c scnrm2sub.f) -# -# All object files -# -set(ALEV1 ${slev1} ${dlev1} ${clev1} ${zlev1} ${sclev1}) - # # # CBLAS routines @@ -63,43 +47,30 @@ set(ALEV1 ${slev1} ${dlev1} ${clev1} ${zlev1} ${sclev1}) # # -# -# All object files for single real precision -# +# Files for level 2 single precision real set(SLEV2 cblas_sgemv.c cblas_sgbmv.c cblas_sger.c cblas_ssbmv.c cblas_sspmv.c cblas_sspr.c cblas_sspr2.c cblas_ssymv.c cblas_ssyr.c cblas_ssyr2.c cblas_stbmv.c cblas_stbsv.c cblas_stpmv.c cblas_stpsv.c cblas_strmv.c cblas_strsv.c) -# -# All object files for double real precision -# +# Files for level 2 double precision real set(DLEV2 cblas_dgemv.c cblas_dgbmv.c cblas_dger.c cblas_dsbmv.c cblas_dspmv.c cblas_dspr.c cblas_dspr2.c cblas_dsymv.c cblas_dsyr.c cblas_dsyr2.c cblas_dtbmv.c cblas_dtbsv.c cblas_dtpmv.c cblas_dtpsv.c cblas_dtrmv.c cblas_dtrsv.c) -# -# All object files for single complex precision -# +# Files for level 2 single precision complex set(CLEV2 cblas_cgemv.c cblas_cgbmv.c cblas_chemv.c cblas_chbmv.c cblas_chpmv.c cblas_ctrmv.c cblas_ctbmv.c cblas_ctpmv.c cblas_ctrsv.c cblas_ctbsv.c cblas_ctpsv.c cblas_cgeru.c cblas_cgerc.c cblas_cher.c cblas_cher2.c cblas_chpr.c cblas_chpr2.c) -# -# All object files for double complex precision -# +# Files for level 2 double precision complex set(ZLEV2 cblas_zgemv.c cblas_zgbmv.c cblas_zhemv.c cblas_zhbmv.c cblas_zhpmv.c cblas_ztrmv.c cblas_ztbmv.c cblas_ztpmv.c cblas_ztrsv.c cblas_ztbsv.c cblas_ztpsv.c cblas_zgeru.c cblas_zgerc.c cblas_zher.c cblas_zher2.c cblas_zhpr.c cblas_zhpr2.c) -# -# All object files -# -set(AVEL2 ${slev2} ${dlev2} ${clev2} ${zlev2}) - # # # CBLAS routines @@ -108,63 +79,50 @@ set(AVEL2 ${slev2} ${dlev2} ${clev2} ${zlev2}) # # -# -# All object files for single real precision -# +# Files for level 3 single precision real set(SLEV3 cblas_sgemm.c cblas_ssymm.c cblas_ssyrk.c cblas_ssyr2k.c cblas_strmm.c cblas_strsm.c) -# -# All object files for double real precision -# +# Files for level 3 double precision real set(DLEV3 cblas_dgemm.c cblas_dsymm.c cblas_dsyrk.c cblas_dsyr2k.c cblas_dtrmm.c cblas_dtrsm.c) -# -# All object files for single complex precision -# +# Files for level 3 single precision complex set(CLEV3 cblas_cgemm.c cblas_csymm.c cblas_chemm.c cblas_cherk.c cblas_cher2k.c cblas_ctrmm.c cblas_ctrsm.c cblas_csyrk.c cblas_csyr2k.c) -# -# All object files for double complex precision -# +# Files for level 3 double precision complex set(ZLEV3 cblas_zgemm.c cblas_zsymm.c cblas_zhemm.c cblas_zherk.c cblas_zher2k.c cblas_ztrmm.c cblas_ztrsm.c cblas_zsyrk.c cblas_zsyr2k.c) -# -# All object files -# -set(ALEV3 ${slev3} ${dlev3} ${clev3} ${zlev3}) - -# default build all of it -set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND} - ${DLEV1} ${DLEV2} ${DLEV3} - ${CLEV1} ${CLEV2} ${CLEV3} - ${ZLEV1} ${ZLEV2} ${ZLEV3}) -# Single real precision -if(CBLAS_SINGLE) - set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND}) +set(SOURCES) +if(BUILD_SINGLE) + list(APPEND SOURCES ${SLEV1} ${SCLEV1} ${SLEV2} ${SLEV3} ${ERRHAND}) endif() - -# Double real precision -if(CBLAS_DOUBLE) - set(ALLOBJ ${DLEV1} ${DLEV2} ${DLEV3} ${ERRHAND}) +if(BUILD_DOUBLE) + list(APPEND SOURCES ${DLEV1} ${DLEV2} ${DLEV3} ${ERRHAND}) endif() - -# Single complex precision -if(CBLAS_COMPLEX) - set(ALLOBJ ${CLEV1} ${SCLEV1} ${CLEV2} ${CLEV3} ${ERRHAND}) +if(BUILD_COMPLEX) + list(APPEND SOURCES ${CLEV1} ${SCLEV1} ${CLEV2} ${CLEV3} ${ERRHAND}) endif() - -# Double complex precision -if(CBLAS_COMPLEX16) - set(ALLOBJ ${ZLEV1} ${ZLEV2} ${ZLEV3} ${ERRHAND}) +if(BUILD_COMPLEX16) + list(APPEND SOURCES ${ZLEV1} ${ZLEV2} ${ZLEV3} ${ERRHAND}) endif() - -add_library(cblas ${ALLOBJ}) -target_link_libraries(cblas ${BLAS_LIBRARIES}) +list(REMOVE_DUPLICATES SOURCES) + +add_library(cblas ${SOURCES}) +set_target_properties( + cblas PROPERTIES + LINKER_LANGUAGE C + VERSION ${LAPACK_VERSION} + SOVERSION ${LAPACK_MAJOR_VERSION} + ) +target_include_directories(cblas PUBLIC + $ + $ +) +target_link_libraries(cblas PRIVATE ${BLAS_LIBRARIES}) lapack_install_library(cblas) diff --git a/lapack-netlib/CBLAS/src/Makefile b/lapack-netlib/CBLAS/src/Makefile index 1d1a0db88..6c0518ac7 100644 --- a/lapack-netlib/CBLAS/src/Makefile +++ b/lapack-netlib/CBLAS/src/Makefile @@ -1,22 +1,12 @@ # This Makefile compiles the CBLAS routines -# -include ../../make.inc -# -# Erase all object and archive files -# -all: cblaslib +include ../../make.inc -clean: - rm -f *.o a.out core +all: $(CBLASLIB) # Error handling routines for level 2 & 3 - errhand = cblas_globals.o cblas_xerbla.o xerbla.o -# Object files of all routines -alev = $(alev1) $(alev2) $(alev3) $(errhand) - # # # CBLAS routines @@ -25,76 +15,52 @@ alev = $(alev1) $(alev2) $(alev3) $(errhand) # # -# -# All object files for single real precision -# +# Files for level 1 single precision real slev1 = cblas_srotg.o cblas_srotmg.o cblas_srot.o cblas_srotm.o \ cblas_sswap.o cblas_sscal.o cblas_scopy.o cblas_saxpy.o \ cblas_sdot.o cblas_sdsdot.o cblas_snrm2.o cblas_sasum.o \ cblas_isamax.o sdotsub.o sdsdotsub.o snrm2sub.o sasumsub.o \ isamaxsub.o -# -# All object files for double real precision -# +# Files for level 1 double precision real dlev1 = cblas_drotg.o cblas_drotmg.o cblas_drot.o cblas_drotm.o \ cblas_dswap.o cblas_dscal.o cblas_dcopy.o cblas_daxpy.o \ cblas_ddot.o cblas_dsdot.o cblas_dnrm2.o cblas_dasum.o \ cblas_idamax.o ddotsub.o dsdotsub.o dnrm2sub.o \ dasumsub.o idamaxsub.o -# -# All object files for single complex precision -# +# Files for level 1 single precision complex clev1 = cblas_cswap.o cblas_cscal.o cblas_csscal.o cblas_ccopy.o \ cblas_caxpy.o cblas_cdotu_sub.o cblas_cdotc_sub.o \ cblas_icamax.o cdotcsub.o cdotusub.o icamaxsub.o -# -# All object files for double complex precision -# +# Files for level 1 double precision complex zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \ cblas_zaxpy.o cblas_zdotu_sub.o cblas_zdotc_sub.o cblas_dznrm2.o \ cblas_dzasum.o cblas_izamax.o zdotcsub.o zdotusub.o \ dzasumsub.o dznrm2sub.o izamaxsub.o -# -# Common files for single / complex precision -# +# Common files for level 1 single precision sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o -# -# All object files -# -alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1) - -# -# Make an archive file -# - -# Single real precision +# Single precision real slib1: $(slev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev1) $(sclev1) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Double real precision +# Double precision real dlib1: $(dlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev1) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Single complex precision +# Single precision complex clib1: $(clev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev1) $(sclev1) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Double complex precision +# Double precision complex zlib1: $(zlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev1) - $(RANLIB) $(CBLASLIB) - -# All precisions -all1: $(alev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev1) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -105,70 +71,48 @@ all1: $(alev1) # # -# -# All object files for single real precision -# +# Files for level 2 single precision real slev2 = cblas_sgemv.o cblas_sgbmv.o cblas_sger.o cblas_ssbmv.o cblas_sspmv.o \ cblas_sspr.o cblas_sspr2.o cblas_ssymv.o cblas_ssyr.o cblas_ssyr2.o \ cblas_stbmv.o cblas_stbsv.o cblas_stpmv.o cblas_stpsv.o cblas_strmv.o \ cblas_strsv.o -# -# All object files for double real precision -# +# Files for level 2 double precision real dlev2 = cblas_dgemv.o cblas_dgbmv.o cblas_dger.o cblas_dsbmv.o cblas_dspmv.o \ cblas_dspr.o cblas_dspr2.o cblas_dsymv.o cblas_dsyr.o cblas_dsyr2.o \ cblas_dtbmv.o cblas_dtbsv.o cblas_dtpmv.o cblas_dtpsv.o cblas_dtrmv.o \ cblas_dtrsv.o -# -# All object files for single complex precision -# +# Files for level 2 single precision complex clev2 = cblas_cgemv.o cblas_cgbmv.o cblas_chemv.o cblas_chbmv.o cblas_chpmv.o \ cblas_ctrmv.o cblas_ctbmv.o cblas_ctpmv.o cblas_ctrsv.o cblas_ctbsv.o \ cblas_ctpsv.o cblas_cgeru.o cblas_cgerc.o cblas_cher.o cblas_cher2.o \ cblas_chpr.o cblas_chpr2.o -# -# All object files for double complex precision -# +# Files for level 2 double precision complex zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ cblas_ztrmv.o cblas_ztbmv.o cblas_ztpmv.o cblas_ztrsv.o cblas_ztbsv.o \ cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \ cblas_zhpr.o cblas_zhpr2.o -# -# All object files -# -alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) - -# -# Make an archive file -# - -# Single real precision +# Single precision real slib2: $(slev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev2) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Double real precision +# Double precision real dlib2: $(dlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev2) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Single complex precision +# Single precision complex clib2: $(clev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev2) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Double complex precision +# Double precision complex zlib2: $(zlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev2) $(errhand) - $(RANLIB) $(CBLASLIB) - -# All precisions -all2: $(alev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev2) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -179,76 +123,79 @@ all2: $(alev2) $(errhand) # # -# -# All object files for single real precision -# +# Files for level 3 single precision real slev3 = cblas_sgemm.o cblas_ssymm.o cblas_ssyrk.o cblas_ssyr2k.o cblas_strmm.o \ cblas_strsm.o -# -# All object files for double real precision -# +# Files for level 3 double precision real dlev3 = cblas_dgemm.o cblas_dsymm.o cblas_dsyrk.o cblas_dsyr2k.o cblas_dtrmm.o \ cblas_dtrsm.o -# -# All object files for single complex precision -# +# Files for level 3 single precision complex clev3 = cblas_cgemm.o cblas_csymm.o cblas_chemm.o cblas_cherk.o \ cblas_cher2k.o cblas_ctrmm.o cblas_ctrsm.o cblas_csyrk.o \ cblas_csyr2k.o -# -# All object files for double complex precision -# +# Files for level 3 double precision complex zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \ cblas_zsyr2k.o -# -# All object files -# -alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) - -# -# Make an archive file -# - -# Single real precision +# Single precision real slib3: $(slev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev3) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Double real precision +# Double precision real dlib3: $(dlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev3) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Single complex precision +# Single precision complex clib3: $(clev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev3) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# Single complex precision +# Double precision complex zlib3: $(zlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev3) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(RANLIB) $(CBLASLIB) + + +alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1) +alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) +alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) + +# All level 1 +all1: $(alev1) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) -# All precisions +# All level 2 +all2: $(alev2) $(errhand) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(RANLIB) $(CBLASLIB) + +# All level 3 all3: $(alev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev3) + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All levels and precisions -cblaslib: $(alev) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev) - $(RANLIB) $(CBLASLIB) +$(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ FRC: @FRC=$(FRC) +clean: cleanobj cleanlib +cleanobj: + rm -f *.o +cleanlib: + rm -f $(CBLASLIB) + .c.o: $(CC) $(CFLAGS) -I../include -c -o $@ $< - .f.o: $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/CBLAS/testing/CMakeLists.txt b/lapack-netlib/CBLAS/testing/CMakeLists.txt index fe9a51e16..2459695b8 100644 --- a/lapack-netlib/CBLAS/testing/CMakeLists.txt +++ b/lapack-netlib/CBLAS/testing/CMakeLists.txt @@ -25,22 +25,22 @@ macro(add_cblas_test output input target) endmacro() -# Object files for single real precision +# Object files for single precision real set(STESTL1O c_sblas1.c) set(STESTL2O c_sblas2.c c_s2chke.c auxiliary.c c_xerbla.c) set(STESTL3O c_sblas3.c c_s3chke.c auxiliary.c c_xerbla.c) -# Object files for double real precision +# Object files for double precision real set(DTESTL1O c_dblas1.c) set(DTESTL2O c_dblas2.c c_d2chke.c auxiliary.c c_xerbla.c) set(DTESTL3O c_dblas3.c c_d3chke.c auxiliary.c c_xerbla.c) -# Object files for single complex precision +# Object files for single precision complex set(CTESTL1O c_cblat1.f c_cblas1.c) set(CTESTL2O c_cblas2.c c_c2chke.c auxiliary.c c_xerbla.c) set(CTESTL3O c_cblas3.c c_c3chke.c auxiliary.c c_xerbla.c) -# Object files for double complex precision +# Object files for double precision complex set(ZTESTL1O c_zblas1.c) set(ZTESTL2O c_zblas2.c c_z2chke.c auxiliary.c c_xerbla.c) set(ZTESTL3O c_zblas3.c c_z3chke.c auxiliary.c c_xerbla.c) @@ -52,9 +52,9 @@ if(BUILD_SINGLE) add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) - target_link_libraries(xscblat1 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xscblat2 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xscblat3 cblas ${BLAS_LIBRARIES}) + target_link_libraries(xscblat1 cblas) + target_link_libraries(xscblat2 cblas) + target_link_libraries(xscblat3 cblas) add_cblas_test(stest1.out "" xscblat1) add_cblas_test(stest2.out sin2 xscblat2) @@ -66,9 +66,9 @@ if(BUILD_DOUBLE) add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) - target_link_libraries(xdcblat1 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xdcblat2 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xdcblat3 cblas ${BLAS_LIBRARIES}) + target_link_libraries(xdcblat1 cblas) + target_link_libraries(xdcblat2 cblas) + target_link_libraries(xdcblat3 cblas) add_cblas_test(dtest1.out "" xdcblat1) add_cblas_test(dtest2.out din2 xdcblat2) @@ -81,8 +81,8 @@ if(BUILD_COMPLEX) add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) target_link_libraries(xccblat1 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xccblat2 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xccblat3 cblas ${BLAS_LIBRARIES}) + target_link_libraries(xccblat2 cblas) + target_link_libraries(xccblat3 cblas) add_cblas_test(ctest1.out "" xccblat1) add_cblas_test(ctest2.out cin2 xccblat2) @@ -94,9 +94,9 @@ if(BUILD_COMPLEX16) add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) - target_link_libraries(xzcblat1 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xzcblat2 cblas ${BLAS_LIBRARIES}) - target_link_libraries(xzcblat3 cblas ${BLAS_LIBRARIES}) + target_link_libraries(xzcblat1 cblas) + target_link_libraries(xzcblat2 cblas) + target_link_libraries(xzcblat3 cblas) add_cblas_test(ztest1.out "" xzcblat1) add_cblas_test(ztest2.out zin2 xzcblat2) diff --git a/lapack-netlib/CBLAS/testing/Makefile b/lapack-netlib/CBLAS/testing/Makefile index a5a078372..0182c3e88 100644 --- a/lapack-netlib/CBLAS/testing/Makefile +++ b/lapack-netlib/CBLAS/testing/Makefile @@ -7,120 +7,104 @@ include ../../make.inc # Archive files necessary to compile LIB = $(CBLASLIB) $(BLASLIB) -# Object files for single real precision +# Object files for single precision real stestl1o = c_sblas1.o stestl2o = c_sblas2.o c_s2chke.o auxiliary.o c_xerbla.o stestl3o = c_sblas3.o c_s3chke.o auxiliary.o c_xerbla.o -# Object files for double real precision +# Object files for double precision real dtestl1o = c_dblas1.o dtestl2o = c_dblas2.o c_d2chke.o auxiliary.o c_xerbla.o dtestl3o = c_dblas3.o c_d3chke.o auxiliary.o c_xerbla.o -# Object files for single complex precision +# Object files for single precision complex ctestl1o = c_cblas1.o ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o -# Object files for double complex precision +# Object files for double precision complex ztestl1o = c_zblas1.o ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o all: all1 all2 all3 -all1: stest1 dtest1 ctest1 ztest1 -all2: stest2 dtest2 ctest2 ztest2 -all3: stest3 dtest3 ctest3 ztest3 - -clean: - rm -f core *.o *.out x* -cleanobj: - rm -f core *.o a.out -cleanexe: - rm -f x* - -stest1: xscblat1 -dtest1: xdcblat1 -ctest1: xccblat1 -ztest1: xzcblat1 - -stest2: xscblat2 -dtest2: xdcblat2 -ctest2: xccblat2 -ztest2: xzcblat2 - -stest3: xscblat3 -dtest3: xdcblat3 -ctest3: xccblat3 -ztest3: xzcblat3 +all1: xscblat1 xdcblat1 xccblat1 xzcblat1 +all2: xscblat2 xdcblat2 xccblat2 xzcblat2 +all3: xscblat3 xdcblat3 xccblat3 xzcblat3 # # Compile each precision # # Single real -xscblat1: $(stestl1o) c_sblat1.o - $(LOADER) $(LOADOPTS) -o $@ c_sblat1.o $(stestl1o) $(LIB) -xscblat2: $(stestl2o) c_sblat2.o - $(LOADER) $(LOADOPTS) -o $@ c_sblat2.o $(stestl2o) $(LIB) -xscblat3: $(stestl3o) c_sblat3.o - $(LOADER) $(LOADOPTS) -o $@ c_sblat3.o $(stestl3o) $(LIB) +xscblat1: c_sblat1.o $(stestl1o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xscblat2: c_sblat2.o $(stestl2o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xscblat3: c_sblat3.o $(stestl3o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ # Double real -xdcblat1: $(dtestl1o) c_dblat1.o - $(LOADER) $(LOADOPTS) -o $@ c_dblat1.o $(dtestl1o) $(LIB) -xdcblat2: $(dtestl2o) c_dblat2.o - $(LOADER) $(LOADOPTS) -o $@ c_dblat2.o $(dtestl2o) $(LIB) -xdcblat3: $(dtestl3o) c_dblat3.o - $(LOADER) $(LOADOPTS) -o $@ c_dblat3.o $(dtestl3o) $(LIB) +xdcblat1: c_dblat1.o $(dtestl1o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xdcblat2: c_dblat2.o $(dtestl2o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xdcblat3: c_dblat3.o $(dtestl3o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ # Single complex -xccblat1: $(ctestl1o) c_cblat1.o - $(LOADER) $(LOADOPTS) -o $@ c_cblat1.o $(ctestl1o) $(LIB) -xccblat2: $(ctestl2o) c_cblat2.o - $(LOADER) $(LOADOPTS) -o $@ c_cblat2.o $(ctestl2o) $(LIB) -xccblat3: $(ctestl3o) c_cblat3.o - $(LOADER) $(LOADOPTS) -o $@ c_cblat3.o $(ctestl3o) $(LIB) +xccblat1: c_cblat1.o $(ctestl1o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xccblat2: c_cblat2.o $(ctestl2o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xccblat3: c_cblat3.o $(ctestl3o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ # Double complex -xzcblat1: $(ztestl1o) c_zblat1.o - $(LOADER) $(LOADOPTS) -o $@ c_zblat1.o $(ztestl1o) $(LIB) -xzcblat2: $(ztestl2o) c_zblat2.o - $(LOADER) $(LOADOPTS) -o $@ c_zblat2.o $(ztestl2o) $(LIB) -xzcblat3: $(ztestl3o) c_zblat3.o - $(LOADER) $(LOADOPTS) -o $@ c_zblat3.o $(ztestl3o) $(LIB) +xzcblat1: c_zblat1.o $(ztestl1o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xzcblat2: c_zblat2.o $(ztestl2o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ +xzcblat3: c_zblat3.o $(ztestl3o) $(LIB) + $(LOADER) $(LOADOPTS) -o $@ $^ # RUN TESTS -run: - @echo "--> TESTING CBLAS 1 - SINGLE PRECISION <--" +run: all + @echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--" @./xscblat1 > stest1.out - @echo "--> TESTING CBLAS 1 - DOUBLE PRECISION <--" + @echo "--> TESTING CBLAS 1 - DOUBLE PRECISION REAL <--" @./xdcblat1 > dtest1.out - @echo "--> TESTING CBLAS 1 - COMPLEX PRECISION <--" + @echo "--> TESTING CBLAS 1 - SINGLE PRECISION COMPLEX <--" @./xccblat1 > ctest1.out - @echo "--> TESTING CBLAS 1 - DOUBLE COMPLEX PRECISION <--" + @echo "--> TESTING CBLAS 1 - DOUBLE PRECISION COMPLEX <--" @./xzcblat1 > ztest1.out - @echo "--> TESTING CBLAS 2 - SINGLE PRECISION <--" + @echo "--> TESTING CBLAS 2 - SINGLE PRECISION REAL <--" @./xscblat2 < sin2 > stest2.out - @echo "--> TESTING CBLAS 2 - DOUBLE PRECISION <--" + @echo "--> TESTING CBLAS 2 - DOUBLE PRECISION REAL <--" @./xdcblat2 < din2 > dtest2.out - @echo "--> TESTING CBLAS 2 - COMPLEX PRECISION <--" + @echo "--> TESTING CBLAS 2 - SINGLE PRECISION COMPLEX <--" @./xccblat2 < cin2 > ctest2.out - @echo "--> TESTING CBLAS 2 - DOUBLE COMPLEX PRECISION <--" + @echo "--> TESTING CBLAS 2 - DOUBLE PRECISION COMPLEX <--" @./xzcblat2 < zin2 > ztest2.out - @echo "--> TESTING CBLAS 3 - SINGLE PRECISION <--" + @echo "--> TESTING CBLAS 3 - SINGLE PRECISION REAL <--" @./xscblat3 < sin3 > stest3.out - @echo "--> TESTING CBLAS 3 - DOUBLE PRECISION <--" + @echo "--> TESTING CBLAS 3 - DOUBLE PRECISION REAL <--" @./xdcblat3 < din3 > dtest3.out - @echo "--> TESTING CBLAS 3 - COMPLEX PRECISION <--" + @echo "--> TESTING CBLAS 3 - SINGLE PRECISION COMPLEX <--" @./xccblat3 < cin3 > ctest3.out - @echo "--> TESTING CBLAS 3 - DOUBLE COMPLEX PRECISION <--" + @echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--" @./xzcblat3 < zin3 > ztest3.out -.SUFFIXES: .o .f .c +clean: cleanobj cleanexe cleantest +cleanobj: + rm -f *.o +cleanexe: + rm -f x* +cleantest: + rm -f *.out core +.SUFFIXES: .o .f .c .c.o: $(CC) $(CFLAGS) -I../include -c -o $@ $< - .f.o: $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/CMAKE/FindGcov.cmake b/lapack-netlib/CMAKE/FindGcov.cmake new file mode 100644 index 000000000..4807f903e --- /dev/null +++ b/lapack-netlib/CMAKE/FindGcov.cmake @@ -0,0 +1,155 @@ +# This file is part of CMake-codecov. +# +# https://github.com/RWTH-ELP/CMake-codecov +# +# Copyright (c) +# 2015-2016 RWTH Aachen University, Federal Republic of Germany +# +# LICENSE : BSD 3-Clause License +# +# Written by Alexander Haase, alexander.haase@rwth-aachen.de +# Updated by Guillaume Jacquenot, guillaume.jacquenot@gmail.com + +# include required Modules +include(FindPackageHandleStandardArgs) + + +# Search for gcov binary. +set(CMAKE_REQUIRED_QUIET_SAVE ${CMAKE_REQUIRED_QUIET}) +set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) + +get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach (LANG ${ENABLED_LANGUAGES}) + # Gcov evaluation is dependend on the used compiler. Check gcov support for + # each compiler that is used. If gcov binary was already found for this + # compiler, do not try to find it again. + if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN) + get_filename_component(COMPILER_PATH "${CMAKE_${LANG}_COMPILER}" PATH) + + if("${CMAKE_${LANG}_COMPILER_ID}" STREQUAL "GNU") + # Some distributions like OSX (homebrew) ship gcov with the compiler + # version appended as gcov-x. To find this binary we'll build the + # suggested binary name with the compiler version. + string(REGEX MATCH "^[0-9]+" GCC_VERSION + "${CMAKE_${LANG}_COMPILER_VERSION}") + + find_program(GCOV_BIN NAMES gcov-${GCC_VERSION} gcov + HINTS ${COMPILER_PATH}) + + elseif("${CMAKE_${LANG}_COMPILER_ID}" STREQUAL "Clang") + # Some distributions like Debian ship llvm-cov with the compiler + # version appended as llvm-cov-x.y. To find this binary we'll build + # the suggested binary name with the compiler version. + string(REGEX MATCH "^[0-9]+.[0-9]+" LLVM_VERSION + "${CMAKE_${LANG}_COMPILER_VERSION}") + + # llvm-cov prior version 3.5 seems to be not working with coverage + # evaluation tools, but these versions are compatible with the gcc + # gcov tool. + if(LLVM_VERSION VERSION_GREATER 3.4) + find_program(LLVM_COV_BIN NAMES "llvm-cov-${LLVM_VERSION}" + "llvm-cov" HINTS ${COMPILER_PATH}) + mark_as_advanced(LLVM_COV_BIN) + + if(LLVM_COV_BIN) + find_program(LLVM_COV_WRAPPER "llvm-cov-wrapper" PATHS + ${CMAKE_MODULE_PATH}) + if(LLVM_COV_WRAPPER) + set(GCOV_BIN "${LLVM_COV_WRAPPER}" CACHE FILEPATH "") + + # set additional parameters + set(GCOV_${CMAKE_${LANG}_COMPILER_ID}_ENV + "LLVM_COV_BIN=${LLVM_COV_BIN}" CACHE STRING + "Environment variables for llvm-cov-wrapper.") + mark_as_advanced(GCOV_${CMAKE_${LANG}_COMPILER_ID}_ENV) + endif() + endif() + endif() + + if(NOT GCOV_BIN) + # Fall back to gcov binary if llvm-cov was not found or is + # incompatible. This is the default on OSX, but may crash on + # recent Linux versions. + find_program(GCOV_BIN gcov HINTS ${COMPILER_PATH}) + endif() + endif() + + + if(GCOV_BIN) + set(GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN "${GCOV_BIN}" CACHE STRING + "${LANG} gcov binary.") + + if(NOT CMAKE_REQUIRED_QUIET) + message("-- Found gcov evaluation for " + "${CMAKE_${LANG}_COMPILER_ID}: ${GCOV_BIN}") + endif() + + unset(GCOV_BIN CACHE) + endif() + endif() +endforeach () + + +# Add a new global target for all gcov targets. This target could be used to +# generate the gcov files for the whole project instead of calling -gcov +# for each target. +if(NOT TARGET coverage) + add_custom_target(coverage) +endif() + + +# This function will add gcov evaluation for target . Only sources of +# this target will be evaluated and no dependencies will be added. It will call +# Gcov on any source file of once and store the gcov file in the same +# directory. +function (add_gcov_target TNAME) + set(TDIR ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${TNAME}.dir) + + # We don't have to check, if the target has support for coverage, thus this + # will be checked by add_coverage_target in Findcoverage.cmake. Instead we + # have to determine which gcov binary to use. + get_target_property(TSOURCES ${TNAME} SOURCES) + set(SOURCES "") + set(TCOMPILER "") + foreach (FILE ${TSOURCES}) + codecov_path_of_source(${FILE} FILE) + if(NOT "${FILE}" STREQUAL "") + codecov_lang_of_source(${FILE} LANG) + if(NOT "${LANG}" STREQUAL "") + list(APPEND SOURCES "${FILE}") + set(TCOMPILER ${CMAKE_${LANG}_COMPILER_ID}) + endif() + endif() + endforeach() + + # If no gcov binary was found, coverage data can't be evaluated. + if(NOT GCOV_${TCOMPILER}_BIN) + message(WARNING "No coverage evaluation binary found for ${TCOMPILER}.") + return() + endif() + + set(GCOV_BIN "${GCOV_${TCOMPILER}_BIN}") + set(GCOV_ENV "${GCOV_${TCOMPILER}_ENV}") + + + set(BUFFER "") + foreach(FILE ${SOURCES}) + get_filename_component(FILE_PATH "${TDIR}/${FILE}" PATH) + + # call gcov + add_custom_command(OUTPUT ${TDIR}/${FILE}.gcov + COMMAND ${GCOV_ENV} ${GCOV_BIN} ${TDIR}/${FILE}.gcno > /dev/null + DEPENDS ${TNAME} ${TDIR}/${FILE}.gcno + WORKING_DIRECTORY ${FILE_PATH} + ) + + list(APPEND BUFFER ${TDIR}/${FILE}.gcov) + endforeach() + + + # add target for gcov evaluation of + add_custom_target(${TNAME}-gcov DEPENDS ${BUFFER}) + + # add evaluation target to the global gcov target. + add_dependencies(coverage ${TNAME}-gcov) +endfunction() diff --git a/lapack-netlib/CMAKE/Findcodecov.cmake b/lapack-netlib/CMAKE/Findcodecov.cmake new file mode 100644 index 000000000..1f33b2c09 --- /dev/null +++ b/lapack-netlib/CMAKE/Findcodecov.cmake @@ -0,0 +1,202 @@ +# This file is part of CMake-codecov. +# +# https://github.com/RWTH-ELP/CMake-codecov +# +# Copyright (c) +# 2015-2016 RWTH Aachen University, Federal Republic of Germany +# +# LICENSE : BSD 3-Clause License +# +# Written by Alexander Haase, alexander.haase@rwth-aachen.de +# Updated by Guillaume Jacquenot, guillaume.jacquenot@gmail.com + +set(COVERAGE_FLAG_CANDIDATES + # gcc and clang + "-O0 -g -fprofile-arcs -ftest-coverage" + + # gcc and clang fallback + "-O0 -g --coverage" +) + + +# To avoid error messages about CMP0051, this policy will be set to new. There +# will be no problem, as TARGET_OBJECTS generator expressions will be filtered +# with a regular expression from the sources. +if(POLICY CMP0051) + cmake_policy(SET CMP0051 NEW) +endif() + + +# Add coverage support for target ${TNAME} and register target for coverage +# evaluation. +function(add_coverage TNAME) + foreach (TNAME ${ARGV}) + add_coverage_target(${TNAME}) + endforeach() +endfunction() + + +# Find the reuired flags foreach language. +set(CMAKE_REQUIRED_QUIET_SAVE ${CMAKE_REQUIRED_QUIET}) +set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) + +get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach (LANG ${ENABLED_LANGUAGES}) + # Coverage flags are not dependend on language, but the used compiler. So + # instead of searching flags foreach language, search flags foreach compiler + # used. + set(COMPILER ${CMAKE_${LANG}_COMPILER_ID}) + if(NOT COVERAGE_${COMPILER}_FLAGS) + foreach (FLAG ${COVERAGE_FLAG_CANDIDATES}) + if(NOT CMAKE_REQUIRED_QUIET) + message(STATUS "Try ${COMPILER} code coverage flag = [${FLAG}]") + endif() + + set(CMAKE_REQUIRED_FLAGS "${FLAG}") + unset(COVERAGE_FLAG_DETECTED CACHE) + + if(${LANG} STREQUAL "C") + include(CheckCCompilerFlag) + check_c_compiler_flag("${FLAG}" COVERAGE_FLAG_DETECTED) + + elseif(${LANG} STREQUAL "CXX") + include(CheckCXXCompilerFlag) + check_cxx_compiler_flag("${FLAG}" COVERAGE_FLAG_DETECTED) + + elseif(${LANG} STREQUAL "Fortran") + # CheckFortranCompilerFlag was introduced in CMake 3.x. To be + # compatible with older Cmake versions, we will check if this + # module is present before we use it. Otherwise we will define + # Fortran coverage support as not available. + include(CheckFortranCompilerFlag OPTIONAL + RESULT_VARIABLE INCLUDED) + if(INCLUDED) + check_fortran_compiler_flag("${FLAG}" + COVERAGE_FLAG_DETECTED) + elseif(NOT CMAKE_REQUIRED_QUIET) + message("-- Performing Test COVERAGE_FLAG_DETECTED") + message("-- Performing Test COVERAGE_FLAG_DETECTED - Failed" + " (Check not supported)") + endif() + endif() + + if(COVERAGE_FLAG_DETECTED) + set(COVERAGE_${COMPILER}_FLAGS "${FLAG}" + CACHE STRING "${COMPILER} flags for code coverage.") + mark_as_advanced(COVERAGE_${COMPILER}_FLAGS) + break() + endif() + endforeach() + endif() +endforeach() + +set(CMAKE_REQUIRED_QUIET ${CMAKE_REQUIRED_QUIET_SAVE}) + +# Helper function to get the language of a source file. +function (codecov_lang_of_source FILE RETURN_VAR) + get_filename_component(FILE_EXT "${FILE}" EXT) + string(TOLOWER "${FILE_EXT}" FILE_EXT) + string(SUBSTRING "${FILE_EXT}" 1 -1 FILE_EXT) + + get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) + foreach (LANG ${ENABLED_LANGUAGES}) + list(FIND CMAKE_${LANG}_SOURCE_FILE_EXTENSIONS "${FILE_EXT}" TEMP) + if(NOT ${TEMP} EQUAL -1) + set(${RETURN_VAR} "${LANG}" PARENT_SCOPE) + return() + endif() + endforeach() + + set(${RETURN_VAR} "" PARENT_SCOPE) +endfunction() + +# Helper function to get the relative path of the source file destination path. +# This path is needed by FindGcov and FindLcov cmake files to locate the +# captured data. +function (codecov_path_of_source FILE RETURN_VAR) + string(REGEX MATCH "TARGET_OBJECTS:([^ >]+)" _source ${FILE}) + + # If expression was found, SOURCEFILE is a generator-expression for an + # object library. Currently we found no way to call this function automatic + # for the referenced target, so it must be called in the directoryso of the + # object library definition. + if(NOT "${_source}" STREQUAL "") + set(${RETURN_VAR} "" PARENT_SCOPE) + return() + endif() + + string(REPLACE "${CMAKE_CURRENT_BINARY_DIR}/" "" FILE "${FILE}") + if(IS_ABSOLUTE ${FILE}) + file(RELATIVE_PATH FILE ${CMAKE_CURRENT_SOURCE_DIR} ${FILE}) + endif() + + # get the right path for file + string(REPLACE ".." "__" PATH "${FILE}") + + set(${RETURN_VAR} "${PATH}" PARENT_SCOPE) +endfunction() + +# Add coverage support for target ${TNAME} and register target for coverage +# evaluation. +function(add_coverage_target TNAME) + # Check if all sources for target use the same compiler. If a target uses + # e.g. C and Fortran mixed and uses different compilers (e.g. clang and + # gfortran) this can trigger huge problems, because different compilers may + # use different implementations for code coverage. + get_target_property(TSOURCES ${TNAME} SOURCES) + set(TARGET_COMPILER "") + set(ADDITIONAL_FILES "") + foreach (FILE ${TSOURCES}) + # If expression was found, FILE is a generator-expression for an object + # library. Object libraries will be ignored. + string(REGEX MATCH "TARGET_OBJECTS:([^ >]+)" _file ${FILE}) + if("${_file}" STREQUAL "") + codecov_lang_of_source(${FILE} LANG) + if(LANG) + list(APPEND TARGET_COMPILER ${CMAKE_${LANG}_COMPILER_ID}) + + list(APPEND ADDITIONAL_FILES "${FILE}.gcno") + list(APPEND ADDITIONAL_FILES "${FILE}.gcda") + endif() + endif() + endforeach () + + list(REMOVE_DUPLICATES TARGET_COMPILER) + list(LENGTH TARGET_COMPILER NUM_COMPILERS) + + if(NUM_COMPILERS GREATER 1) + message(AUTHOR_WARNING "Coverage disabled for target ${TNAME} because " + "it will be compiled by different compilers.") + return() + + elseif((NUM_COMPILERS EQUAL 0) OR + (NOT DEFINED "COVERAGE_${TARGET_COMPILER}_FLAGS")) + message(AUTHOR_WARNING "Coverage disabled for target ${TNAME} " + "because there is no sanitizer available for target sources.") + return() + endif() + + + # enable coverage for target + set_property(TARGET ${TNAME} APPEND_STRING + PROPERTY COMPILE_FLAGS " ${COVERAGE_${TARGET_COMPILER}_FLAGS}") + set_property(TARGET ${TNAME} APPEND_STRING + PROPERTY LINK_FLAGS " ${COVERAGE_${TARGET_COMPILER}_FLAGS}") + + + # Add gcov files generated by compiler to clean target. + set(CLEAN_FILES "") + foreach (FILE ${ADDITIONAL_FILES}) + codecov_path_of_source(${FILE} FILE) + list(APPEND CLEAN_FILES "CMakeFiles/${TNAME}.dir/${FILE}") + endforeach() + + set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES + "${CLEAN_FILES}") + + add_gcov_target(${TNAME}) +endfunction() + +# Include modules for parsing the collected data and output it in a readable +# format (like gcov). +find_package(Gcov) diff --git a/lapack-netlib/CMakeLists.txt b/lapack-netlib/CMakeLists.txt index beb732106..caa0e7107 100644 --- a/lapack-netlib/CMakeLists.txt +++ b/lapack-netlib/CMakeLists.txt @@ -1,22 +1,36 @@ -cmake_minimum_required(VERSION 2.8.10) +cmake_minimum_required(VERSION 2.8.12) + +project(LAPACK Fortran C) + +set(LAPACK_MAJOR_VERSION 3) +set(LAPACK_MINOR_VERSION 8) +set(LAPACK_PATCH_VERSION 0) +set( + LAPACK_VERSION + ${LAPACK_MAJOR_VERSION}.${LAPACK_MINOR_VERSION}.${LAPACK_PATCH_VERSION} + ) + +# Add the CMake directory for custon CMake modules +set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) # Set a default build type if none was specified if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to 'Release' as none was specified.") set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) # Set the possible values of build type for cmake-gui - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage") endif() -project(LAPACK Fortran) +string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) +if(${CMAKE_BUILD_TYPE_UPPER} STREQUAL "COVERAGE") + message(STATUS "Adding coverage") + find_package(codecov) +endif() -set(LAPACK_MAJOR_VERSION 3) -set(LAPACK_MINOR_VERSION 7) -set(LAPACK_PATCH_VERSION 0) -set( - LAPACK_VERSION - ${LAPACK_MAJOR_VERSION}.${LAPACK_MINOR_VERSION}.${LAPACK_PATCH_VERSION} - ) +# By default static library +option(BUILD_SHARED_LIBS "Build shared libraries" OFF) + +include(GNUInstallDirs) # Updated OSX RPATH settings # In response to CMake 3.0 generating warnings regarding policy CMP0042, @@ -26,9 +40,9 @@ set( set(CMAKE_MACOSX_RPATH ON) set(CMAKE_SKIP_BUILD_RPATH FALSE) set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) -list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}" isSystemDir) +list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES ${CMAKE_INSTALL_FULL_LIBDIR} isSystemDir) if("${isSystemDir}" STREQUAL "-1") - set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}") + set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR}) set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) endif() @@ -40,8 +54,6 @@ configure_file( @ONLY ) -# Add the CMake directory for custon CMake modules -set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) include(PreventInSourceBuilds) include(PreventInBuildInstalls) @@ -96,42 +108,26 @@ endif() set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) -if(UNIX) - include(GNUInstallDirs) - set(ARCHIVE_DIR ${CMAKE_INSTALL_LIBDIR}) - set(LIBRARY_DIR ${CMAKE_INSTALL_LIBDIR}) - set(RUNTIME_DIR ${CMAKE_INSTALL_BINDIR}) -else() - set(ARCHIVE_DIR lib${LIB_SUFFIX}) - set(LIBRARY_DIR lib${LIB_SUFFIX}) - set(RUNTIME_DIR bin) -endif() - macro(lapack_install_library lib) install(TARGETS ${lib} EXPORT ${LAPACK_INSTALL_EXPORT_NAME} - ARCHIVE DESTINATION ${ARCHIVE_DIR} - LIBRARY DESTINATION ${LIBRARY_DIR} - RUNTIME DESTINATION ${RUNTIME_DIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) endmacro() -set(PKG_CONFIG_DIR ${LIBRARY_DIR}/pkgconfig) -set(prefix ${CMAKE_INSTALL_PREFIX}) -if(NOT IS_ABSOLUTE ${LIBRARY_DIR}) - set(libdir "\${prefix}/${LIBRARY_DIR}") -else() - set(libdir "${LIBRARY_DIR}") -endif() +set(PKG_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig) # -------------------------------------------------- # Testing - +option(BUILD_TESTING "Build tests" OFF) enable_testing() include(CTest) enable_testing() -# -------------------------------------------------- +message(STATUS "Build tests: ${BUILD_TESTING}") +# -------------------------------------------------- # Organize output files. On Windows this also keeps .dll files next # to the .exe files that need them, making tests easy to run. set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LAPACK_BINARY_DIR}/bin) @@ -158,21 +154,27 @@ message(STATUS "--> Will use second_${TIME_FUNC}.f and dsecnd_${TIME_FUNC}.f as set(SECOND_SRC ${LAPACK_SOURCE_DIR}/INSTALL/second_${TIME_FUNC}.f) set(DSECOND_SRC ${LAPACK_SOURCE_DIR}/INSTALL/dsecnd_${TIME_FUNC}.f) -# By default static library -option(BUILD_SHARED_LIBS "Build shared libraries" OFF) - -option(BUILD_TESTING "Build tests" OFF) - -# deprecated LAPACK routines +# deprecated LAPACK and LAPACKE routines option(BUILD_DEPRECATED "Build deprecated routines" OFF) +message(STATUS "Build deprecated routines: ${BUILD_DEPRECATED}") # -------------------------------------------------- # Precision to build # By default all precisions are generated -option(BUILD_SINGLE "Build LAPACK Single Precision" ON) -option(BUILD_DOUBLE "Build LAPACK Double Precision" ON) -option(BUILD_COMPLEX "Build LAPACK Complex Precision" ON) -option(BUILD_COMPLEX16 "Build LAPACK Double Complex Precision" ON) +option(BUILD_SINGLE "Build single precision real" ON) +option(BUILD_DOUBLE "Build double precision real" ON) +option(BUILD_COMPLEX "Build single precision complex" ON) +option(BUILD_COMPLEX16 "Build double precision complex" ON) +message(STATUS "Build single precision real: ${BUILD_SINGLE}") +message(STATUS "Build double precision real: ${BUILD_DOUBLE}") +message(STATUS "Build single precision complex: ${BUILD_COMPLEX}") +message(STATUS "Build double precision complex: ${BUILD_COMPLEX16}") + +if(NOT (BUILD_SINGLE OR BUILD_DOUBLE OR BUILD_COMPLEX OR BUILD_COMPLEX16)) + message(FATAL_ERROR "Nothing to build, no precision selected. + Please enable at least one of these: + BUILD_SINGLE, BUILD_DOUBLE, BUILD_COMPLEX, BUILD_COMPLEX16.") +endif() # -------------------------------------------------- # Subdirectories that need to be processed @@ -275,7 +277,6 @@ else() CACHE STRING "Linker flags for shared libs" FORCE) endif() -message(STATUS "BUILD TESTING : ${BUILD_TESTING}") if(BUILD_TESTING) add_subdirectory(TESTING) endif() @@ -346,7 +347,7 @@ endif() set(_lapack_config_install_guard_target "") if(ALL_TARGETS) install(EXPORT lapack-targets - DESTINATION ${LIBRARY_DIR}/cmake/lapack-${LAPACK_VERSION}) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}) # Choose one of the lapack targets to use as a guard for # lapack-config.cmake to load targets from the install tree. @@ -396,5 +397,5 @@ write_basic_package_version_file( install(FILES ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake ${LAPACK_BINARY_DIR}/lapack-config-version.cmake - DESTINATION ${LIBRARY_DIR}/cmake/lapack-${LAPACK_VERSION} + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} ) diff --git a/lapack-netlib/DOCS/Doxyfile b/lapack-netlib/DOCS/Doxyfile index db9bb4725..8f3558597 100644 --- a/lapack-netlib/DOCS/Doxyfile +++ b/lapack-netlib/DOCS/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 3.7.0 +PROJECT_NUMBER = 3.8.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/lapack-netlib/DOCS/Doxyfile_man b/lapack-netlib/DOCS/Doxyfile_man index 7b048a29f..6fb339a73 100644 --- a/lapack-netlib/DOCS/Doxyfile_man +++ b/lapack-netlib/DOCS/Doxyfile_man @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 3.7.0 +PROJECT_NUMBER = 3.8.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/lapack-netlib/DOCS/lawn81.tex b/lapack-netlib/DOCS/lawn81.tex index 73a443775..291735299 100644 --- a/lapack-netlib/DOCS/lawn81.tex +++ b/lapack-netlib/DOCS/lawn81.tex @@ -507,7 +507,7 @@ LAPACK library, you would modify the \texttt{lapacklib} definition to be: \begin{verbatim} lapacklib: - ( cd SRC; $(MAKE) single ) + $(MAKE) -C SRC single \end{verbatim} Likewise, you could specify \texttt{double, complex, or complex16} to @@ -560,7 +560,7 @@ can be accomplished by the following: \begin{list}{}{} \item \texttt{cd LAPACK} -\item \texttt{make clean} +\item \texttt{make cleanobj} \end{list} \section{Further Details of the Installation Process}\label{furtherdetails} diff --git a/lapack-netlib/INSTALL/LAPACK_version.f b/lapack-netlib/INSTALL/LAPACK_version.f index 163b7e2b2..0902e1239 100644 --- a/lapack-netlib/INSTALL/LAPACK_version.f +++ b/lapack-netlib/INSTALL/LAPACK_version.f @@ -18,20 +18,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup auxOTHERauxiliary * -* ===================================================================== PROGRAM LAPACK_VERSION +* ===================================================================== + PROGRAM LAPACK_VERSION * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * INTEGER MAJOR, MINOR, PATCH +* .. +* .. External Subroutines .. + EXTERNAL ILAVER * - CALL ILAVER ( MAJOR,MINOR, PATCH ) + CALL ILAVER ( MAJOR, MINOR, PATCH ) WRITE(*,*) "LAPACK ",MAJOR,".",MINOR,".",PATCH * END diff --git a/lapack-netlib/INSTALL/Makefile b/lapack-netlib/INSTALL/Makefile index 15f5252be..150a061d6 100644 --- a/lapack-netlib/INSTALL/Makefile +++ b/lapack-netlib/INSTALL/Makefile @@ -1,33 +1,48 @@ include ../make.inc -.SUFFIXES: .o .f all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion testlsame: lsame.o lsametst.o - $(LOADER) $(LOADOPTS) -o $@ lsame.o lsametst.o + $(LOADER) $(LOADOPTS) -o $@ $^ testslamch: slamch.o lsame.o slamchtst.o - $(LOADER) $(LOADOPTS) -o $@ slamch.o lsame.o slamchtst.o + $(LOADER) $(LOADOPTS) -o $@ $^ testdlamch: dlamch.o lsame.o dlamchtst.o - $(LOADER) $(LOADOPTS) -o $@ dlamch.o lsame.o dlamchtst.o + $(LOADER) $(LOADOPTS) -o $@ $^ testsecond: second_$(TIMER).o secondtst.o @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" - $(LOADER) $(LOADOPTS) -o $@ second_$(TIMER).o secondtst.o + $(LOADER) $(LOADOPTS) -o $@ $^ testdsecnd: dsecnd_$(TIMER).o dsecndtst.o @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" - $(LOADER) $(LOADOPTS) -o $@ dsecnd_$(TIMER).o dsecndtst.o + $(LOADER) $(LOADOPTS) -o $@ $^ testieee: tstiee.o - $(LOADER) $(LOADOPTS) -o $@ tstiee.o + $(LOADER) $(LOADOPTS) -o $@ $^ testversion: ilaver.o LAPACK_version.o - $(LOADER) $(LOADOPTS) -o $@ ilaver.o LAPACK_version.o - -clean: + $(LOADER) $(LOADOPTS) -o $@ $^ + +run: all + ./testlsame + ./testslamch + ./testdlamch + ./testsecond + ./testdsecnd + ./testieee + ./testversion + +clean: cleanobj cleanexe cleantest +cleanobj: rm -f *.o +cleanexe: + rm -f test* +cleantest: + rm -f core + +.SUFFIXES: .o .f .f.o: $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/INSTALL/dsecndtst.f b/lapack-netlib/INSTALL/dsecndtst.f index a39e00c95..430a46a53 100644 --- a/lapack-netlib/INSTALL/dsecndtst.f +++ b/lapack-netlib/INSTALL/dsecndtst.f @@ -18,18 +18,18 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup auxOTHERauxiliary * * ===================================================================== PROGRAM DSECNDTST * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * ===================================================================== * @@ -48,6 +48,9 @@ DOUBLE PRECISION DSECND EXTERNAL DSECND * .. +* .. External Subroutines .. + EXTERNAL MYSUB +* .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. diff --git a/lapack-netlib/INSTALL/ilaver.f b/lapack-netlib/INSTALL/ilaver.f index 8578953a3..e1d59f465 100644 --- a/lapack-netlib/INSTALL/ilaver.f +++ b/lapack-netlib/INSTALL/ilaver.f @@ -41,24 +41,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup auxOTHERauxiliary * * ===================================================================== SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* June 2016 +* June 2017 * * ===================================================================== * INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH * ===================================================================== VERS_MAJOR = 3 - VERS_MINOR = 7 + VERS_MINOR = 8 VERS_PATCH = 0 * ===================================================================== * diff --git a/lapack-netlib/INSTALL/make.inc.ALPHA b/lapack-netlib/INSTALL/make.inc.ALPHA index b5815876e..0ceeaa155 100644 --- a/lapack-netlib/INSTALL/make.inc.ALPHA +++ b/lapack-netlib/INSTALL/make.inc.ALPHA @@ -1,76 +1,81 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O4 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = f77 +OPTS = -O4 -fpe1 +DRVOPTS = $(OPTS) +NOOPT = + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = f77 -OPTS = -O4 -fpe1 -DRVOPTS = $(OPTS) -NOOPT = LOADER = f77 LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = cc -CFLAGS = -O4 +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # #BLASLIB = ../../librefblas.a -BLASLIB = -ldxml +BLASLIB = -ldxml CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a TMGLIB = libtmglib.a diff --git a/lapack-netlib/INSTALL/make.inc.HPPA b/lapack-netlib/INSTALL/make.inc.HPPA index 869a7ec7e..8eabbbdf4 100644 --- a/lapack-netlib/INSTALL/make.inc.HPPA +++ b/lapack-netlib/INSTALL/make.inc.HPPA @@ -1,76 +1,81 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = f77 +OPTS = +O4 +U77 +DRVOPTS = $(OPTS) -K +NOOPT = +U77 + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = f77 -OPTS = +O4 +U77 -DRVOPTS = $(OPTS) -K -NOOPT = +U77 LOADER = f77 LOADOPTS = -Aa +U77 + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = cc -CFLAGS = +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # #BLASLIB = ../../librefblas.a -BLASLIB = -lblas +BLASLIB = -lblas CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a TMGLIB = libtmglib.a diff --git a/lapack-netlib/INSTALL/make.inc.IRIX64 b/lapack-netlib/INSTALL/make.inc.IRIX64 index 68482060b..d9e71e1bf 100644 --- a/lapack-netlib/INSTALL/make.inc.IRIX64 +++ b/lapack-netlib/INSTALL/make.inc.IRIX64 @@ -1,78 +1,83 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /sbin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = f77 +OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON +#OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON +DRVOPTS = $(OPTS) -static +NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON +#NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = f77 -OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON -#OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON -DRVOPTS = $(OPTS) -static -NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON -#NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON LOADER = f77 LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON #LOADOPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = cc -CFLAGS = -O3 +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # -#BLASLIB = -lblas +#BLASLIB = -lblas BLASLIB = ../../librefblas.a CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a diff --git a/lapack-netlib/INSTALL/make.inc.O2K b/lapack-netlib/INSTALL/make.inc.O2K index ceeccef78..3ffcadacc 100644 --- a/lapack-netlib/INSTALL/make.inc.O2K +++ b/lapack-netlib/INSTALL/make.inc.O2K @@ -1,79 +1,84 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /sbin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = f77 +OPTS = -O3 -64 -mips4 -r10000 +#OPTS = -O3 -64 -mips4 -r10000 -mp +DRVOPTS = $(OPTS) -static +NOOPT = -64 -mips4 -r10000 +#NOOPT = -64 -mips4 -r10000 -mp + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = f77 -OPTS = -O3 -64 -mips4 -r10000 -#OPTS = -O3 -64 -mips4 -r10000 -mp -DRVOPTS = $(OPTS) -static -NOOPT = -64 -mips4 -r10000 -#NOOPT = -64 -mips4 -r10000 -mp LOADER = f77 LOADOPTS = -O3 -64 -mips4 -r10000 #LOADOPTS = -O3 -64 -mips4 -r10000 -mp + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = cc -CFLAGS = -O3 +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # -BLASLIB = -lblas -#BLASLIB = -lblas_mp +BLASLIB = -lblas +#BLASLIB = -lblas_mp #BLASLIB = ../../librefblas.a CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a diff --git a/lapack-netlib/INSTALL/make.inc.SGI5 b/lapack-netlib/INSTALL/make.inc.SGI5 index de55f1521..c7019ac16 100644 --- a/lapack-netlib/INSTALL/make.inc.SGI5 +++ b/lapack-netlib/INSTALL/make.inc.SGI5 @@ -1,75 +1,80 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /sbin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O4 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = f77 +OPTS = -O4 +DRVOPTS = $(OPTS) -static +NOOPT = + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = f77 -OPTS = -O4 -DRVOPTS = $(OPTS) -static -NOOPT = LOADER = f77 LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = cc -CFLAGS = -O4 +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # -#BLASLIB = -lblas +#BLASLIB = -lblas BLASLIB = ../../librefblas.a CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a diff --git a/lapack-netlib/INSTALL/make.inc.SUN4 b/lapack-netlib/INSTALL/make.inc.SUN4 index d9c68c4c7..4e44f1beb 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4 +++ b/lapack-netlib/INSTALL/make.inc.SUN4 @@ -1,75 +1,80 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = f77 +OPTS = -dalign -O4 -fast +DRVOPTS = $(OPTS) +NOOPT = + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = f77 -OPTS = -dalign -O4 -fast -DRVOPTS = $(OPTS) -NOOPT = LOADER = f77 LOADOPTS = -dalign -O4 -fast + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = cc -CFLAGS = -O3 +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # -#BLASLIB = -lblas +#BLASLIB = -lblas BLASLIB = ../../librefblas.a CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a diff --git a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 index d3c78437e..e6d79add3 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 +++ b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 @@ -1,81 +1,86 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = f77 +#OPTS = -O4 -u -f -mt +#OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa +OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa +DRVOPTS = $(OPTS) +NOOPT = -u -f +#NOOPT = -u -f -mt + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = f77 -#OPTS = -O4 -u -f -mt -#OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa -OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa -DRVOPTS = $(OPTS) -NOOPT = -u -f -#NOOPT = -u -f -mt LOADER = f77 #LOADOPTS = -mt LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = cc -CFLAGS = -O3 +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # #BLASLIB = ../../librefblas.a -#BLASLIB = -xlic_lib=sunperf_mt -BLASLIB = -xlic_lib=sunperf +#BLASLIB = -xlic_lib=sunperf_mt +BLASLIB = -xlic_lib=sunperf CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a TMGLIB = libtmglib.a diff --git a/lapack-netlib/INSTALL/make.inc.XLF b/lapack-netlib/INSTALL/make.inc.XLF index d7fa4b73b..9466ee332 100644 --- a/lapack-netlib/INSTALL/make.inc.XLF +++ b/lapack-netlib/INSTALL/make.inc.XLF @@ -1,77 +1,82 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +CC = xlc +CFLAGS = -O3 -qnosave + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -FORTRAN = xlf -OPTS = -O3 -qfixed -qnosave +FORTRAN = xlf +OPTS = -O3 -qfixed -qnosave # For -O2, add -qstrict=none -DRVOPTS = $(OPTS) -NOOPT = -O0 -qfixed -qnosave +DRVOPTS = $(OPTS) +NOOPT = -O0 -qfixed -qnosave + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# LOADER = xlf LOADOPTS = -qnosave + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -#TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = xlc -CFLAGS = -O3 -qnosave +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # #BLASLIB = ../../librefblas.a -BLASLIB = -lessl +BLASLIB = -lessl CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a TMGLIB = libtmglib.a diff --git a/lapack-netlib/INSTALL/make.inc.gfortran b/lapack-netlib/INSTALL/make.inc.gfortran index ccd0994f9..39d98d4d4 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran +++ b/lapack-netlib/INSTALL/make.inc.gfortran @@ -1,74 +1,79 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# November 2017 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +CC = gcc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # # Note: During a regular execution, LAPACK might create NaN and Inf # and handle these quantities appropriately. As a consequence, one # should not compile LAPACK with flags such as -ffpe-trap=overflow. # -FORTRAN = gfortran -OPTS = -O2 -frecursive -DRVOPTS = $(OPTS) -NOOPT = -O0 -frecursive +FORTRAN = gfortran +OPTS = -O2 -frecursive +DRVOPTS = $(OPTS) +NOOPT = -O0 -frecursive + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# LOADER = gfortran LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -#TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) -# -# CC is the C compiler, normally invoked with options CFLAGS. -# -CC = gcc -CFLAGS = -O3 -# -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) diff --git a/lapack-netlib/INSTALL/make.inc.gfortran_debug b/lapack-netlib/INSTALL/make.inc.gfortran_debug index f87b949c4..10e6381df 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran_debug +++ b/lapack-netlib/INSTALL/make.inc.gfortran_debug @@ -1,74 +1,79 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# November 2017 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader -# and desired load options for your machine. +CC = gcc +CFLAGS = -g + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # # Note: During a regular execution, LAPACK might create NaN and Inf # and handle these quantities appropriately. As a consequence, one # should not compile LAPACK with flags such as -ffpe-trap=overflow. # -FORTRAN = gfortran -fimplicit-none -g -frecursive -OPTS = -DRVOPTS = $(OPTS) -NOOPT = -g -O0 -frecursive +FORTRAN = gfortran -fimplicit-none -g -frecursive +OPTS = +DRVOPTS = $(OPTS) +NOOPT = -g -O0 -frecursive + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# LOADER = gfortran -g LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -# TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the Fortran standard INTERNAL FUNCTION CPU_TIME -TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) -# -# CC is the C compiler, normally invoked with options CFLAGS. -# -CC = gcc -CFLAGS = -g -# -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) diff --git a/lapack-netlib/INSTALL/make.inc.ifort b/lapack-netlib/INSTALL/make.inc.ifort index b26e9601c..b067bd484 100644 --- a/lapack-netlib/INSTALL/make.inc.ifort +++ b/lapack-netlib/INSTALL/make.inc.ifort @@ -1,70 +1,75 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# June 2016 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = icc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader -# and desired load options for your machine. +FORTRAN = ifort +OPTS = -O3 -fp-model strict -assume protect_parens +DRVOPTS = $(OPTS) +NOOPT = -O0 -fp-model strict -assume protect_parens + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = ifort -OPTS = -O3 -fp-model strict -assume protect_parens -DRVOPTS = $(OPTS) -NOOPT = -O0 -fp-model strict -assume protect_parens LOADER = ifort LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the Fortran standard INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = icc -CFLAGS = -O3 +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) diff --git a/lapack-netlib/INSTALL/make.inc.pgf95 b/lapack-netlib/INSTALL/make.inc.pgf95 index 595b64c87..a9a5cec98 100644 --- a/lapack-netlib/INSTALL/make.inc.pgf95 +++ b/lapack-netlib/INSTALL/make.inc.pgf95 @@ -1,70 +1,75 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = pgcc +CFLAGS = + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = pgf95 +OPTS = -O3 +DRVOPTS = $(OPTS) +NOOPT = -O0 + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = pgf95 -OPTS = -O3 -DRVOPTS = $(OPTS) -NOOPT = -O0 LOADER = $(FORTRAN) LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -# TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME - TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = pgcc -CFLAGS = +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) diff --git a/lapack-netlib/INSTALL/make.inc.pghpf b/lapack-netlib/INSTALL/make.inc.pghpf index 8639530a5..1d9bf549c 100644 --- a/lapack-netlib/INSTALL/make.inc.pghpf +++ b/lapack-netlib/INSTALL/make.inc.pghpf @@ -1,77 +1,82 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.6.0 # -# November 2015 # +# LAPACK, Version 3.7.0 # +# December 2016 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = pghpc +CFLAGS = + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +FORTRAN = pghpf +OPTS = -O4 -Mnohpfc -Mdclchk +DRVOPTS = $(OPTS) +NOOPT = -Mnohpfc -Mdclchk + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. # -FORTRAN = pghpf -OPTS = -O4 -Mnohpfc -Mdclchk -DRVOPTS = $(OPTS) -NOOPT = -Mnohpfc -Mdclchk LOADER = pghpf LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -# TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines # -# CC is the C compiler, normally invoked with options CFLAGS. +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # -CC = pghpc -CFLAGS = +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# LAPACKE has also the interface to some routines from tmglib, -# if LAPACKE_WITH_TMG is selected, we need to add those routines to LAPACKE #LAPACKE_WITH_TMG = Yes -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo -# + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) # -#BLASLIB = -lessl +#BLASLIB = -lessl BLASLIB = ../../librefblas.a -CBLASLIB = ../../libcblas.a +CBLASLIB = ../../libcblas.a LAPACKLIB = liblapack.a TMGLIB = libtmglib.a LAPACKELIB = liblapacke.a diff --git a/lapack-netlib/INSTALL/secondtst.f b/lapack-netlib/INSTALL/secondtst.f index 03f19ab6e..9eaa1818a 100644 --- a/lapack-netlib/INSTALL/secondtst.f +++ b/lapack-netlib/INSTALL/secondtst.f @@ -14,18 +14,18 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup auxOTHERcomputational * * ===================================================================== PROGRAM SECONDTST * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * ===================================================================== * @@ -44,6 +44,9 @@ REAL SECOND EXTERNAL SECOND * .. +* .. External Subroutines .. + EXTERNAL MYSUB +* .. * .. Intrinsic Functions .. INTRINSIC REAL * .. diff --git a/lapack-netlib/LAPACKE/CMakeLists.txt b/lapack-netlib/LAPACKE/CMakeLists.txt index 2a60a1ea1..42faef5dd 100644 --- a/lapack-netlib/LAPACKE/CMakeLists.txt +++ b/lapack-netlib/LAPACKE/CMakeLists.txt @@ -36,26 +36,42 @@ macro(append_subdir_files variable dirname) endmacro() append_subdir_files(LAPACKE_INCLUDE "include") -append_subdir_files(SRC_OBJ "src") -append_subdir_files(SRCX_OBJ "src") -append_subdir_files(MATGEN_OBJ "src") -append_subdir_files(UTILS_OBJ "utils") +append_subdir_files(SOURCES "src") +append_subdir_files(DEPRECATED "src") +append_subdir_files(EXTENDED "src") +append_subdir_files(MATGEN "src") +append_subdir_files(UTILS "utils") +if(BUILD_DEPRECATED) + list(APPEND SOURCES ${DEPRECATED}) +endif() if(USE_XBLAS) - add_library(lapacke ${SRC_OBJ} ${SRCX_OBJ} ${UTILS_OBJ}) - target_link_libraries(lapacke ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${XBLAS_LIBRARY}) -else() - if(LAPACKE_WITH_TMG) - add_library(lapacke ${SRC_OBJ} ${MATGEN_OBJ} ${UTILS_OBJ}) - target_link_libraries(lapacke tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) - else() - add_library(lapacke ${SRC_OBJ} ${UTILS_OBJ}) - target_link_libraries(lapacke ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) - endif() + list(APPEND SOURCES ${EXTENDED}) +endif() +if(LAPACKE_WITH_TMG) + list(APPEND SOURCES ${MATGEN}) +endif() +list(APPEND SOURCES ${UTILS}) + +add_library(lapacke ${SOURCES}) +set_target_properties( + lapacke PROPERTIES + LINKER_LANGUAGE C + VERSION ${LAPACK_VERSION} + SOVERSION ${LAPACK_MAJOR_VERSION} + ) +target_include_directories(lapacke PUBLIC + $ + $ +) + +if(LAPACKE_WITH_TMG) + target_link_libraries(lapacke PRIVATE tmglib) endif() +target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) lapack_install_library(lapacke) -install(FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h DESTINATION include) +install(FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) if(BUILD_TESTING) add_subdirectory(example) @@ -78,8 +94,8 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake - DESTINATION ${LIBRARY_DIR}/cmake/lapacke-${LAPACK_VERSION} + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} ) install(EXPORT lapacke-targets - DESTINATION ${LIBRARY_DIR}/cmake/lapacke-${LAPACK_VERSION}) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}) diff --git a/lapack-netlib/LAPACKE/Makefile b/lapack-netlib/LAPACKE/Makefile index f1b07ad5a..016f8a2f2 100644 --- a/lapack-netlib/LAPACKE/Makefile +++ b/lapack-netlib/LAPACKE/Makefile @@ -35,7 +35,7 @@ # To generate lapacke library type 'make lapacke' # To make both just type 'make' # -# To remove lapacke object files type 'make cleanlib' +# To remove lapacke object files type 'make cleanobj' # To clean all above type 'make clean' # To clean everything including lapacke library type # 'make cleanall' @@ -44,20 +44,26 @@ include ../make.inc all: lapacke -lapacke: - cd include && cp lapacke_mangling_with_flags.h.in lapacke_mangling.h - cd src && $(MAKE) - cd utils && $(MAKE) +lapacke: include/lapacke_mangling.h + $(MAKE) -C src + $(MAKE) -C utils -lapacke_example: - cd example && $(MAKE) +include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in + cp $< $@ -clean: cleanlib +lapacke_example: lapacke + $(MAKE) -C example +#clean: cleanlib +clean: cleanobj + $(MAKE) -C src clean + $(MAKE) -C utils clean + $(MAKE) -C example clean +cleanobj: + $(MAKE) -C src cleanobj + $(MAKE) -C utils cleanobj + $(MAKE) -C example cleanobj cleanlib: - cd src && $(MAKE) clean - cd utils && $(MAKE) clean - -cleanall: clean - rm -f $(LAPACKE) - cd example && $(MAKE) clean + rm -f ../$(LAPACKELIB) +cleanexe: + $(MAKE) -C example cleanexe diff --git a/lapack-netlib/LAPACKE/cmake/lapacke-config-install.cmake.in b/lapack-netlib/LAPACKE/cmake/lapacke-config-install.cmake.in index 9b2452b80..caa459a24 100644 --- a/lapack-netlib/LAPACKE/cmake/lapacke-config-install.cmake.in +++ b/lapack-netlib/LAPACKE/cmake/lapacke-config-install.cmake.in @@ -5,7 +5,7 @@ get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) # Load the LAPACK package with which we were built. -set(LAPACK_DIR "${_LAPACKE_PREFIX}/@{LIBRARY_DIR@/cmake/lapack-@LAPACK_VERSION@") +set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") find_package(LAPACK NO_MODULE) # Load lapacke targets from the install tree. diff --git a/lapack-netlib/LAPACKE/example/CMakeLists.txt b/lapack-netlib/LAPACKE/example/CMakeLists.txt index a1c590965..fa75c731c 100644 --- a/lapack-netlib/LAPACKE/example/CMakeLists.txt +++ b/lapack-netlib/LAPACKE/example/CMakeLists.txt @@ -3,10 +3,10 @@ add_executable(xexample_DGESV_colmajor example_DGESV_colmajor.c lapacke_example_ add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h) add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h) -target_link_libraries(xexample_DGESV_rowmajor lapacke ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) -target_link_libraries(xexample_DGESV_colmajor lapacke ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) -target_link_libraries(xexample_DGELS_rowmajor lapacke ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) -target_link_libraries(xexample_DGELS_colmajor lapacke ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) +target_link_libraries(xexample_DGESV_rowmajor lapacke) +target_link_libraries(xexample_DGESV_colmajor lapacke) +target_link_libraries(xexample_DGELS_rowmajor lapacke) +target_link_libraries(xexample_DGELS_colmajor lapacke) add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor) add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor) diff --git a/lapack-netlib/LAPACKE/example/Makefile b/lapack-netlib/LAPACKE/example/Makefile index 80968e8c9..f959a2be0 100644 --- a/lapack-netlib/LAPACKE/example/Makefile +++ b/lapack-netlib/LAPACKE/example/Makefile @@ -9,23 +9,26 @@ LIBRARIES = ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) # Double Precision Examples xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES) - $(LOADER) $(LOADOPTS) -o $@ example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES) + $(LOADER) $(LOADOPTS) -o $@ $^ ./$@ xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES) - $(LOADER) $(LOADOPTS) -o $@ example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES) + $(LOADER) $(LOADOPTS) -o $@ $^ ./$@ xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES) - $(LOADER) $(LOADOPTS) -o $@ example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES) + $(LOADER) $(LOADOPTS) -o $@ $^ ./$@ xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES) - $(LOADER) $(LOADOPTS) -o $@ example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES) + $(LOADER) $(LOADOPTS) -o $@ $^ ./$@ +clean: cleanobj cleanexe +cleanobj: + rm -f *.o +cleanexe: + rm -f x* + .c.o: $(CC) $(CFLAGS) -I. -I../include -c -o $@ $< - -clean: - rm -f *.o x* diff --git a/lapack-netlib/LAPACKE/include/lapacke.h b/lapack-netlib/LAPACKE/include/lapacke.h index cacdef962..6ded78c8b 100644 --- a/lapack-netlib/LAPACKE/include/lapacke.h +++ b/lapack-netlib/LAPACKE/include/lapacke.h @@ -2398,6 +2398,28 @@ float LAPACKE_clanhe( int matrix_layout, char norm, char uplo, lapack_int n, double LAPACKE_zlanhe( int matrix_layout, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda ); +lapack_int LAPACKE_clacrm( int matrix_layout, lapack_int m, lapack_int n, + const lapack_complex_float* a, + lapack_int lda, const float* b, + lapack_int ldb, lapack_complex_float* c, + lapack_int ldc ); +lapack_int LAPACKE_zlacrm( int matrix_layout, lapack_int m, lapack_int n, + const lapack_complex_double* a, + lapack_int lda, const double* b, + lapack_int ldb, lapack_complex_double* c, + lapack_int ldc ); + +lapack_int LAPACKE_clarcm( int matrix_layout, lapack_int m, lapack_int n, + const float* a, lapack_int lda, + const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* c, + lapack_int ldc ); +lapack_int LAPACKE_zlarcm( int matrix_layout, lapack_int m, lapack_int n, + const double* a, lapack_int lda, + const lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* c, + lapack_int ldc ); + float LAPACKE_slansy( int matrix_layout, char norm, char uplo, lapack_int n, const float* a, lapack_int lda ); double LAPACKE_dlansy( int matrix_layout, char norm, char uplo, lapack_int n, @@ -2533,6 +2555,11 @@ lapack_int LAPACKE_zlaset( int matrix_layout, char uplo, lapack_int m, lapack_int LAPACKE_slasrt( char id, lapack_int n, float* d ); lapack_int LAPACKE_dlasrt( char id, lapack_int n, double* d ); +lapack_int LAPACKE_slassq( lapack_int n, float* x, lapack_int incx, float* scale, float* sumsq ); +lapack_int LAPACKE_dlassq( lapack_int n, double* x, lapack_int incx, double* scale, double* sumsq ); +lapack_int LAPACKE_classq( lapack_int n, lapack_complex_float* x, lapack_int incx, float* scale, float* sumsq ); +lapack_int LAPACKE_zlassq( lapack_int n, lapack_complex_double* x, lapack_int incx, double* scale, double* sumsq ); + lapack_int LAPACKE_slaswp( int matrix_layout, lapack_int n, float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); @@ -5759,35 +5786,35 @@ lapack_int LAPACKE_zgesvd_work( int matrix_layout, char jobu, char jobvt, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, float* a, - lapack_int lda, float vl, float vu, - lapack_int il, lapack_int iu, lapack_int* ns, - float* s, float* u, lapack_int ldu, - float* vt, lapack_int ldvt, - float* work, lapack_int lwork, lapack_int* iwork ); + lapack_int m, lapack_int n, float* a, + lapack_int lda, float vl, float vu, + lapack_int il, lapack_int iu, lapack_int* ns, + float* s, float* u, lapack_int ldu, + float* vt, lapack_int ldvt, + float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, double* a, - lapack_int lda, double vl, double vu, - lapack_int il, lapack_int iu, lapack_int* ns, - double* s, double* u, lapack_int ldu, - double* vt, lapack_int ldvt, - double* work, lapack_int lwork, lapack_int* iwork ); + lapack_int m, lapack_int n, double* a, + lapack_int lda, double vl, double vu, + lapack_int il, lapack_int iu, lapack_int* ns, + double* s, double* u, lapack_int ldu, + double* vt, lapack_int ldvt, + double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_cgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, lapack_complex_float* a, - lapack_int lda, float vl, float vu, - lapack_int il, lapack_int iu, lapack_int* ns, - float* s, lapack_complex_float* u, lapack_int ldu, - lapack_complex_float* vt, lapack_int ldvt, - lapack_complex_float* work, lapack_int lwork, - float* rwork, lapack_int* iwork ); + lapack_int m, lapack_int n, lapack_complex_float* a, + lapack_int lda, float vl, float vu, + lapack_int il, lapack_int iu, lapack_int* ns, + float* s, lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* vt, lapack_int ldvt, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int* iwork ); lapack_int LAPACKE_zgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, lapack_complex_double* a, - lapack_int lda, double vl, double vu, - lapack_int il, lapack_int iu, lapack_int* ns, - double* s, lapack_complex_double* u, lapack_int ldu, - lapack_complex_double* vt, lapack_int ldvt, - lapack_complex_double* work, lapack_int lwork, - double* rwork, lapack_int* iwork ); + lapack_int m, lapack_int n, lapack_complex_double* a, + lapack_int lda, double vl, double vu, + lapack_int il, lapack_int iu, lapack_int* ns, + double* s, lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* vt, lapack_int ldvt, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int* iwork ); lapack_int LAPACKE_sgesvj_work( int matrix_layout, char joba, char jobu, char jobv, lapack_int m, lapack_int n, float* a, @@ -5804,7 +5831,7 @@ lapack_int LAPACKE_cgesvj_work( int matrix_layout, char joba, char jobu, lapack_int lda, float* sva, lapack_int mv, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* cwork, lapack_int lwork, - float* rwork,lapack_int lrwork ); + float* rwork,lapack_int lrwork ); lapack_int LAPACKE_zgesvj_work( int matrix_layout, char joba, char jobu, char jobv, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* sva, @@ -7581,6 +7608,28 @@ double LAPACKE_zlanhe_work( int matrix_layout, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); +lapack_int LAPACKE_clacrm_work( int matrix_layout, lapack_int m, lapack_int n, + const lapack_complex_float* a, + lapack_int lda, const float* b, + lapack_int ldb, lapack_complex_float* c, + lapack_int ldc, float* work ); +lapack_int LAPACKE_zlacrm_work( int matrix_layout, lapack_int m, lapack_int n, + const lapack_complex_double* a, + lapack_int lda, const double* b, + lapack_int ldb, lapack_complex_double* c, + lapack_int ldc, double* work ); + +lapack_int LAPACKE_clarcm_work( int matrix_layout, lapack_int m, lapack_int n, + const float* a, lapack_int lda, + const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* c, + lapack_int ldc, float* work ); +lapack_int LAPACKE_zlarcm_work( int matrix_layout, lapack_int m, lapack_int n, + const double* a, lapack_int lda, + const lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* c, + lapack_int ldc, double* work ); + float LAPACKE_slansy_work( int matrix_layout, char norm, char uplo, lapack_int n, const float* a, lapack_int lda, float* work ); @@ -7729,6 +7778,11 @@ lapack_int LAPACKE_zlaset_work( int matrix_layout, char uplo, lapack_int m, lapack_int LAPACKE_slasrt_work( char id, lapack_int n, float* d ); lapack_int LAPACKE_dlasrt_work( char id, lapack_int n, double* d ); +lapack_int LAPACKE_slassq_work( lapack_int n, float* x, lapack_int incx, float* scale, float* sumsq ); +lapack_int LAPACKE_dlassq_work( lapack_int n, double* x, lapack_int incx, double* scale, double* sumsq ); +lapack_int LAPACKE_classq_work( lapack_int n, lapack_complex_float* x, lapack_int incx, float* scale, float* sumsq ); +lapack_int LAPACKE_zlassq_work( lapack_int n, lapack_complex_double* x, lapack_int incx, double* scale, double* sumsq ); + lapack_int LAPACKE_slaswp_work( int matrix_layout, lapack_int n, float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); @@ -11619,7 +11673,7 @@ lapack_int LAPACKE_zhesv_rk_work( int matrix_layout, char uplo, lapack_int n, lapack_int lda, lapack_complex_double* e, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); - + lapack_int LAPACKE_ssytrf_rk( int matrix_layout, char uplo, lapack_int n, float* a, lapack_int lda, float* e, lapack_int* ipiv ); lapack_int LAPACKE_dsytrf_rk( int matrix_layout, char uplo, lapack_int n, double* a, @@ -11773,12 +11827,12 @@ lapack_int LAPACKE_csycon_3( int matrix_layout, char uplo, lapack_int n, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zsycon_3( int matrix_layout, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, - const lapack_complex_double* e, + const lapack_complex_double* e, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_checon_3( int matrix_layout, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, - const lapack_complex_float* e, + const lapack_complex_float* e, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zhecon_3( int matrix_layout, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, @@ -11803,7 +11857,7 @@ lapack_int LAPACKE_zsycon_3_work( int matrix_layout, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* e, const lapack_int* ipiv, double anorm, - double* rcond, lapack_complex_double* work ); + double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_checon_3_work( int matrix_layout, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* e, @@ -12367,6 +12421,187 @@ lapack_int LAPACKE_zhegv_2stage_work( int matrix_layout, lapack_int itype, char double* w, lapack_complex_double* work, lapack_int lwork, double* rwork ); +//LAPACK 3.8.0 +lapack_int LAPACKE_ssysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, float* b, lapack_int ldb ); +lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, float* b, lapack_int ldb, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dsysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + double* b, lapack_int ldb ); +lapack_int LAPACKE_dsysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + double* b, lapack_int ldb, + double* work, lapack_int lwork ); +lapack_int LAPACKE_csysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_csysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zsysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ); +lapack_int LAPACKE_zsysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_chesv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_chesv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zhesv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ); +lapack_int LAPACKE_zhesv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_ssytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2 ); +lapack_int LAPACKE_ssytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dsytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2 ); +lapack_int LAPACKE_dsytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + double* work, lapack_int lwork ); +lapack_int LAPACKE_csytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2 ); +lapack_int LAPACKE_csytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zsytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2 ); +lapack_int LAPACKE_zsytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_chetrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2 ); +lapack_int LAPACKE_chetrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zhetrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2 ); +lapack_int LAPACKE_zhetrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* work, lapack_int lwork ); + + +lapack_int LAPACKE_ssytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, float* b, lapack_int ldb ); +lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, float* b, lapack_int ldb ); +lapack_int LAPACKE_dsytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + double* b, lapack_int ldb ); +lapack_int LAPACKE_dsytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + double* b, lapack_int ldb ); +lapack_int LAPACKE_csytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_csytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zsytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ); +lapack_int LAPACKE_zsytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ); +lapack_int LAPACKE_chetrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_chetrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zhetrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ); +lapack_int LAPACKE_zhetrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ); + #define LAPACK_sgetrf LAPACK_GLOBAL(sgetrf,SGETRF) #define LAPACK_dgetrf LAPACK_GLOBAL(dgetrf,DGETRF) #define LAPACK_cgetrf LAPACK_GLOBAL(cgetrf,CGETRF) @@ -13337,6 +13572,10 @@ lapack_int LAPACKE_zhegv_2stage_work( int matrix_layout, lapack_int itype, char #define LAPACK_zlange LAPACK_GLOBAL(zlange,ZLANGE) #define LAPACK_clanhe LAPACK_GLOBAL(clanhe,CLANHE) #define LAPACK_zlanhe LAPACK_GLOBAL(zlanhe,ZLANHE) +#define LAPACK_clarcm LAPACK_GLOBAL(clarcm,CLARCM) +#define LAPACK_zlarcm LAPACK_GLOBAL(zlarcm,ZLARCM) +#define LAPACK_clacrm LAPACK_GLOBAL(clacrm,CLACRM) +#define LAPACK_zlacrm LAPACK_GLOBAL(zlacrm,ZLACRM) #define LAPACK_slansy LAPACK_GLOBAL(slansy,SLANSY) #define LAPACK_dlansy LAPACK_GLOBAL(dlansy,DLANSY) #define LAPACK_clansy LAPACK_GLOBAL(clansy,CLANSY) @@ -13359,6 +13598,10 @@ lapack_int LAPACKE_zhegv_2stage_work( int matrix_layout, lapack_int itype, char #define LAPACK_dlarfg LAPACK_GLOBAL(dlarfg,DLARFG) #define LAPACK_clarfg LAPACK_GLOBAL(clarfg,CLARFG) #define LAPACK_zlarfg LAPACK_GLOBAL(zlarfg,ZLARFG) +#define LAPACK_slassq LAPACK_GLOBAL(slassq,SLASSQ) +#define LAPACK_dlassq LAPACK_GLOBAL(dlassq,DLASSQ) +#define LAPACK_classq LAPACK_GLOBAL(classq,CLASSQ) +#define LAPACK_zlassq LAPACK_GLOBAL(zlassq,ZLASSQ) #define LAPACK_slarft LAPACK_GLOBAL(slarft,SLARFT) #define LAPACK_dlarft LAPACK_GLOBAL(dlarft,DLARFT) #define LAPACK_clarft LAPACK_GLOBAL(clarft,CLARFT) @@ -13577,6 +13820,26 @@ lapack_int LAPACKE_zhegv_2stage_work( int matrix_layout, lapack_int itype, char #define LAPACK_cgetsls LAPACK_GLOBAL(cgetsls,CGETSLS) #define LAPACK_zgetsls LAPACK_GLOBAL(zgetsls,ZGETSLS) +// LAPACK 3.8.0 +#define LAPACK_ssysv_aa_2stage LAPACK_GLOBAL(ssysv_aa_2stage,SSYSV_AA_2STAGE) +#define LAPACK_dsysv_aa_2stage LAPACK_GLOBAL(dsysv_aa_2stage,DSYSV_AA_2STAGE) +#define LAPACK_chesv_aa_2stage LAPACK_GLOBAL(chesv_aa_2stage,CHESV_AA_2STAGE) +#define LAPACK_zsysv_aa_2stage LAPACK_GLOBAL(zsysv_aa_2stage,ZSYSV_AA_2STAGE) +#define LAPACK_csysv_aa_2stage LAPACK_GLOBAL(csysv_aa_2stage,CSYSV_AA_2STAGE) +#define LAPACK_zhesv_aa_2stage LAPACK_GLOBAL(zhesv_aa_2stage,ZHESV_AA_2STAGE) +#define LAPACK_ssytrs_aa_2stage LAPACK_GLOBAL(ssytrs_aa_2stage,SSYTRS_AA_2STAGE) +#define LAPACK_dsytrs_aa_2stage LAPACK_GLOBAL(dsytrs_aa_2stage,DSYTRS_AA_2STAGE) +#define LAPACK_csytrs_aa_2stage LAPACK_GLOBAL(csytrs_aa_2stage,CSYTRS_AA_2STAGE) +#define LAPACK_zsytrs_aa_2stage LAPACK_GLOBAL(zsytrs_aa_2stage,ZSYTRS_AA_2STAGE) +#define LAPACK_chetrs_aa_2stage LAPACK_GLOBAL(chetrs_aa_2stage,CHETRS_AA_2STAGE) +#define LAPACK_zhetrs_aa_2stage LAPACK_GLOBAL(zhetrs_aa_2stage,ZHETRS_AA_2STAGE) +#define LAPACK_ssytrf_aa_2stage LAPACK_GLOBAL(ssytrf_aa_2stage,SSYTRF_AA_2STAGE) +#define LAPACK_dsytrf_aa_2stage LAPACK_GLOBAL(dsytrf_aa_2stage,DSYTRF_AA_2STAGE) +#define LAPACK_csytrf_aa_2stage LAPACK_GLOBAL(csytrf_aa_2stage,CSYTRF_AA_2STAGE) +#define LAPACK_zsytrf_aa_2stage LAPACK_GLOBAL(zsytrf_aa_2stage,ZSYTRF_AA_2STAGE) +#define LAPACK_chetrf_aa_2stage LAPACK_GLOBAL(chetrf_aa_2stage,CHETRF_AA_2STAGE) +#define LAPACK_zhetrf_aa_2stage LAPACK_GLOBAL(zhetrf_aa_2stage,ZHETRF_AA_2STAGE) + void LAPACK_sgetrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); @@ -17882,6 +18145,22 @@ float LAPACK_clanhe( char* norm, char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlanhe( char* norm, char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); +void LAPACK_clarcm( lapack_int* m, lapack_int* n, const float* a, + lapack_int* lda, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* c, + lapack_int* ldc, float* work ); +void LAPACK_zlarcm( lapack_int* m, lapack_int* n, const double* a, + lapack_int* lda, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* c, + lapack_int* ldc, double* work ); +void LAPACK_clacrm( lapack_int* m, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, const float* b, + lapack_int* ldb, lapack_complex_float* c, + lapack_int* ldc, float* work ); +void LAPACK_zlacrm( lapack_int* m, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, const double* b, + lapack_int* ldb, lapack_complex_double* c, + lapack_int* ldc, double* work ); float LAPACK_slansy( char* norm, char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* work ); double LAPACK_dlansy( char* norm, char* uplo, lapack_int* n, const double* a, @@ -17943,6 +18222,10 @@ void LAPACK_clarfg( lapack_int* n, lapack_complex_float* alpha, void LAPACK_zlarfg( lapack_int* n, lapack_complex_double* alpha, lapack_complex_double* x, lapack_int* incx, lapack_complex_double* tau ); +void LAPACK_slassq( lapack_int *n, float* x, lapack_int *incx, float* scale, float* sumsq ); +void LAPACK_dlassq( lapack_int *n, double* x, lapack_int *incx, double* scale, double* sumsq ); +void LAPACK_classq( lapack_int *n, lapack_complex_float* x, lapack_int *incx, float* scale, float* sumsq ); +void LAPACK_zlassq( lapack_int *n, lapack_complex_double* x, lapack_int *incx, double* scale, double* sumsq ); void LAPACK_slarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const float* v, lapack_int* ldv, const float* tau, float* t, lapack_int* ldt ); @@ -18758,7 +19041,7 @@ void LAPACK_zhetrs_3( char* uplo, lapack_int* n, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); -void LAPACK_ssytri_3( char* uplo, lapack_int* n, float* a, lapack_int* lda, const float* e, +void LAPACK_ssytri_3( char* uplo, lapack_int* n, float* a, lapack_int* lda, const float* e, const lapack_int* ipiv, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsytri_3( char* uplo, lapack_int* n, double* a, lapack_int* lda, const double* e, const lapack_int* ipiv, double* work, lapack_int* lwork, lapack_int *info ); @@ -18775,7 +19058,7 @@ void LAPACK_zhetri_3( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* e, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); -void LAPACK_ssycon_3( char* uplo, lapack_int* n, const float* a, lapack_int* lda, const float* e, +void LAPACK_ssycon_3( char* uplo, lapack_int* n, const float* a, lapack_int* lda, const float* e, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dsycon_3( char* uplo, lapack_int* n, const double* a, lapack_int* lda, const double* e, @@ -19054,6 +19337,117 @@ void LAPACK_zhegv_2stage( lapack_int* itype, char* jobz, char* uplo, lapack_int* lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); +//LAPACK 3.8.0 + +void LAPACK_ssysv_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, float* b, lapack_int* ldb, + float* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_dsysv_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, double* b, + lapack_int* ldb, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_csysv_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zsysv_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_chesv_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zhesv_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); + +void LAPACK_ssytrf_aa_2stage( char* uplo, lapack_int* n, + float* a, lapack_int* lda, float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + float* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_dsytrf_aa_2stage( char* uplo, lapack_int* n, double* a, + lapack_int* lda, double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_csytrf_aa_2stage( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zsytrf_aa_2stage( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_chetrf_aa_2stage( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zhetrf_aa_2stage( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); + +void LAPACK_ssytrs_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dsytrs_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_csytrs_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zsytrs_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_chetrs_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zhetrs_aa_2stage( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* tb, lapack_int* ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); + +/* APIs for set/get nancheck flags */ +void LAPACKE_set_nancheck( int flag ); +int LAPACKE_get_nancheck( ); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/lapack-netlib/LAPACKE/lapacke.pc.in b/lapack-netlib/LAPACKE/lapacke.pc.in index 028f8da6d..68da73957 100644 --- a/lapack-netlib/LAPACKE/lapacke.pc.in +++ b/lapack-netlib/LAPACKE/lapacke.pc.in @@ -1,9 +1,10 @@ -prefix=@prefix@ -libdir=@libdir@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: LAPACKE Description: C Standard Interface to LAPACK Linear Algebra PACKage Version: @LAPACK_VERSION@ URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack Libs: -L${libdir} -llapacke -Requires: lapack blas +Cflags: -I${includedir} +Requires.private: lapack diff --git a/lapack-netlib/LAPACKE/src/CMakeLists.txt b/lapack-netlib/LAPACKE/src/CMakeLists.txt index 1144e977c..26e52acfa 100644 --- a/lapack-netlib/LAPACKE/src/CMakeLists.txt +++ b/lapack-netlib/LAPACKE/src/CMakeLists.txt @@ -1,6 +1,4 @@ -#aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} SRC_OBJ) - -set(SRC_OBJ +set(SOURCES lapacke_cbbcsd.c lapacke_cbbcsd_work.c lapacke_cbdsqr.c @@ -47,6 +45,8 @@ lapacke_cgehrd.c lapacke_cgehrd_work.c lapacke_cgejsv.c lapacke_cgejsv_work.c +lapacke_cgelq.c +lapacke_cgelq_work.c lapacke_cgelq2.c lapacke_cgelq2_work.c lapacke_cgelqf.c @@ -59,6 +59,8 @@ lapacke_cgelss.c lapacke_cgelss_work.c lapacke_cgelsy.c lapacke_cgelsy_work.c +lapacke_cgemlq.c +lapacke_cgemlq_work.c lapacke_cgemqr.c lapacke_cgemqr_work.c lapacke_cgemqrt.c @@ -67,6 +69,8 @@ lapacke_cgeqlf.c lapacke_cgeqlf_work.c lapacke_cgeqp3.c lapacke_cgeqp3_work.c +lapacke_cgeqr.c +lapacke_cgeqr_work.c lapacke_cgeqr2.c lapacke_cgeqr2_work.c lapacke_cgeqrf.c @@ -210,7 +214,9 @@ lapacke_cherfs_work.c lapacke_chesv.c lapacke_chesv_work.c lapacke_chesv_aa.c +lapacke_chesv_aa_2stage.c lapacke_chesv_aa_work.c +lapacke_chesv_aa_2stage_work.c lapacke_chesv_rk.c lapacke_chesv_rk_work.c lapacke_chesvx.c @@ -224,7 +230,9 @@ lapacke_chetrf_rook.c lapacke_chetrf_work.c lapacke_chetrf_rook_work.c lapacke_chetrf_aa.c +lapacke_chetrf_aa_2stage.c lapacke_chetrf_aa_work.c +lapacke_chetrf_aa_2stage_work.c lapacke_chetrf_rk.c lapacke_chetrf_rk_work.c lapacke_chetri.c @@ -242,7 +250,9 @@ lapacke_chetrs2_work.c lapacke_chetrs_work.c lapacke_chetrs_rook_work.c lapacke_chetrs_aa.c +lapacke_chetrs_aa_2stage.c lapacke_chetrs_aa_work.c +lapacke_chetrs_aa_2stage_work.c lapacke_chetrs_3.c lapacke_chetrs_3_work.c lapacke_chfrk.c @@ -291,6 +301,8 @@ lapacke_clacp2.c lapacke_clacp2_work.c lapacke_clacpy.c lapacke_clacpy_work.c +lapacke_clacrm.c +lapacke_clacrm_work.c lapacke_clag2z.c lapacke_clag2z_work.c lapacke_clange.c @@ -305,6 +317,8 @@ lapacke_clapmr.c lapacke_clapmr_work.c lapacke_clapmt.c lapacke_clapmt_work.c +lapacke_clarcm.c +lapacke_clarcm_work.c lapacke_clarfb.c lapacke_clarfb_work.c lapacke_clarfg.c @@ -319,6 +333,8 @@ lapacke_clascl.c lapacke_clascl_work.c lapacke_claset.c lapacke_claset_work.c +lapacke_classq.c +lapacke_classq_work.c lapacke_claswp.c lapacke_claswp_work.c lapacke_clauum.c @@ -436,7 +452,9 @@ lapacke_csysv_rook.c lapacke_csysv_rook_work.c lapacke_csysv_work.c lapacke_csysv_aa.c +lapacke_csysv_aa_2stage.c lapacke_csysv_aa_work.c +lapacke_csysv_aa_2stage_work.c lapacke_csysv_rk.c lapacke_csysv_rk_work.c lapacke_csysvx.c @@ -448,7 +466,9 @@ lapacke_csytrf_work.c lapacke_csytrf_rook.c lapacke_csytrf_rook_work.c lapacke_csytrf_aa.c +lapacke_csytrf_aa_2stage.c lapacke_csytrf_aa_work.c +lapacke_csytrf_aa_2stage_work.c lapacke_csytrf_rk.c lapacke_csytrf_rk_work.c lapacke_csytri.c @@ -466,7 +486,9 @@ lapacke_csytrs2_work.c lapacke_csytrs_work.c lapacke_csytrs_rook_work.c lapacke_csytrs_aa.c +lapacke_csytrs_aa_2stage.c lapacke_csytrs_aa_work.c +lapacke_csytrs_aa_2stage_work.c lapacke_csytrs_3.c lapacke_csytrs_3_work.c lapacke_ctbcon.c @@ -631,6 +653,8 @@ lapacke_dgehrd.c lapacke_dgehrd_work.c lapacke_dgejsv.c lapacke_dgejsv_work.c +lapacke_dgelq.c +lapacke_dgelq_work.c lapacke_dgelq2.c lapacke_dgelq2_work.c lapacke_dgelqf.c @@ -643,6 +667,8 @@ lapacke_dgelss.c lapacke_dgelss_work.c lapacke_dgelsy.c lapacke_dgelsy_work.c +lapacke_dgemlq.c +lapacke_dgemlq_work.c lapacke_dgemqr.c lapacke_dgemqr_work.c lapacke_dgemqrt.c @@ -651,6 +677,8 @@ lapacke_dgeqlf.c lapacke_dgeqlf_work.c lapacke_dgeqp3.c lapacke_dgeqp3_work.c +lapacke_dgeqr.c +lapacke_dgeqr_work.c lapacke_dgeqr2.c lapacke_dgeqr2_work.c lapacke_dgeqrf.c @@ -783,6 +811,8 @@ lapacke_dlaset.c lapacke_dlaset_work.c lapacke_dlasrt.c lapacke_dlasrt_work.c +lapacke_dlassq.c +lapacke_dlassq_work.c lapacke_dlaswp.c lapacke_dlaswp_work.c lapacke_dlauum.c @@ -1022,7 +1052,9 @@ lapacke_dsysv_rook.c lapacke_dsysv_rook_work.c lapacke_dsysv_work.c lapacke_dsysv_aa.c +lapacke_dsysv_aa_2stage.c lapacke_dsysv_aa_work.c +lapacke_dsysv_aa_2stage_work.c lapacke_dsysv_rk.c lapacke_dsysv_rk_work.c lapacke_dsysvx.c @@ -1036,7 +1068,9 @@ lapacke_dsytrf_work.c lapacke_dsytrf_rook.c lapacke_dsytrf_rook_work.c lapacke_dsytrf_aa.c +lapacke_dsytrf_aa_2stage.c lapacke_dsytrf_aa_work.c +lapacke_dsytrf_aa_2stage_work.c lapacke_dsytrf_rk.c lapacke_dsytrf_rk_work.c lapacke_dsytri.c @@ -1052,7 +1086,9 @@ lapacke_dsytrs_rook.c lapacke_dsytrs2.c lapacke_dsytrs2_work.c lapacke_dsytrs_aa.c +lapacke_dsytrs_aa_2stage.c lapacke_dsytrs_aa_work.c +lapacke_dsytrs_aa_2stage_work.c lapacke_dsytrs_3.c lapacke_dsytrs_3_work.c lapacke_dsytrs_work.c @@ -1127,6 +1163,7 @@ lapacke_dtrttp.c lapacke_dtrttp_work.c lapacke_dtzrzf.c lapacke_dtzrzf_work.c +lapacke_nancheck.c lapacke_sbbcsd.c lapacke_sbbcsd_work.c lapacke_sbdsdc.c @@ -1179,6 +1216,8 @@ lapacke_sgehrd.c lapacke_sgehrd_work.c lapacke_sgejsv.c lapacke_sgejsv_work.c +lapacke_sgelq.c +lapacke_sgelq_work.c lapacke_sgelq2.c lapacke_sgelq2_work.c lapacke_sgelqf.c @@ -1191,6 +1230,8 @@ lapacke_sgelss.c lapacke_sgelss_work.c lapacke_sgelsy.c lapacke_sgelsy_work.c +lapacke_sgemlq.c +lapacke_sgemlq_work.c lapacke_sgemqr.c lapacke_sgemqr_work.c lapacke_sgemqrt.c @@ -1199,6 +1240,8 @@ lapacke_sgeqlf.c lapacke_sgeqlf_work.c lapacke_sgeqp3.c lapacke_sgeqp3_work.c +lapacke_sgeqr.c +lapacke_sgeqr_work.c lapacke_sgeqr2.c lapacke_sgeqr2_work.c lapacke_sgeqrf.c @@ -1331,6 +1374,8 @@ lapacke_slaset.c lapacke_slaset_work.c lapacke_slasrt.c lapacke_slasrt_work.c +lapacke_slassq.c +lapacke_slassq_work.c lapacke_slaswp.c lapacke_slaswp_work.c lapacke_slauum.c @@ -1567,6 +1612,8 @@ lapacke_ssysv_rook_work.c lapacke_ssysv_work.c lapacke_ssysv_aa.c lapacke_ssysv_aa_work.c +lapacke_ssysv_aa_2stage.c +lapacke_ssysv_aa_2stage_work.c lapacke_ssysv_rk.c lapacke_ssysv_rk_work.c lapacke_ssysvx.c @@ -1580,7 +1627,9 @@ lapacke_ssytrf_work.c lapacke_ssytrf_rook.c lapacke_ssytrf_rook_work.c lapacke_ssytrf_aa.c +lapacke_ssytrf_aa_2stage.c lapacke_ssytrf_aa_work.c +lapacke_ssytrf_aa_2stage_work.c lapacke_ssytrf_rk.c lapacke_ssytrf_rk_work.c lapacke_ssytri.c @@ -1596,7 +1645,9 @@ lapacke_ssytrs_rook.c lapacke_ssytrs2.c lapacke_ssytrs2_work.c lapacke_ssytrs_aa.c +lapacke_ssytrs_aa_2stage.c lapacke_ssytrs_aa_work.c +lapacke_ssytrs_aa_2stage_work.c lapacke_ssytrs_3.c lapacke_ssytrs_3_work.c lapacke_ssytrs_work.c @@ -1721,6 +1772,8 @@ lapacke_zgehrd.c lapacke_zgehrd_work.c lapacke_zgejsv.c lapacke_zgejsv_work.c +lapacke_zgelq.c +lapacke_zgelq_work.c lapacke_zgelq2.c lapacke_zgelq2_work.c lapacke_zgelqf.c @@ -1733,6 +1786,8 @@ lapacke_zgelss.c lapacke_zgelss_work.c lapacke_zgelsy.c lapacke_zgelsy_work.c +lapacke_zgemlq.c +lapacke_zgemlq_work.c lapacke_zgemqr.c lapacke_zgemqr_work.c lapacke_zgemqrt.c @@ -1741,6 +1796,8 @@ lapacke_zgeqlf.c lapacke_zgeqlf_work.c lapacke_zgeqp3.c lapacke_zgeqp3_work.c +lapacke_zgeqr.c +lapacke_zgeqr_work.c lapacke_zgeqr2.c lapacke_zgeqr2_work.c lapacke_zgeqrf.c @@ -1831,6 +1888,12 @@ lapacke_zhbevd.c lapacke_zhbevd_work.c lapacke_zhbevx.c lapacke_zhbevx_work.c +lapacke_zhbev_2stage.c +lapacke_zhbev_2stage_work.c +lapacke_zhbevd_2stage.c +lapacke_zhbevd_2stage_work.c +lapacke_zhbevx_2stage.c +lapacke_zhbevx_2stage_work.c lapacke_zhbgst.c lapacke_zhbgst_work.c lapacke_zhbgv.c @@ -1878,7 +1941,9 @@ lapacke_zherfs_work.c lapacke_zhesv.c lapacke_zhesv_work.c lapacke_zhesv_aa.c +lapacke_zhesv_aa_2stage.c lapacke_zhesv_aa_work.c +lapacke_zhesv_aa_2stage_work.c lapacke_zhesv_rk.c lapacke_zhesv_rk_work.c lapacke_zhesvx.c @@ -1892,7 +1957,9 @@ lapacke_zhetrf_rook.c lapacke_zhetrf_work.c lapacke_zhetrf_rook_work.c lapacke_zhetrf_aa.c +lapacke_zhetrf_aa_2stage.c lapacke_zhetrf_aa_work.c +lapacke_zhetrf_aa_2stage_work.c lapacke_zhetrf_rk.c lapacke_zhetrf_rk_work.c lapacke_zhetri.c @@ -1909,7 +1976,9 @@ lapacke_zhetrs2.c lapacke_zhetrs2_work.c lapacke_zhetrs_work.c lapacke_zhetrs_aa.c +lapacke_zhetrs_aa_2stage.c lapacke_zhetrs_aa_work.c +lapacke_zhetrs_aa_2stage_work.c lapacke_zhetrs_3.c lapacke_zhetrs_3_work.c lapacke_zhetrs_rook_work.c @@ -1959,6 +2028,8 @@ lapacke_zlacp2.c lapacke_zlacp2_work.c lapacke_zlacpy.c lapacke_zlacpy_work.c +lapacke_zlacrm.c +lapacke_zlacrm_work.c lapacke_zlag2c.c lapacke_zlag2c_work.c lapacke_zlange.c @@ -1973,6 +2044,8 @@ lapacke_zlapmr.c lapacke_zlapmr_work.c lapacke_zlapmt.c lapacke_zlapmt_work.c +lapacke_zlarcm.c +lapacke_zlarcm_work.c lapacke_zlarfb.c lapacke_zlarfb_work.c lapacke_zlarfg.c @@ -1987,6 +2060,8 @@ lapacke_zlascl.c lapacke_zlascl_work.c lapacke_zlaset.c lapacke_zlaset_work.c +lapacke_zlassq.c +lapacke_zlassq_work.c lapacke_zlaswp.c lapacke_zlaswp_work.c lapacke_zlauum.c @@ -2104,7 +2179,9 @@ lapacke_zsysv_rook.c lapacke_zsysv_rook_work.c lapacke_zsysv_work.c lapacke_zsysv_aa.c +lapacke_zsysv_aa_2stage.c lapacke_zsysv_aa_work.c +lapacke_zsysv_aa_2stage_work.c lapacke_zsysv_rk.c lapacke_zsysv_rk_work.c lapacke_zsysvx.c @@ -2116,7 +2193,9 @@ lapacke_zsytrf_work.c lapacke_zsytrf_rook.c lapacke_zsytrf_rook_work.c lapacke_zsytrf_aa.c +lapacke_zsytrf_aa_2stage.c lapacke_zsytrf_aa_work.c +lapacke_zsytrf_aa_2stage_work.c lapacke_zsytrf_rk.c lapacke_zsytrf_rk_work.c lapacke_zsytri.c @@ -2134,7 +2213,9 @@ lapacke_zsytrs2_work.c lapacke_zsytrs_work.c lapacke_zsytrs_rook_work.c lapacke_zsytrs_aa.c +lapacke_zsytrs_aa_2stage.c lapacke_zsytrs_aa_work.c +lapacke_zsytrs_aa_2stage_work.c lapacke_zsytrs_3.c lapacke_zsytrs_3_work.c lapacke_ztbcon.c @@ -2254,36 +2335,34 @@ lapacke_csyr_work.c lapacke_ilaver.c ) -if(BUILD_DEPRECATED) - list(APPEND SRC_OBJ - lapacke_cggsvp.c - lapacke_cggsvp_work.c - lapacke_dggsvp.c - lapacke_dggsvp_work.c - lapacke_sggsvp.c - lapacke_sggsvp_work.c - lapacke_zggsvp.c - lapacke_zggsvp_work.c - lapacke_cggsvd.c - lapacke_cggsvd_work.c - lapacke_dggsvd.c - lapacke_dggsvd_work.c - lapacke_sggsvd.c - lapacke_sggsvd_work.c - lapacke_zggsvd.c - lapacke_zggsvd_work.c - lapacke_cgeqpf.c - lapacke_cgeqpf_work.c - lapacke_dgeqpf.c - lapacke_dgeqpf_work.c - lapacke_sgeqpf.c - lapacke_sgeqpf_work.c - lapacke_zgeqpf.c - lapacke_zgeqpf_work.c) - message(STATUS "Building LAPACKE deprecated routines") -endif() +set(DEPRECATED +lapacke_cggsvp.c +lapacke_cggsvp_work.c +lapacke_dggsvp.c +lapacke_dggsvp_work.c +lapacke_sggsvp.c +lapacke_sggsvp_work.c +lapacke_zggsvp.c +lapacke_zggsvp_work.c +lapacke_cggsvd.c +lapacke_cggsvd_work.c +lapacke_dggsvd.c +lapacke_dggsvd_work.c +lapacke_sggsvd.c +lapacke_sggsvd_work.c +lapacke_zggsvd.c +lapacke_zggsvd_work.c +lapacke_cgeqpf.c +lapacke_cgeqpf_work.c +lapacke_dgeqpf.c +lapacke_dgeqpf_work.c +lapacke_sgeqpf.c +lapacke_sgeqpf_work.c +lapacke_zgeqpf.c +lapacke_zgeqpf_work.c +) -set(SRCX_OBJ +set(EXTENDED lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c @@ -2299,7 +2378,7 @@ lapacke_chesvxx_work.c lapacke_dgbsvxx_work.c lapacke_dsysvxx_work.c lapacke_ ) # FILE PARTS OF TMGLIB -set(MATGEN_OBJ +set(MATGEN lapacke_clatms.c lapacke_clatms_work.c lapacke_dlatms.c diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index c899c631a..600984308 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -32,9 +32,12 @@ ############################################################################## # makefile for LAPACKE, used to build lapacke binary. # +# Note: we use multiple OBJ_A, OBJ_B, etc, instead of a single OBJ +# to allow build with mingw (argument list too long for the msys ar) +# include ../../make.inc -SRC_OBJA = \ +OBJ_A = \ lapacke_cbbcsd.o \ lapacke_cbbcsd_work.o \ lapacke_cbdsqr.o \ @@ -79,6 +82,8 @@ lapacke_cgeevx.o \ lapacke_cgeevx_work.o \ lapacke_cgehrd.o \ lapacke_cgehrd_work.o \ +lapacke_cgelq.o \ +lapacke_cgelq_work.o \ lapacke_cgelq2.o \ lapacke_cgelq2_work.o \ lapacke_cgejsv.o \ @@ -93,6 +98,8 @@ lapacke_cgelss.o \ lapacke_cgelss_work.o \ lapacke_cgelsy.o \ lapacke_cgelsy_work.o \ +lapacke_cgemlq.o \ +lapacke_cgemlq_work.o \ lapacke_cgemqr.o \ lapacke_cgemqr_work.o \ lapacke_cgemqrt.o \ @@ -101,6 +108,8 @@ lapacke_cgeqlf.o \ lapacke_cgeqlf_work.o \ lapacke_cgeqp3.o \ lapacke_cgeqp3_work.o \ +lapacke_cgeqr.o \ +lapacke_cgeqr_work.o \ lapacke_cgeqr2.o \ lapacke_cgeqr2_work.o \ lapacke_cgeqrf.o \ @@ -245,6 +254,8 @@ lapacke_chesv.o \ lapacke_chesv_work.o \ lapacke_chesv_aa.o \ lapacke_chesv_aa_work.o \ +lapacke_chesv_aa_2stage.o \ +lapacke_chesv_aa_2stage_work.o \ lapacke_chesv_rk.o \ lapacke_chesv_rk_work.o \ lapacke_chesvx.o \ @@ -258,7 +269,9 @@ lapacke_chetrf_rook.o \ lapacke_chetrf_work.o \ lapacke_chetrf_rook_work.o \ lapacke_chetrf_aa.o \ +lapacke_chetrf_aa_2stage.o \ lapacke_chetrf_aa_work.o \ +lapacke_chetrf_aa_2stage_work.o \ lapacke_chetrf_rk.o \ lapacke_chetrf_rk_work.o \ lapacke_chetri.o \ @@ -276,7 +289,9 @@ lapacke_chetrs2_work.o \ lapacke_chetrs_work.o \ lapacke_chetrs_rook_work.o \ lapacke_chetrs_aa.o \ +lapacke_chetrs_aa_2stage.o \ lapacke_chetrs_aa_work.o \ +lapacke_chetrs_aa_2stage_work.o \ lapacke_chetrs_3.o \ lapacke_chetrs_3_work.o \ lapacke_chfrk.o \ @@ -325,6 +340,8 @@ lapacke_clacp2.o \ lapacke_clacp2_work.o \ lapacke_clacpy.o \ lapacke_clacpy_work.o \ +lapacke_clacrm.o \ +lapacke_clacrm_work.o \ lapacke_clag2z.o \ lapacke_clag2z_work.o \ lapacke_clange.o \ @@ -339,6 +356,8 @@ lapacke_clapmr.o \ lapacke_clapmr_work.o \ lapacke_clapmt.o \ lapacke_clapmt_work.o \ +lapacke_clarcm.o \ +lapacke_clarcm_work.o \ lapacke_clarfb.o \ lapacke_clarfb_work.o \ lapacke_clarfg.o \ @@ -353,6 +372,8 @@ lapacke_clascl.o \ lapacke_clascl_work.o \ lapacke_claset.o \ lapacke_claset_work.o \ +lapacke_classq.o \ +lapacke_classq_work.o \ lapacke_claswp.o \ lapacke_claswp_work.o \ lapacke_clauum.o \ @@ -471,6 +492,8 @@ lapacke_csysv_rook_work.o \ lapacke_csysv_work.o \ lapacke_csysv_aa.o \ lapacke_csysv_aa_work.o \ +lapacke_csysv_aa_2stage.o \ +lapacke_csysv_aa_2stage_work.o \ lapacke_csysv_rk.o \ lapacke_csysv_rk_work.o \ lapacke_csysvx.o \ @@ -482,7 +505,9 @@ lapacke_csytrf_work.o \ lapacke_csytrf_rook.o \ lapacke_csytrf_rook_work.o \ lapacke_csytrf_aa.o \ +lapacke_csytrf_aa_2stage.o \ lapacke_csytrf_aa_work.o \ +lapacke_csytrf_aa_2stage_work.o \ lapacke_csytrf_rk.o \ lapacke_csytrf_rk_work.o \ lapacke_csytri.o \ @@ -500,7 +525,9 @@ lapacke_csytrs2_work.o \ lapacke_csytrs_work.o \ lapacke_csytrs_rook_work.o \ lapacke_csytrs_aa.o \ +lapacke_csytrs_aa_2stage.o \ lapacke_csytrs_aa_work.o \ +lapacke_csytrs_aa_2stage_work.o \ lapacke_csytrs_3.o \ lapacke_csytrs_3_work.o \ lapacke_ctbcon.o \ @@ -665,6 +692,8 @@ lapacke_dgehrd.o \ lapacke_dgehrd_work.o \ lapacke_dgejsv.o \ lapacke_dgejsv_work.o \ +lapacke_dgelq.o \ +lapacke_dgelq_work.o \ lapacke_dgelq2.o \ lapacke_dgelq2_work.o \ lapacke_dgelqf.o \ @@ -677,6 +706,8 @@ lapacke_dgelss.o \ lapacke_dgelss_work.o \ lapacke_dgelsy.o \ lapacke_dgelsy_work.o \ +lapacke_dgemlq.o \ +lapacke_dgemlq_work.o \ lapacke_dgemqr.o \ lapacke_dgemqr_work.o \ lapacke_dgemqrt.o \ @@ -685,6 +716,8 @@ lapacke_dgeqlf.o \ lapacke_dgeqlf_work.o \ lapacke_dgeqp3.o \ lapacke_dgeqp3_work.o \ +lapacke_dgeqr.o \ +lapacke_dgeqr_work.o \ lapacke_dgeqr2.o \ lapacke_dgeqr2_work.o \ lapacke_dgeqrf.o \ @@ -817,6 +850,8 @@ lapacke_dlaset.o \ lapacke_dlaset_work.o \ lapacke_dlasrt.o \ lapacke_dlasrt_work.o \ +lapacke_dlassq.o \ +lapacke_dlassq_work.o \ lapacke_dlaswp.o \ lapacke_dlaswp_work.o \ lapacke_dlauum.o \ @@ -1057,6 +1092,8 @@ lapacke_dsysv_rook_work.o \ lapacke_dsysv_work.o \ lapacke_dsysv_aa.o \ lapacke_dsysv_aa_work.o \ +lapacke_dsysv_aa_2stage.o \ +lapacke_dsysv_aa_2stage_work.o \ lapacke_dsysv_rk.o \ lapacke_dsysv_rk_work.o \ lapacke_dsysvx.o \ @@ -1071,6 +1108,8 @@ lapacke_dsytrf_rook.o \ lapacke_dsytrf_rook_work.o \ lapacke_dsytrf_aa.o \ lapacke_dsytrf_aa_work.o \ +lapacke_dsytrf_aa_2stage.o \ +lapacke_dsytrf_aa_2stage_work.o \ lapacke_dsytrf_rk.o \ lapacke_dsytrf_rk_work.o \ lapacke_dsytri.o \ @@ -1080,9 +1119,9 @@ lapacke_dsytri_3.o \ lapacke_dsytri_3_work.o \ lapacke_dsytri2x.o \ lapacke_dsytri2x_work.o \ -lapacke_dsytri_work.o +lapacke_dsytri_work.o -SRC_OBJB = \ +OBJ_B = \ lapacke_dsytrs.o \ lapacke_dsytrs_rook.o \ lapacke_dsytrs2.o \ @@ -1090,7 +1129,9 @@ lapacke_dsytrs2_work.o \ lapacke_dsytrs_work.o \ lapacke_dsytrs_rook_work.o \ lapacke_dsytrs_aa.o \ +lapacke_dsytrs_aa_2stage.o \ lapacke_dsytrs_aa_work.o \ +lapacke_dsytrs_aa_2stage_work.o \ lapacke_dsytrs_3.o \ lapacke_dsytrs_3_work.o \ lapacke_dtbcon.o \ @@ -1163,6 +1204,7 @@ lapacke_dtrttp.o \ lapacke_dtrttp_work.o \ lapacke_dtzrzf.o \ lapacke_dtzrzf_work.o \ +lapacke_nancheck.o \ lapacke_sbbcsd.o \ lapacke_sbbcsd_work.o \ lapacke_sbdsdc.o \ @@ -1215,6 +1257,8 @@ lapacke_sgehrd.o \ lapacke_sgehrd_work.o \ lapacke_sgejsv.o \ lapacke_sgejsv_work.o \ +lapacke_sgelq.o \ +lapacke_sgelq_work.o \ lapacke_sgelq2.o \ lapacke_sgelq2_work.o \ lapacke_sgelqf.o \ @@ -1227,6 +1271,8 @@ lapacke_sgelss.o \ lapacke_sgelss_work.o \ lapacke_sgelsy.o \ lapacke_sgelsy_work.o \ +lapacke_sgemlq.o \ +lapacke_sgemlq_work.o \ lapacke_sgemqr.o \ lapacke_sgemqr_work.o \ lapacke_sgemqrt.o \ @@ -1235,6 +1281,8 @@ lapacke_sgeqlf.o \ lapacke_sgeqlf_work.o \ lapacke_sgeqp3.o \ lapacke_sgeqp3_work.o \ +lapacke_sgeqr.o \ +lapacke_sgeqr_work.o \ lapacke_sgeqr2.o \ lapacke_sgeqr2_work.o \ lapacke_sgeqrf.o \ @@ -1367,6 +1415,8 @@ lapacke_slaset.o \ lapacke_slaset_work.o \ lapacke_slasrt.o \ lapacke_slasrt_work.o \ +lapacke_slassq.o \ +lapacke_slassq_work.o \ lapacke_slaswp.o \ lapacke_slaswp_work.o \ lapacke_slauum.o \ @@ -1603,6 +1653,8 @@ lapacke_ssysv_rook_work.o \ lapacke_ssysv_work.o \ lapacke_ssysv_aa.o \ lapacke_ssysv_aa_work.o \ +lapacke_ssysv_aa_2stage.o \ +lapacke_ssysv_aa_2stage_work.o \ lapacke_ssysv_rk.o \ lapacke_ssysv_rk_work.o \ lapacke_ssysvx.o \ @@ -1617,6 +1669,8 @@ lapacke_ssytrf_rook.o \ lapacke_ssytrf_rook_work.o \ lapacke_ssytrf_aa.o \ lapacke_ssytrf_aa_work.o \ +lapacke_ssytrf_aa_2stage.o \ +lapacke_ssytrf_aa_2stage_work.o \ lapacke_ssytrf_rk.o \ lapacke_ssytrf_rk_work.o \ lapacke_ssytri.o \ @@ -1634,7 +1688,9 @@ lapacke_ssytrs2_work.o \ lapacke_ssytrs_work.o \ lapacke_ssytrs_rook_work.o \ lapacke_ssytrs_aa.o \ +lapacke_ssytrs_aa_2stage.o \ lapacke_ssytrs_aa_work.o \ +lapacke_ssytrs_aa_2stage_work.o \ lapacke_ssytrs_3.o \ lapacke_ssytrs_3_work.o \ lapacke_stbcon.o \ @@ -1757,6 +1813,8 @@ lapacke_zgehrd.o \ lapacke_zgehrd_work.o \ lapacke_zgejsv.o \ lapacke_zgejsv_work.o \ +lapacke_zgelq.o \ +lapacke_zgelq_work.o \ lapacke_zgelq2.o \ lapacke_zgelq2_work.o \ lapacke_zgelqf.o \ @@ -1769,6 +1827,8 @@ lapacke_zgelss.o \ lapacke_zgelss_work.o \ lapacke_zgelsy.o \ lapacke_zgelsy_work.o \ +lapacke_zgemlq.o \ +lapacke_zgemlq_work.o \ lapacke_zgemqr.o \ lapacke_zgemqr_work.o \ lapacke_zgemqrt.o \ @@ -1777,6 +1837,8 @@ lapacke_zgeqlf.o \ lapacke_zgeqlf_work.o \ lapacke_zgeqp3.o \ lapacke_zgeqp3_work.o \ +lapacke_zgeqr.o \ +lapacke_zgeqr_work.o \ lapacke_zgeqr2.o \ lapacke_zgeqr2_work.o \ lapacke_zgeqrf.o \ @@ -1921,6 +1983,8 @@ lapacke_zhesv.o \ lapacke_zhesv_work.o \ lapacke_zhesv_aa.o \ lapacke_zhesv_aa_work.o \ +lapacke_zhesv_aa_2stage.o \ +lapacke_zhesv_aa_2stage_work.o \ lapacke_zhesv_rk.o \ lapacke_zhesv_rk_work.o \ lapacke_zhesvx.o \ @@ -1934,7 +1998,9 @@ lapacke_zhetrf_rook.o \ lapacke_zhetrf_work.o \ lapacke_zhetrf_rook_work.o \ lapacke_zhetrf_aa.o \ +lapacke_zhetrf_aa_2stage.o \ lapacke_zhetrf_aa_work.o \ +lapacke_zhetrf_aa_2stage_work.o \ lapacke_zhetrf_rk.o \ lapacke_zhetrf_rk_work.o \ lapacke_zhetri.o \ @@ -1952,7 +2018,9 @@ lapacke_zhetrs2_work.o \ lapacke_zhetrs_work.o \ lapacke_zhetrs_rook_work.o \ lapacke_zhetrs_aa.o \ +lapacke_zhetrs_aa_2stage.o \ lapacke_zhetrs_aa_work.o \ +lapacke_zhetrs_aa_2stage_work.o \ lapacke_zhetrs_3.o \ lapacke_zhetrs_3_work.o \ lapacke_zhfrk.o \ @@ -2001,6 +2069,8 @@ lapacke_zlacp2.o \ lapacke_zlacp2_work.o \ lapacke_zlacpy.o \ lapacke_zlacpy_work.o \ +lapacke_zlacrm.o \ +lapacke_zlacrm_work.o \ lapacke_zlag2c.o \ lapacke_zlag2c_work.o \ lapacke_zlange.o \ @@ -2015,6 +2085,8 @@ lapacke_zlapmr.o \ lapacke_zlapmr_work.o \ lapacke_zlapmt.o \ lapacke_zlapmt_work.o \ +lapacke_zlarcm.o \ +lapacke_zlarcm_work.o \ lapacke_zlarfb.o \ lapacke_zlarfb_work.o \ lapacke_zlarfg.o \ @@ -2029,6 +2101,8 @@ lapacke_zlascl.o \ lapacke_zlascl_work.o \ lapacke_zlaset.o \ lapacke_zlaset_work.o \ +lapacke_zlassq.o \ +lapacke_zlassq_work.o \ lapacke_zlaswp.o \ lapacke_zlaswp_work.o \ lapacke_zlauum.o \ @@ -2147,6 +2221,8 @@ lapacke_zsysv_rook_work.o \ lapacke_zsysv_work.o \ lapacke_zsysv_aa.o \ lapacke_zsysv_aa_work.o \ +lapacke_zsysv_aa_2stage.o \ +lapacke_zsysv_aa_2stage_work.o \ lapacke_zsysv_rk.o \ lapacke_zsysv_rk_work.o \ lapacke_zsysvx.o \ @@ -2158,7 +2234,9 @@ lapacke_zsytrf_work.o \ lapacke_zsytrf_rook.o \ lapacke_zsytrf_rook_work.o \ lapacke_zsytrf_aa.o \ +lapacke_zsytrf_aa_2stage.o \ lapacke_zsytrf_aa_work.o \ +lapacke_zsytrf_aa_2stage_work.o \ lapacke_zsytrf_rk.o \ lapacke_zsytrf_rk_work.o \ lapacke_zsytri.o \ @@ -2176,7 +2254,9 @@ lapacke_zsytrs2_work.o \ lapacke_zsytrs_work.o \ lapacke_zsytrs_rook_work.o \ lapacke_zsytrs_aa.o \ +lapacke_zsytrs_aa_2stage.o \ lapacke_zsytrs_aa_work.o \ +lapacke_zsytrs_aa_2stage_work.o \ lapacke_zsytrs_3.o \ lapacke_zsytrs_3_work.o \ lapacke_ztbcon.o \ @@ -2295,7 +2375,8 @@ lapacke_zsyr_work.o \ lapacke_csyr_work.o \ lapacke_ilaver.o -DEPRECSRC = \ +ifdef BUILD_DEPRECATED +DEPRECATED = \ lapacke_cggsvp.o \ lapacke_cggsvp_work.o \ lapacke_dggsvp.o \ @@ -2320,8 +2401,10 @@ lapacke_sgeqpf.o \ lapacke_sgeqpf_work.o \ lapacke_zgeqpf.o \ lapacke_zgeqpf_work.o +endif -SRCX_OBJ = \ +ifdef USEXBLAS +EXTENDED = \ lapacke_cgbrfsx.o lapacke_cporfsx.o lapacke_dgerfsx.o lapacke_sgbrfsx.o lapacke_ssyrfsx.o lapacke_zherfsx.o \ lapacke_cgbrfsx_work.o lapacke_cporfsx_work.o lapacke_dgerfsx_work.o lapacke_sgbrfsx_work.o lapacke_ssyrfsx_work.o lapacke_zherfsx_work.o \ lapacke_cgerfsx.o lapacke_csyrfsx.o lapacke_dporfsx.o lapacke_sgerfsx.o lapacke_zgbrfsx.o lapacke_zporfsx.o \ @@ -2334,10 +2417,11 @@ lapacke_cgesvxx.o lapacke_csysvxx.o lapacke_dposvxx.o lapacke_ lapacke_cgesvxx_work.o lapacke_csysvxx_work.o lapacke_dposvxx_work.o lapacke_sgesvxx_work.o lapacke_zgbsvxx_work.o lapacke_zposvxx_work.o \ lapacke_chesvxx.o lapacke_dgbsvxx.o lapacke_dsysvxx.o lapacke_sposvxx.o lapacke_zgesvxx.o lapacke_zsysvxx.o \ lapacke_chesvxx_work.o lapacke_dgbsvxx_work.o lapacke_dsysvxx_work.o lapacke_sposvxx_work.o lapacke_zgesvxx_work.o lapacke_zsysvxx_work.o +endif - +ifdef LAPACKE_WITH_TMG # FILE PARTS OF TMGLIB -MATGEN_OBJ = \ +MATGEN = \ lapacke_clatms.o \ lapacke_clatms_work.o \ lapacke_dlatms.o \ @@ -2366,27 +2450,21 @@ lapacke_slagsy.o \ lapacke_slagsy_work.o \ lapacke_zlagsy.o \ lapacke_zlagsy_work.o - -ALLOBJA = $(SRC_OBJA) -ALLOBJB = $(SRC_OBJB) $(MATGEN_OBJ) - -ifdef USEXBLAS -ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) -endif - -ifdef BUILD_DEPRECATED -DEPRECATED = $(DEPRECSRC) endif all: ../../$(LAPACKELIB) -../../$(LAPACKELIB): $(ALLOBJA) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJA) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED) +../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) + $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) + $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) + $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) + $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) + $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) $(RANLIB) $@ +clean: cleanobj +cleanobj: + rm -f *.o + .c.o: $(CC) $(CFLAGS) -I../include -c -o $@ $< - -clean: - rm -f *.o diff --git a/lapack-netlib/LAPACKE/src/lapacke_cbbcsd.c b/lapack-netlib/LAPACKE/src/lapacke_cbbcsd.c index 5fba06d9e..32dd5cc7a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cbbcsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cbbcsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,41 +47,44 @@ lapack_int LAPACKE_cbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork = -1; float* rwork = NULL; float rwork_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cbbcsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - if( LAPACKE_s_nancheck( q-1, phi, 1 ) ) { - return -11; - } - if( LAPACKE_s_nancheck( q, theta, 1 ) ) { - return -10; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { - return -12; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( q-1, phi, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { - return -14; + if( LAPACKE_s_nancheck( q, theta, 1 ) ) { + return -10; } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { - return -16; + if( LAPACKE_lsame( jobu1, 'y' ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { + return -12; + } } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { - return -18; + if( LAPACKE_lsame( jobu2, 'y' ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { + return -14; + } + } + if( LAPACKE_lsame( jobv1t, 'y' ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { + return -16; + } + } + if( LAPACKE_lsame( jobv2t, 'y' ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { + return -18; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cbbcsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_cbbcsd_work.c index 31ad6b55e..054747248 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cbbcsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cbbcsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,156 +47,36 @@ lapack_int LAPACKE_cbbcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, rwork, &lrwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_complex_float* u1_t = NULL; - lapack_complex_float* u2_t = NULL; - lapack_complex_float* v1t_t = NULL; - lapack_complex_float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 ) { - LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, rwork, &lrwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, rwork, &lrwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cbdsqr.c b/lapack-netlib/LAPACKE/src/lapacke_cbdsqr.c index a9f142a1f..dac495f61 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cbdsqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cbdsqr.c @@ -47,26 +47,28 @@ lapack_int LAPACKE_cbdsqr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( ncc != 0 ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( ncc != 0 ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { + return -13; + } } - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -8; - } - if( nru != 0 ) { - if( LAPACKE_cge_nancheck( matrix_layout, nru, n, u, ldu ) ) { - return -11; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -7; } - } - if( ncvt != 0 ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { - return -9; + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -8; + } + if( nru != 0 ) { + if( LAPACKE_cge_nancheck( matrix_layout, nru, n, u, ldu ) ) { + return -11; + } + } + if( ncvt != 0 ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbbrd.c b/lapack-netlib/LAPACKE/src/lapacke_cgbbrd.c index 18182a627..ef5997b93 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbbrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbbrd.c @@ -49,13 +49,15 @@ lapack_int LAPACKE_cgbbrd( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( ncc != 0 ) { - if( LAPACKE_cge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -8; + } + if( ncc != 0 ) { + if( LAPACKE_cge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbcon.c b/lapack-netlib/LAPACKE/src/lapacke_cgbcon.c index 3fb0b5de1..3ec851769 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbcon.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cgbcon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbequ.c b/lapack-netlib/LAPACKE/src/lapacke_cgbequ.c index e7d8f08ed..05b1f2b4d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbequ.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_cgbequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_cgbequ_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbequb.c b/lapack-netlib/LAPACKE/src/lapacke_cgbequb.c index 4eca4c898..142886314 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_cgbequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_cgbequb_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_cgbrfs.c index 3c1dc3f5f..84924afa1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbrfs.c @@ -50,18 +50,20 @@ lapack_int LAPACKE_cgbrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -9; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -9; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_cgbrfsx.c index ed6325949..fb70b3a18 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbrfsx.c @@ -53,33 +53,35 @@ lapack_int LAPACKE_cgbrfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -15; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -10; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -13; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -15; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -14; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -13; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -17; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -17; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbsv.c b/lapack-netlib/LAPACKE/src/lapacke_cgbsv.c index e5164e47e..1b48c3d65 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbsv.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_cgbsv( int matrix_layout, lapack_int n, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_cgbsv_work( matrix_layout, n, kl, ku, nrhs, ab, ldab, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_cgbsvx.c index faf1ba3dd..95855a486 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbsvx.c @@ -51,29 +51,31 @@ lapack_int LAPACKE_cgbsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbsvxx.c b/lapack-netlib/LAPACKE/src/lapacke_cgbsvxx.c index 6c371e73c..332cdf92b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbsvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbsvxx.c @@ -53,34 +53,36 @@ lapack_int LAPACKE_cgbsvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -27; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -27; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_cgbtrf.c index 7aaa5d685..0cc2b38a1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbtrf.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_cgbtrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_cgbtrf_work( matrix_layout, m, n, kl, ku, ab, ldab, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_cgbtrs.c index 165731a95..3e009f9be 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgbtrs.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_cgbtrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_cgbtrs_work( matrix_layout, trans, n, kl, ku, nrhs, ab, ldab, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgebak.c b/lapack-netlib/LAPACKE/src/lapacke_cgebak.c index eb8348b29..28f5e938b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgebak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgebak.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cgebak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, scale, 1 ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, scale, 1 ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -9; + } } #endif return LAPACKE_cgebak_work( matrix_layout, job, side, n, ilo, ihi, scale, m, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgebal.c b/lapack-netlib/LAPACKE/src/lapacke_cgebal.c index cb2f8bced..9cfa2a756 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgebal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgebal.c @@ -42,11 +42,13 @@ lapack_int LAPACKE_cgebal( int matrix_layout, char job, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || - LAPACKE_lsame( job, 's' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || + LAPACKE_lsame( job, 's' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgebrd.c b/lapack-netlib/LAPACKE/src/lapacke_cgebrd.c index 37cfce55f..9c112797b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgebrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgebrd.c @@ -47,9 +47,11 @@ lapack_int LAPACKE_cgebrd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgecon.c b/lapack-netlib/LAPACKE/src/lapacke_cgecon.c index 0eec38f55..5029ee978 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgecon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgecon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_cgecon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeequ.c b/lapack-netlib/LAPACKE/src/lapacke_cgeequ.c index 26c5d5435..f2095cafa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeequ.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_cgeequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cgeequ_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeequb.c b/lapack-netlib/LAPACKE/src/lapacke_cgeequb.c index 1ae9481d2..728686d70 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeequb.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_cgeequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cgeequb_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgees.c b/lapack-netlib/LAPACKE/src/lapacke_cgees.c index 781815e63..6efd6776f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgees.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgees.c @@ -50,9 +50,11 @@ lapack_int LAPACKE_cgees( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeesx.c b/lapack-netlib/LAPACKE/src/lapacke_cgeesx.c index a5d440c41..efff6edd2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeesx.c @@ -51,9 +51,11 @@ lapack_int LAPACKE_cgeesx( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeev.c b/lapack-netlib/LAPACKE/src/lapacke_cgeev.c index 82a2207d9..2c0c2a1da 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeev.c @@ -49,9 +49,11 @@ lapack_int LAPACKE_cgeev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeevx.c b/lapack-netlib/LAPACKE/src/lapacke_cgeevx.c index 98dee6b76..c3d99b244 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeevx.c @@ -52,9 +52,11 @@ lapack_int LAPACKE_cgeevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgehrd.c b/lapack-netlib/LAPACKE/src/lapacke_cgehrd.c index a1694e45a..23600c29e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgehrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgehrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgehrd( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgejsv.c b/lapack-netlib/LAPACKE/src/lapacke_cgejsv.c index fbf8d11c3..7d371f660 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgejsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgejsv.c @@ -41,22 +41,22 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, { lapack_int info = 0; lapack_int lwork = ( - // 1.1 - ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && + // 1.1 + ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? 2*n+1 : //1.2 - ( ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && + ( ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? n*n+3*n : //2.1 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && - !( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + !( LAPACKE_lsame( jobu, 'u') || LAPACKE_lsame( jobu, 'f' ) )&& ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? 3*n : //2.2 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && !( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? 3*n : @@ -81,8 +81,8 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? 4*n*n: 1) ) ) ) ) ) ) ); lapack_int lrwork = ( - // 1.1 - ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && + // 1.1 + ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? MAX(7,n+2*m) : //1.2 @@ -90,13 +90,13 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? MAX(7,2*n) : //2.1 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && - !( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + !( LAPACKE_lsame( jobu, 'u') || LAPACKE_lsame( jobu, 'f' ) ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? MAX( 7, n+ 2*m ) : //2.2 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && !( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? MAX(7,2*n) : @@ -119,7 +119,7 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, ( ( ( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? MAX(7,2*n) : - 7 )))))))); + 7) ) ) ) ) ) ) ); lapack_int* iwork = NULL; float* rwork = NULL; lapack_complex_float* cwork = NULL; @@ -130,11 +130,13 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; - nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; + nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgelq.c b/lapack-netlib/LAPACKE/src/lapacke_cgelq.c index df1d4882c..2dda8465a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgelq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgelq.c @@ -28,14 +28,14 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgelq * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" -lapack_int LAPACKE_cgelq_work( int matrix_layout, lapack_int m, lapack_int n, - lapack_complex_float* a, lapack_int lda, - lapack_complex_float* t, lapack_int tsize ) +lapack_int LAPACKE_cgelq( int matrix_layout, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int tsize ) { lapack_int info = 0; lapack_int lwork = -1; @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgelq_work( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgelq2.c b/lapack-netlib/LAPACKE/src/lapacke_cgelq2.c index e67e13ddf..6b14b85ec 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgelq2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgelq2.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_cgelq2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgelqf.c b/lapack-netlib/LAPACKE/src/lapacke_cgelqf.c index bf77f0f57..18bc040be 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgelqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgelqf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgelqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgels.c b/lapack-netlib/LAPACKE/src/lapacke_cgels.c index 8a2dfd663..f53bb3b63 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgels.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgels.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_cgels( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgelsd.c b/lapack-netlib/LAPACKE/src/lapacke_cgelsd.c index cf1587aaf..2ee891977 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgelsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgelsd.c @@ -55,15 +55,17 @@ lapack_int LAPACKE_cgelsd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgelss.c b/lapack-netlib/LAPACKE/src/lapacke_cgelss.c index 33bc7b74f..94efbddb6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgelss.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgelss.c @@ -49,15 +49,17 @@ lapack_int LAPACKE_cgelss( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgelsy.c b/lapack-netlib/LAPACKE/src/lapacke_cgelsy.c index 3bf9efd9c..2fea9670e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgelsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgelsy.c @@ -49,15 +49,17 @@ lapack_int LAPACKE_cgelsy( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgemlq.c b/lapack-netlib/LAPACKE/src/lapacke_cgemlq.c index ae3911108..ef0aec1b5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgemlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgemlq.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgemlq * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,15 +48,17 @@ lapack_int LAPACKE_cgemlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_c_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgemqr.c b/lapack-netlib/LAPACKE/src/lapacke_cgemqr.c index 229a41327..8ad40e6c6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgemqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgemqr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_cgemqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_cge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_c_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_cge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_c_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgemqrt.c b/lapack-netlib/LAPACKE/src/lapacke_cgemqrt.c index c0631af44..c29f260e2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgemqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgemqrt.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_cgemqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -12; - } - if( LAPACKE_cge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -12; + } + if( LAPACKE_cge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqlf.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqlf.c index 0f24233e6..53ee3d483 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqlf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqlf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgeqlf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqp3.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqp3.c index 118a25d48..28c5ace15 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqp3.c @@ -47,9 +47,11 @@ lapack_int LAPACKE_cgeqp3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqpf.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqpf.c index a30c9fb87..6467994c1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqpf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqpf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_cgeqpf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqr.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqr.c index 264b13568..b97414a6b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgeqr * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgeqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqr2.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqr2.c index e39cd6752..a55a4a972 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqr2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqr2.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_cgeqr2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqrf.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqrf.c index 79475b684..1f1f8d957 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqrf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgeqrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqrfp.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqrfp.c index 3581c10ab..1f6d3ccaa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqrfp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqrfp.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgeqrfp( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqrt.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqrt.c index 410e705e9..be4056031 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqrt.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_cgeqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqrt2.c index 7ff59b1e2..4ffe1112a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqrt2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cgeqrt2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cgeqrt2_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgeqrt3.c b/lapack-netlib/LAPACKE/src/lapacke_cgeqrt3.c index 46e924d3d..c51a8d876 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgeqrt3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgeqrt3.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cgeqrt3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cgeqrt3_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgerfs.c b/lapack-netlib/LAPACKE/src/lapacke_cgerfs.c index ff4da5448..d16dee407 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgerfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgerfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_cgerfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgerfsx.c b/lapack-netlib/LAPACKE/src/lapacke_cgerfsx.c index 5e3434840..d892dd1d3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgerfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgerfsx.c @@ -53,33 +53,35 @@ lapack_int LAPACKE_cgerfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -11; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -12; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -11; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgerqf.c b/lapack-netlib/LAPACKE/src/lapacke_cgerqf.c index f92ba64a2..37a382783 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgerqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgerqf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgerqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesdd.c b/lapack-netlib/LAPACKE/src/lapacke_cgesdd.c index 3ba34e1d8..e96957ceb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesdd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesdd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgesdd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -52,16 +52,18 @@ lapack_int LAPACKE_cgesdd( int matrix_layout, char jobz, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Additional scalars initializations for work arrays */ if( LAPACKE_lsame( jobz, 'n' ) ) { lrwork = MAX(1,7*MIN(m,n)); } else { - lrwork = (size_t)MIN(m,n)*MAX(5*MIN(m,n)+7,2*MAX(m,n)+2*MIN(m,n)+1); + lrwork = (size_t)MAX(1,MIN(m,n)*MAX(5*MIN(m,n)+7,2*MAX(m,n)+2*MIN(m,n)+1)); } /* Allocate memory for working array(s) */ iwork = (lapack_int*) diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesv.c b/lapack-netlib/LAPACKE/src/lapacke_cgesv.c index f57ffcf8b..ce91af9a1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cgesv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_cgesv_work( matrix_layout, n, nrhs, a, lda, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvd.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvd.c index e48331db3..9bc7dca31 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvd.c @@ -50,9 +50,11 @@ lapack_int LAPACKE_cgesvd( int matrix_layout, char jobu, char jobvt, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvdx.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvdx.c index 6882a8b8f..0acddf50f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvdx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvdx.c @@ -54,14 +54,16 @@ lapack_int LAPACKE_cgesvdx( int matrix_layout, char jobu, char jobvt, char range return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ info = LAPACKE_cgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, + m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, &work_query, lwork, rwork, iwork ); if( info != 0 ) { goto exit_level_0; @@ -69,7 +71,7 @@ lapack_int LAPACKE_cgesvdx( int matrix_layout, char jobu, char jobvt, char range lwork = LAPACK_C2INT (work_query); /* Allocate memory for work arrays */ work = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; @@ -86,8 +88,8 @@ lapack_int LAPACKE_cgesvdx( int matrix_layout, char jobu, char jobvt, char range } /* Call middle-level interface */ info = LAPACKE_cgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, - ldu, vt, ldvt, work, lwork, rwork, iwork ); + m, n, a, lda, vl, vu, il, iu, ns, s, u, + ldu, vt, ldvt, work, lwork, rwork, iwork ); /* Backup significant data from working array(s) */ for( i=0; i<12*MIN(m,n)-1; i++ ) { superb[i] = iwork[i+1]; diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvdx_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvdx_work.c index a5955ef46..ddbc0a5b8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvdx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvdx_work.c @@ -34,19 +34,19 @@ #include "lapacke_utils.h" lapack_int LAPACKE_cgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, lapack_complex_float* a, - lapack_int lda, float vl, float vu, - lapack_int il, lapack_int iu, lapack_int* ns, - float* s, lapack_complex_float* u, lapack_int ldu, - lapack_complex_float* vt, lapack_int ldvt, - lapack_complex_float* work, lapack_int lwork, - float* rwork, lapack_int* iwork ) + lapack_int m, lapack_int n, lapack_complex_float* a, + lapack_int lda, float vl, float vu, + lapack_int il, lapack_int iu, lapack_int* ns, + float* s, lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* vt, lapack_int ldvt, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int* iwork ) { lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_cgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda, &vl, &vu, - &il, &iu, ns, s, u, &ldu, vt, &ldvt, + &il, &iu, ns, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; @@ -85,7 +85,7 @@ lapack_int LAPACKE_cgesvdx_work( int matrix_layout, char jobu, char jobvt, char /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { LAPACK_cgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda_t, &vl, &vu, - &il, &iu, ns, s, u, &ldu_t, vt, + &il, &iu, ns, s, u, &ldu_t, vt, &ldvt_t, work, &lwork, rwork, iwork, &info ); return (info < 0) ? (info - 1) : info; } @@ -116,8 +116,8 @@ lapack_int LAPACKE_cgesvdx_work( int matrix_layout, char jobu, char jobvt, char LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_cgesvdx( &jobu, &jobvt, &range, &m, &n, a_t, &lda_t, &vl, &vu, - &il, &iu, ns, s, u_t, &ldu_t, vt_t, - &ldvt_t, work, &lwork, rwork, iwork, &info ); + &il, &iu, ns, s, u_t, &ldu_t, vt_t, + &ldvt_t, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvj.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvj.c index 9d3b81e45..253cbf00b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvj.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvj.c @@ -51,15 +51,17 @@ lapack_int LAPACKE_cgesvj( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : - ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0); - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -7; - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : + ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0); + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvj_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvj_work.c index e2bbbfec7..138e229b0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvj_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvj_work.c @@ -76,7 +76,7 @@ lapack_int LAPACKE_cgesvj_work( int matrix_layout, char joba, char jobu, } if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { v_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); + LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); if( v_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvx.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvx.c index c8896baf9..5958316d2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvx.c @@ -51,28 +51,30 @@ lapack_int LAPACKE_cgesvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvxx.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvxx.c index 1750325c7..1c3111016 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvxx.c @@ -53,33 +53,35 @@ lapack_int LAPACKE_cgesvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetf2.c b/lapack-netlib/LAPACKE/src/lapacke_cgetf2.c index 6a9741dd4..343e36603 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgetf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetf2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cgetf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cgetf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetrf.c b/lapack-netlib/LAPACKE/src/lapacke_cgetrf.c index 2f5ea4dc8..18d4221d9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgetrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetrf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cgetrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cgetrf_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetrf2.c b/lapack-netlib/LAPACKE/src/lapacke_cgetrf2.c index b2e4282ba..fb2ec9e8e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgetrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetrf2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cgetrf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cgetrf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetri.c b/lapack-netlib/LAPACKE/src/lapacke_cgetri.c index d84031db8..243e9842d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgetri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetri.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cgetri( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetrs.c b/lapack-netlib/LAPACKE/src/lapacke_cgetrs.c index a96a9c534..cf10e6385 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgetrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cgetrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_cgetrs_work( matrix_layout, trans, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetsls.c b/lapack-netlib/LAPACKE/src/lapacke_cgetsls.c index 8b35c105f..c9ff16725 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgetsls.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetsls.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_cgetsls( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggbak.c b/lapack-netlib/LAPACKE/src/lapacke_cggbak.c index 2c4ac8456..58298ed40 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggbak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggbak.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_cggbak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, lscale, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n, rscale, 1 ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, lscale, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( n, rscale, 1 ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -10; + } } #endif return LAPACKE_cggbak_work( matrix_layout, job, side, n, ilo, ihi, lscale, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggbal.c b/lapack-netlib/LAPACKE/src/lapacke_cggbal.c index 564173ff9..6903c07bb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggbal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggbal.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_cggbal( int matrix_layout, char job, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } - } - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -6; + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgges.c b/lapack-netlib/LAPACKE/src/lapacke_cgges.c index d11bc0d02..680929729 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgges.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgges.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_cgges( int matrix_layout, char jobvsl, char jobvsr, char sort return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgges3.c b/lapack-netlib/LAPACKE/src/lapacke_cgges3.c index 09cd9b936..f288dd238 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgges3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgges3.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_cgges3( int matrix_layout, char jobvsl, char jobvsr, char sor return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggesx.c b/lapack-netlib/LAPACKE/src/lapacke_cggesx.c index 2aedc13b0..fc939a314 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggesx.c @@ -58,12 +58,14 @@ lapack_int LAPACKE_cggesx( int matrix_layout, char jobvsl, char jobvsr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggev.c b/lapack-netlib/LAPACKE/src/lapacke_cggev.c index 6a11dfaec..8a0323c1a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggev.c @@ -51,12 +51,14 @@ lapack_int LAPACKE_cggev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggev3.c b/lapack-netlib/LAPACKE/src/lapacke_cggev3.c index e9f94ceec..e44e1e5a3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggev3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggev3.c @@ -52,12 +52,14 @@ lapack_int LAPACKE_cggev3( int matrix_layout, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggevx.c b/lapack-netlib/LAPACKE/src/lapacke_cggevx.c index 548c02c19..94c081a51 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggevx.c @@ -58,12 +58,14 @@ lapack_int LAPACKE_cggevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggglm.c b/lapack-netlib/LAPACKE/src/lapacke_cggglm.c index 52d5885a0..5b84dee8c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggglm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggglm.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_cggglm( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -7; - } - if( LAPACKE_c_nancheck( n, d, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -7; + } + if( LAPACKE_c_nancheck( n, d, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgghd3.c b/lapack-netlib/LAPACKE/src/lapacke_cgghd3.c index ff2f11094..fcc1a9ba3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgghd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgghd3.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_cgghd3( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgghrd.c b/lapack-netlib/LAPACKE/src/lapacke_cgghrd.c index 01f8d2928..99cc29a63 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgghrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgghrd.c @@ -45,21 +45,23 @@ lapack_int LAPACKE_cgghrd( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgglse.c b/lapack-netlib/LAPACKE/src/lapacke_cgglse.c index 480b4e8fc..aa1086af6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgglse.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgglse.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_cgglse( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -7; - } - if( LAPACKE_c_nancheck( m, c, 1 ) ) { - return -9; - } - if( LAPACKE_c_nancheck( p, d, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -7; + } + if( LAPACKE_c_nancheck( m, c, 1 ) ) { + return -9; + } + if( LAPACKE_c_nancheck( p, d, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggqrf.c b/lapack-netlib/LAPACKE/src/lapacke_cggqrf.c index 81ef19b01..a8851348c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggqrf.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_cggqrf( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggrqf.c b/lapack-netlib/LAPACKE/src/lapacke_cggrqf.c index 84c4200a0..ce25c3483 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggrqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggrqf.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_cggrqf( int matrix_layout, lapack_int m, lapack_int p, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggsvd.c b/lapack-netlib/LAPACKE/src/lapacke_cggsvd.c index 2a683f901..6656db201 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggsvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggsvd.c @@ -51,12 +51,14 @@ lapack_int LAPACKE_cggsvd( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggsvd3.c b/lapack-netlib/LAPACKE/src/lapacke_cggsvd3.c index 97995ae69..34cb131f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggsvd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggsvd3.c @@ -45,7 +45,7 @@ lapack_int LAPACKE_cggsvd3( int matrix_layout, char jobu, char jobv, char jobq, { lapack_int info = 0; float* rwork = NULL; - lapack_int lwork = -1; + lapack_int lwork = -1; lapack_complex_float* work = NULL; lapack_complex_float work_query; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -53,21 +53,23 @@ lapack_int LAPACKE_cggsvd3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif /* Query optimal size for working array */ info = LAPACKE_cggsvd3_work( matrix_layout, jobu, jobv, jobq, m, n, p, k, l, a, lda, b, ldb, alpha, beta, u, ldu, v, ldv, q, ldq, &work_query, lwork, rwork, iwork ); - if( info != 0 ) - goto exit_level_0; - lwork = LAPACK_C2INT( work_query ); + if( info != 0 ) + goto exit_level_0; + lwork = LAPACK_C2INT( work_query ); /* Allocate memory for working array(s) */ rwork = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,2*n) ); if( rwork == NULL ) { diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggsvp.c b/lapack-netlib/LAPACKE/src/lapacke_cggsvp.c index 7a774083f..a5bf20d92 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggsvp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggsvp.c @@ -52,18 +52,20 @@ lapack_int LAPACKE_cggsvp( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cggsvp3.c b/lapack-netlib/LAPACKE/src/lapacke_cggsvp3.c index 25cc97439..d3f1e2a71 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cggsvp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cggsvp3.c @@ -54,18 +54,20 @@ lapack_int LAPACKE_cggsvp3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Query optimal size for working array */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgtcon.c b/lapack-netlib/LAPACKE/src/lapacke_cgtcon.c index 17c7813ca..279b07996 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgtcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgtcon.c @@ -43,21 +43,23 @@ lapack_int LAPACKE_cgtcon( char norm, lapack_int n, lapack_int info = 0; lapack_complex_float* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -8; - } - if( LAPACKE_c_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { - return -3; - } - if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -8; + } + if( LAPACKE_c_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { + return -3; + } + if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgtrfs.c b/lapack-netlib/LAPACKE/src/lapacke_cgtrfs.c index 20d3f9378..a7f253e98 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgtrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgtrfs.c @@ -54,33 +54,35 @@ lapack_int LAPACKE_cgtrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_c_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_c_nancheck( n, df, 1 ) ) { - return -9; - } - if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n-1, dlf, 1 ) ) { - return -8; - } - if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { - return -11; - } - if( LAPACKE_c_nancheck( n-1, duf, 1 ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_c_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_c_nancheck( n, df, 1 ) ) { + return -9; + } + if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n-1, dlf, 1 ) ) { + return -8; + } + if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { + return -11; + } + if( LAPACKE_c_nancheck( n-1, duf, 1 ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgtsv.c b/lapack-netlib/LAPACKE/src/lapacke_cgtsv.c index ce53609bd..16e86338b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgtsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgtsv.c @@ -43,18 +43,20 @@ lapack_int LAPACKE_cgtsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_c_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_c_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { + return -6; + } } #endif return LAPACKE_cgtsv_work( matrix_layout, n, nrhs, dl, d, du, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgtsvx.c b/lapack-netlib/LAPACKE/src/lapacke_cgtsvx.c index 1a6ab7c79..170e59e75 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgtsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgtsvx.c @@ -53,37 +53,39 @@ lapack_int LAPACKE_cgtsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_c_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_c_nancheck( n, df, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_c_nancheck( n-1, dlf, 1 ) ) { - return -9; + if( LAPACKE_c_nancheck( n, d, 1 ) ) { + return -7; } - } - if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_c_nancheck( n, df, 1 ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_c_nancheck( n-1, duf, 1 ) ) { - return -11; + if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_c_nancheck( n-1, dlf, 1 ) ) { + return -9; + } + } + if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { + return -8; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { + return -12; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_c_nancheck( n-1, duf, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgttrf.c b/lapack-netlib/LAPACKE/src/lapacke_cgttrf.c index e660399df..4b0500d8b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgttrf.c @@ -38,15 +38,17 @@ lapack_int LAPACKE_cgttrf( lapack_int n, lapack_complex_float* dl, lapack_complex_float* du2, lapack_int* ipiv ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_c_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { - return -2; - } - if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_c_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { + return -2; + } + if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { + return -4; + } } #endif return LAPACKE_cgttrf_work( n, dl, d, du, du2, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgttrs.c b/lapack-netlib/LAPACKE/src/lapacke_cgttrs.c index 08ca87231..4f5c916c5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgttrs.c @@ -46,21 +46,23 @@ lapack_int LAPACKE_cgttrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_c_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_c_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_c_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_c_nancheck( n-2, du2, 1 ) ) { + return -8; + } } #endif return LAPACKE_cgttrs_work( matrix_layout, trans, n, nrhs, dl, d, du, du2, diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbev.c b/lapack-netlib/LAPACKE/src/lapacke_chbev.c index 0106c82bd..d79802aed 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbev.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_chbev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage.c index 5be09b6e3..d62e9a9c0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_chbev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage_work.c index 075b85375..f5736c1d0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbev_2stage_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function chbev_2stage * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -44,7 +44,7 @@ lapack_int LAPACKE_chbev_2stage_work( int matrix_layout, char jobz, char uplo, if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_chbev_2stage( &jobz, &uplo, &n, &kd, ab, &ldab, w, z, &ldz, work, &lwork, - rwork, &info ); + rwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbevd.c b/lapack-netlib/LAPACKE/src/lapacke_chbevd.c index 6cfda9777..024cf2585 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbevd.c @@ -53,9 +53,11 @@ lapack_int LAPACKE_chbevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_chbevd_2stage.c index 293701bca..63f7d8ccb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbevd_2stage.c @@ -53,9 +53,11 @@ lapack_int LAPACKE_chbevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbevx.c b/lapack-netlib/LAPACKE/src/lapacke_chbevx.c index ba356cb87..1f30a8335 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbevx.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_chbevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_chbevx_2stage.c index 9b64a8a53..401f47cc3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbevx_2stage.c @@ -52,21 +52,23 @@ lapack_int LAPACKE_chbevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbgst.c b/lapack-netlib/LAPACKE/src/lapacke_chbgst.c index f741c0d81..da972842b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbgst.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_chbgst( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbgv.c b/lapack-netlib/LAPACKE/src/lapacke_chbgv.c index b46db05f0..31f453168 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbgv.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_chbgv( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbgvd.c b/lapack-netlib/LAPACKE/src/lapacke_chbgvd.c index 0801afead..d44f6c622 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbgvd.c @@ -54,12 +54,14 @@ lapack_int LAPACKE_chbgvd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbgvx.c b/lapack-netlib/LAPACKE/src/lapacke_chbgvx.c index 5e2705a64..5acacca03 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbgvx.c @@ -51,24 +51,26 @@ lapack_int LAPACKE_chbgvx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -8; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -18; - } - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -10; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -15; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -18; + } + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -10; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -14; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -15; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_chbtrd.c b/lapack-netlib/LAPACKE/src/lapacke_chbtrd.c index 351382eda..634d8032f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chbtrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chbtrd.c @@ -45,13 +45,15 @@ lapack_int LAPACKE_chbtrd( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_lsame( vect, 'u' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_lsame( vect, 'u' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_checon.c b/lapack-netlib/LAPACKE/src/lapacke_checon.c index 168730d2f..54866ea41 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_checon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_checon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_checon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_checon_3.c b/lapack-netlib/LAPACKE/src/lapacke_checon_3.c index d96cb9864..1481359d7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_checon_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_checon_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function checon_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,20 +40,23 @@ lapack_int LAPACKE_checon_3( int matrix_layout, char uplo, lapack_int n, { lapack_int info = 0; lapack_complex_float* work = NULL; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_checon_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheequb.c b/lapack-netlib/LAPACKE/src/lapacke_cheequb.c index 61369303b..f11eb1d17 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_cheequb( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheev.c b/lapack-netlib/LAPACKE/src/lapacke_cheev.c index 0be651058..7118f4d2a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheev.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cheev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_cheev_2stage.c index 52e6a300c..bf231c39d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheev_2stage.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_cheev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevd.c b/lapack-netlib/LAPACKE/src/lapacke_cheevd.c index dea004b91..d0dea375b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevd.c @@ -51,9 +51,11 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_cheevd_2stage.c index 339d43db6..d87481abf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevd_2stage.c @@ -51,9 +51,11 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevr.c b/lapack-netlib/LAPACKE/src/lapacke_cheevr.c index c84c43ec4..6fe261624 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevr.c @@ -55,21 +55,23 @@ lapack_int LAPACKE_cheevr( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevr_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_cheevr_2stage.c index cb4c0b9a8..5b3f5c77a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevr_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevr_2stage.c @@ -55,21 +55,23 @@ lapack_int LAPACKE_cheevr_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevr_work.c b/lapack-netlib/LAPACKE/src/lapacke_cheevr_work.c index a8628d303..e3d1d89fb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cheevr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -54,8 +54,9 @@ lapack_int LAPACKE_cheevr_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevx.c b/lapack-netlib/LAPACKE/src/lapacke_cheevx.c index 5dbe5292c..f7a41d6ea 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevx.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_cheevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_cheevx_2stage.c index 5f3dafe6b..93add3f5f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevx_2stage.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_cheevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheevx_work.c b/lapack-netlib/LAPACKE/src/lapacke_cheevx_work.c index 87fa02073..c1bd5a1c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheevx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheevx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cheevx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -53,8 +53,9 @@ lapack_int LAPACKE_cheevx_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chegst.c b/lapack-netlib/LAPACKE/src/lapacke_chegst.c index 10bd1f25b..c628017c2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chegst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chegst.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_chegst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_chegst_work( matrix_layout, itype, uplo, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chegv.c b/lapack-netlib/LAPACKE/src/lapacke_chegv.c index 6c8fd2c12..15d052987 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chegv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chegv.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_chegv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chegv_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_chegv_2stage.c index 53a5d4252..537b9450b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chegv_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chegv_2stage.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_chegv_2stage( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chegvd.c b/lapack-netlib/LAPACKE/src/lapacke_chegvd.c index 85a9e2e6c..2959cb0dc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chegvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chegvd.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_chegvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chegvx.c b/lapack-netlib/LAPACKE/src/lapacke_chegvx.c index c120b4266..3ba62746e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chegvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chegvx.c @@ -52,24 +52,26 @@ lapack_int LAPACKE_chegvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_chegvx_work.c b/lapack-netlib/LAPACKE/src/lapacke_chegvx_work.c index 8251662fd..3551d39fc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chegvx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chegvx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function chegvx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_cherfs.c b/lapack-netlib/LAPACKE/src/lapacke_cherfs.c index ae102021b..ecf13792c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cherfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cherfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_cherfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cherfsx.c b/lapack-netlib/LAPACKE/src/lapacke_cherfsx.c index 0991b678d..4a897986d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cherfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cherfsx.c @@ -52,28 +52,30 @@ lapack_int LAPACKE_cherfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -22; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -11; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chesv.c b/lapack-netlib/LAPACKE/src/lapacke_chesv.c index 447efbea5..9a0a81f90 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chesv.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_chesv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chesv_aa.c b/lapack-netlib/LAPACKE/src/lapacke_chesv_aa.c index 2323bd287..af13d5318 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chesv_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chesv_aa.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_chesv_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage.c new file mode 100644 index 000000000..cd8a0cf5d --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage.c @@ -0,0 +1,90 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function chesv_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chesv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_float* work = NULL; + lapack_complex_float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_chesv_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_chesv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_chesv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chesv_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage_work.c new file mode 100644 index 000000000..d9a44d315 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_chesv_aa_2stage_work.c @@ -0,0 +1,124 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function chesv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chesv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_chesv_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_float* a_t = NULL; + lapack_complex_float* tb_t = NULL; + lapack_complex_float* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_chesv_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_chesv_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_chesv_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_chesv_aa_2stage( &uplo, &n, &nrhs, a, &lda_t, + tb, <b, ipiv, ipiv2, b, &ldb_t, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_chesv_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chesv_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_chesv_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_chesv_rk.c b/lapack-netlib/LAPACKE/src/lapacke_chesv_rk.c index 04b5f6a05..4741d45c8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chesv_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chesv_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function chesv_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,15 +48,14 @@ lapack_int LAPACKE_chesv_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n, e, 1) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chesvx.c b/lapack-netlib/LAPACKE/src/lapacke_chesvx.c index 666fd2697..4c7b29d42 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chesvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chesvx.c @@ -51,17 +51,19 @@ lapack_int LAPACKE_chesvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chesvxx.c b/lapack-netlib/LAPACKE/src/lapacke_chesvxx.c index 469e423c0..097646170 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chesvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chesvxx.c @@ -53,26 +53,28 @@ lapack_int LAPACKE_chesvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -24; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -24; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cheswapr.c b/lapack-netlib/LAPACKE/src/lapacke_cheswapr.c index de512fda5..5265dd4f8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cheswapr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cheswapr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cheswapr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cheswapr_work( matrix_layout, uplo, n, a, lda, i1, i2 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrd.c b/lapack-netlib/LAPACKE/src/lapacke_chetrd.c index 4bf71b859..06b55da98 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_chetrd( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf.c index 93dab7a6f..87539d6fa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_chetrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa.c index 498748160..72373c1e6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_chetrf_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage.c new file mode 100644 index 000000000..bd352c54b --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage.c @@ -0,0 +1,86 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function chetrf_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chetrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2 ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_float* work = NULL; + lapack_complex_float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_chetrf_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_chetrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_chetrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chetrf_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage_work.c new file mode 100644 index 000000000..bb2bb8dc5 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_2stage_work.c @@ -0,0 +1,108 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function chetrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chetrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_chetrf_aa_2stage( &uplo, &n, a, &lda, tb, + <b, ipiv, ipiv2, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_complex_float* a_t = NULL; + lapack_complex_float* tb_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_chetrf_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_chetrf_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_chetrf_aa_2stage( &uplo, &n, a, &lda_t, + tb, <b, ipiv, ipiv2, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_chetrf_aa_2stage( &uplo, &n, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chetrf_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_chetrf_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk.c index b9133e714..15085b8ce 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function chetrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,12 +46,11 @@ lapack_int LAPACKE_chetrf_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk_work.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk_work.c index 1a7df7c22..01034cf35 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf_rk_work.c @@ -28,14 +28,14 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function chetrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_chetrf_rk_work( int matrix_layout, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, - lapack_complex_float* e, + lapack_complex_float* e, lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ) { diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf_rook.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf_rook.c index 4f71f2e78..56cd7a1df 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrf_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf_rook.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_chetrf_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetri.c b/lapack-netlib/LAPACKE/src/lapacke_chetri.c index cdc6fd37a..239caaa2d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetri.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_chetri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetri2.c b/lapack-netlib/LAPACKE/src/lapacke_chetri2.c index e5757c7d4..18ef7e5ad 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetri2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetri2.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_chetri2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetri2x.c b/lapack-netlib/LAPACKE/src/lapacke_chetri2x.c index 23957f1fe..6937752c4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetri2x.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetri2x.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_chetri2x( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetri_3.c b/lapack-netlib/LAPACKE/src/lapacke_chetri_3.c index 8ce3ad351..d9692e062 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetri_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetri_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function chetri_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -41,17 +41,20 @@ lapack_int LAPACKE_chetri_3( int matrix_layout, char uplo, lapack_int n, lapack_int lwork = -1; lapack_complex_float* work = NULL; lapack_complex_float work_query; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_chetri_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrs.c b/lapack-netlib/LAPACKE/src/lapacke_chetrs.c index a199f8a53..2f06593ca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_chetrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_chetrs_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrs2.c b/lapack-netlib/LAPACKE/src/lapacke_chetrs2.c index 13434eaac..8a12e5eef 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrs2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrs2.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_chetrs2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrs_3.c b/lapack-netlib/LAPACKE/src/lapacke_chetrs_3.c index 77a5f2412..3c0623224 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrs_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrs_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function chetrs_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -44,17 +44,19 @@ lapack_int LAPACKE_chetrs_3( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n, e ,1 ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n, e ,1 ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_chetrs_3_work( matrix_layout, uplo, n, nrhs, a, lda, - e, ipiv, b, ldb ); + e, ipiv, b, ldb ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa.c b/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa.c index 6a635cf74..4cad1f863 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_chetrs_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage.c new file mode 100644 index 000000000..455622bed --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage.c @@ -0,0 +1,66 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function chetrs_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chetrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_chetrs_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Call middle-level interface */ + info = LAPACKE_chetrs_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage_work.c new file mode 100644 index 000000000..41d293d3c --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrs_aa_2stage_work.c @@ -0,0 +1,115 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function chetrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chetrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_float* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_chetrs_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_float* a_t = NULL; + lapack_complex_float* tb_t = NULL; + lapack_complex_float* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_chetrs_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_chetrs_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_chetrs_aa_2stage_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_chetrs_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chetrs_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_chetrs_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrs_rook.c b/lapack-netlib/LAPACKE/src/lapacke_chetrs_rook.c index eef424985..e032ae7c5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrs_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrs_rook.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_chetrs_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_chetrs_rook_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_chfrk.c b/lapack-netlib/LAPACKE/src/lapacke_chfrk.c index aa458083f..6b2e321b0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chfrk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chfrk.c @@ -44,20 +44,22 @@ lapack_int LAPACKE_chfrk( int matrix_layout, char transr, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ka = LAPACKE_lsame( trans, 'n' ) ? k : n; - na = LAPACKE_lsame( trans, 'n' ) ? n : k; - if( LAPACKE_cge_nancheck( matrix_layout, na, ka, a, lda ) ) { - return -8; - } - if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &beta, 1 ) ) { - return -10; - } - if( LAPACKE_cpf_nancheck( n, c ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ka = LAPACKE_lsame( trans, 'n' ) ? k : n; + na = LAPACKE_lsame( trans, 'n' ) ? n : k; + if( LAPACKE_cge_nancheck( matrix_layout, na, ka, a, lda ) ) { + return -8; + } + if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &beta, 1 ) ) { + return -10; + } + if( LAPACKE_cpf_nancheck( n, c ) ) { + return -11; + } } #endif return LAPACKE_chfrk_work( matrix_layout, transr, uplo, trans, n, k, alpha, diff --git a/lapack-netlib/LAPACKE/src/lapacke_chgeqz.c b/lapack-netlib/LAPACKE/src/lapacke_chgeqz.c index 040a2622a..ccaf5a624 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chgeqz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chgeqz.c @@ -52,21 +52,23 @@ lapack_int LAPACKE_chgeqz( int matrix_layout, char job, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -8; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -8; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -10; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -16; + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -14; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -10; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpcon.c b/lapack-netlib/LAPACKE/src/lapacke_chpcon.c index fcb6da5d6..52638b70a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpcon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_chpcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -6; - } - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -6; + } + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpev.c b/lapack-netlib/LAPACKE/src/lapacke_chpev.c index 1567cffda..65989287c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpev.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_chpev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpevd.c b/lapack-netlib/LAPACKE/src/lapacke_chpevd.c index 350a747f6..47c7bbe23 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpevd.c @@ -52,9 +52,11 @@ lapack_int LAPACKE_chpevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpevx.c b/lapack-netlib/LAPACKE/src/lapacke_chpevx.c index 7bc52ed0d..6b6bc0a84 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpevx.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_chpevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpgst.c b/lapack-netlib/LAPACKE/src/lapacke_chpgst.c index c22655ddc..088be1224 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpgst.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_chpgst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_chp_nancheck( n, bp ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_chp_nancheck( n, bp ) ) { + return -6; + } } #endif return LAPACKE_chpgst_work( matrix_layout, itype, uplo, n, ap, bp ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpgv.c b/lapack-netlib/LAPACKE/src/lapacke_chpgv.c index e2e6e8cab..4640125b9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpgv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_chpgv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_chp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_chp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpgvd.c b/lapack-netlib/LAPACKE/src/lapacke_chpgvd.c index 22bc00fd3..568882ec9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpgvd.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_chpgvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_chp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_chp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpgvx.c b/lapack-netlib/LAPACKE/src/lapacke_chpgvx.c index 0bbe9b01d..2e11c1037 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpgvx.c @@ -50,24 +50,26 @@ lapack_int LAPACKE_chpgvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -13; - } - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -7; - } - if( LAPACKE_chp_nancheck( n, bp ) ) { - return -8; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -13; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -10; + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -7; + } + if( LAPACKE_chp_nancheck( n, bp ) ) { + return -8; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -9; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_chprfs.c b/lapack-netlib/LAPACKE/src/lapacke_chprfs.c index f50e1a6d2..3412dbe5c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chprfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_chprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpsv.c b/lapack-netlib/LAPACKE/src/lapacke_chpsv.c index 73df97b97..7be90ee32 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_chpsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_chpsv_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chpsvx.c b/lapack-netlib/LAPACKE/src/lapacke_chpsvx.c index f6bfee491..37ec61905 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chpsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chpsvx.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_chpsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_chp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_chp_nancheck( n, afp ) ) { + return -7; + } + } + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chptrd.c b/lapack-netlib/LAPACKE/src/lapacke_chptrd.c index 3d1cbd4da..88df3abca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chptrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chptrd.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_chptrd( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_chptrd_work( matrix_layout, uplo, n, ap, d, e, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chptrf.c b/lapack-netlib/LAPACKE/src/lapacke_chptrf.c index 3441a00a9..2e1afd4a6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_chptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_chptrf_work( matrix_layout, uplo, n, ap, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chptri.c b/lapack-netlib/LAPACKE/src/lapacke_chptri.c index 598ce3bd4..a88a35b53 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chptri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_chptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chptrs.c b/lapack-netlib/LAPACKE/src/lapacke_chptrs.c index 2440d9994..210d5ccdb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chptrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_chptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_chp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_chp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_chptrs_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_chsein.c b/lapack-netlib/LAPACKE/src/lapacke_chsein.c index b528108bc..a3b1a9d40 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chsein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chsein.c @@ -49,22 +49,24 @@ lapack_int LAPACKE_chsein( int matrix_layout, char job, char eigsrc, char initv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } + } + if( LAPACKE_c_nancheck( n, w, 1 ) ) { + return -9; } - } - if( LAPACKE_c_nancheck( n, w, 1 ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_chseqr.c b/lapack-netlib/LAPACKE/src/lapacke_chseqr.c index a2645144e..76578a57f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chseqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chseqr.c @@ -48,13 +48,15 @@ lapack_int LAPACKE_chseqr( int matrix_layout, char job, char compz, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_clacgv.c b/lapack-netlib/LAPACKE/src/lapacke_clacgv.c index 4e62f4fa3..0014906ed 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clacgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clacgv.c @@ -37,9 +37,11 @@ lapack_int LAPACKE_clacgv( lapack_int n, lapack_complex_float* x, lapack_int incx ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_c_nancheck( 1+(n-1)*ABS(incx), x, incx ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_c_nancheck( 1+(n-1)*ABS(incx), x, incx ) ) { + return -2; + } } #endif return LAPACKE_clacgv_work( n, x, incx ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clacn2.c b/lapack-netlib/LAPACKE/src/lapacke_clacn2.c index 890c486f8..0d374d139 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clacn2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clacn2.c @@ -38,12 +38,14 @@ lapack_int LAPACKE_clacn2( lapack_int n, lapack_complex_float* v, float* est, lapack_int* kase, lapack_int* isave ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, est, 1 ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n, x, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, est, 1 ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n, x, 1 ) ) { + return -3; + } } #endif return LAPACKE_clacn2_work( n, v, x, est, kase, isave ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clacp2.c b/lapack-netlib/LAPACKE/src/lapacke_clacp2.c index 0878e624c..0c415c4d1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clacp2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clacp2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_clacp2( int matrix_layout, char uplo, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_clacp2_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clacp2_work.c b/lapack-netlib/LAPACKE/src/lapacke_clacp2_work.c index 69ba290a0..8f1d6b4c0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clacp2_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clacp2_work.c @@ -31,7 +31,6 @@ * Generated January, 2013 *****************************************************************************/ -#include "lapacke.h" #include "lapacke_utils.h" lapack_int LAPACKE_clacp2_work( int matrix_layout, char uplo, lapack_int m, diff --git a/lapack-netlib/LAPACKE/src/lapacke_clacpy.c b/lapack-netlib/LAPACKE/src/lapacke_clacpy.c index c45f145d3..3e0433b44 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clacpy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clacpy.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_clacpy( int matrix_layout, char uplo, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_clacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clacrm.c b/lapack-netlib/LAPACKE/src/lapacke_clacrm.c new file mode 100644 index 000000000..2c2c2b0b5 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_clacrm.c @@ -0,0 +1,76 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function clacrm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_clacrm(int matrix_layout, lapack_int m, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, const float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc) +{ + lapack_int info = 0; + float* rwork = NULL; + + if (matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR) { + LAPACKE_xerbla("LAPACKE_clacrm", -1); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if ( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -6; + } + } +#endif + /* Allocate memory for work array(s) */ + rwork = (float*) + LAPACKE_malloc(sizeof(float) * MAX(1, 2 * m * n)); + if (rwork == NULL) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_clacrm_work(matrix_layout, m, n, a, lda, b, ldb, + c, ldc, rwork); + /* Release memory and exit */ + LAPACKE_free(rwork); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_clacrm", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_clacrm_work.c b/lapack-netlib/LAPACKE/src/lapacke_clacrm_work.c new file mode 100644 index 000000000..5bfab7f95 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_clacrm_work.c @@ -0,0 +1,110 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function clacrm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_clacrm_work(int matrix_layout, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* rwork) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function */ + LAPACK_clacrm(&m, &n, a, &lda, b, &ldb, c, &ldc, rwork); + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,n); + lapack_int ldc_t = MAX(1,m); + lapack_complex_float* a_t = NULL; + float* b_t = NULL; + lapack_complex_float* c_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -5; + LAPACKE_xerbla( "LAPACKE_clacrm_work", info ); + return info; + } + if( ldb < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_clacrm_work", info ); + return info; + } + if( ldc < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_clacrm_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*) + LAPACKE_malloc(sizeof(lapack_complex_float) * lda_t * MAX(1,n)); + b_t = (float*) + LAPACKE_malloc(sizeof(float) * ldb_t * MAX(1,n)); + c_t = (lapack_complex_float*) + LAPACKE_malloc((sizeof(lapack_complex_float) * ldc_t * MAX(1,n))); + if (a_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if (b_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + if (c_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_cge_trans(matrix_layout, m, n, a, lda, a_t, lda_t); + LAPACKE_sge_trans(matrix_layout, n, n, b, ldb, b_t, ldb_t); + /* Call LAPACK function */ + LAPACK_clacrm(&m, &n, a_t, &lda_t, b_t, &ldb_t, c_t, &ldc_t, rwork); + /* Transpose output matrices */ + LAPACKE_cge_trans(LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc); + /* Release memory and exit */ + LAPACKE_free(c_t); +exit_level_2: + LAPACKE_free(b_t); +exit_level_1: + LAPACKE_free(a_t); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_clacrm_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla("LAPACKE_clacrm_work", -1); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_clag2z.c b/lapack-netlib/LAPACKE/src/lapacke_clag2z.c index 21349b233..cd989ff95 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clag2z.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clag2z.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_clag2z( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, sa, ldsa ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, sa, ldsa ) ) { + return -4; + } } #endif return LAPACKE_clag2z_work( matrix_layout, m, n, sa, ldsa, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clagge.c b/lapack-netlib/LAPACKE/src/lapacke_clagge.c index 9e18621f8..0b6c4c23e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clagge.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clagge.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_clagge( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( MIN(m,n), d, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( MIN(m,n), d, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_claghe.c b/lapack-netlib/LAPACKE/src/lapacke_claghe.c index 2e7a0820e..d1539224a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_claghe.c +++ b/lapack-netlib/LAPACKE/src/lapacke_claghe.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_claghe( int matrix_layout, lapack_int n, lapack_int k, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_clagsy.c b/lapack-netlib/LAPACKE/src/lapacke_clagsy.c index a7782f51a..f4f98fe15 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clagsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clagsy.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_clagsy( int matrix_layout, lapack_int n, lapack_int k, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_clange.c b/lapack-netlib/LAPACKE/src/lapacke_clange.c index 2cbf67212..472a6c5c1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clange.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clange.c @@ -45,9 +45,11 @@ float LAPACKE_clange( int matrix_layout, char norm, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_clange_work.c b/lapack-netlib/LAPACKE/src/lapacke_clange_work.c index 740f872a1..ce5ee25ca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clange_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clange_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function clange * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -39,37 +39,41 @@ float LAPACKE_clange_work( int matrix_layout, char norm, lapack_int m, { lapack_int info = 0; float res = 0.; + char norm_lapack; if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ + /* Call LAPACK function */ res = LAPACK_clange( &norm, &m, &n, a, &lda, work ); - if( info < 0 ) { - info = info - 1; - } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,m); - lapack_complex_float* a_t = NULL; + float* work_lapack = NULL; /* Check leading dimension(s) */ if( lda < n ) { info = -6; LAPACKE_xerbla( "LAPACKE_clange_work", info ); return info; } - /* Allocate memory for temporary array(s) */ - a_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; + if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) { + norm_lapack = 'i'; + } else if( LAPACKE_lsame( norm, 'i' ) ) { + norm_lapack = '1'; + } else { + norm_lapack = norm; + } + /* Allocate memory for work array(s) */ + if( LAPACKE_lsame( norm_lapack, 'i' ) ) { + work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) ); + if( work_lapack == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } } - /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); - /* Call LAPACK function and adjust info */ - res = LAPACK_clange( &norm, &m, &n, a_t, &lda_t, work ); - info = 0; /* LAPACK call is ok! */ + /* Call LAPACK function */ + res = LAPACK_clange( &norm_lapack, &n, &m, a, &lda, work_lapack ); /* Release memory and exit */ - LAPACKE_free( a_t ); + if( work_lapack ) { + LAPACKE_free( work_lapack ); + } exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_clange_work", info ); } } else { diff --git a/lapack-netlib/LAPACKE/src/lapacke_clanhe.c b/lapack-netlib/LAPACKE/src/lapacke_clanhe.c index b6883724a..5c40c29ac 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clanhe.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clanhe.c @@ -44,9 +44,11 @@ float LAPACKE_clanhe( int matrix_layout, char norm, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_clansy.c b/lapack-netlib/LAPACKE/src/lapacke_clansy.c index d08a4715b..a02264eb4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clansy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clansy.c @@ -44,9 +44,11 @@ float LAPACKE_clansy( int matrix_layout, char norm, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_clantr.c b/lapack-netlib/LAPACKE/src/lapacke_clantr.c index 5a38fb0d7..2ddd2e211 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clantr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clantr.c @@ -45,9 +45,11 @@ float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_clapmr.c b/lapack-netlib/LAPACKE/src/lapacke_clapmr.c index ffa5ea569..e8f9ced1e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clapmr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clapmr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_clapmr( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_clapmr_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clapmt.c b/lapack-netlib/LAPACKE/src/lapacke_clapmt.c index 9a35d4c28..e0bb39589 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clapmt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clapmt.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_clapmt( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_clapmt_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarcm.c b/lapack-netlib/LAPACKE/src/lapacke_clarcm.c new file mode 100644 index 000000000..7f80b7830 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_clarcm.c @@ -0,0 +1,76 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function clarcm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_clarcm(int matrix_layout, lapack_int m, + lapack_int n, const float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc) +{ + lapack_int info = 0; + float* rwork = NULL; + + if (matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR) { + LAPACKE_xerbla("LAPACKE_clarcm", -1); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if ( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -4; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -6; + } + } +#endif + /* Allocate memory for work array(s) */ + rwork = (float*) + LAPACKE_malloc(sizeof(float) * MAX(1, 2 * m * n)); + if (rwork == NULL) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_clarcm_work(matrix_layout, m, n, a, lda, b, ldb, + c, ldc, rwork); + /* Release memory and exit */ + LAPACKE_free(rwork); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_clarcm", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarcm_work.c b/lapack-netlib/LAPACKE/src/lapacke_clarcm_work.c new file mode 100644 index 000000000..cd12ee39c --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_clarcm_work.c @@ -0,0 +1,110 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function clarcm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_clarcm_work(int matrix_layout, lapack_int m, lapack_int n, + const float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* rwork) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function */ + LAPACK_clarcm(&m, &n, a, &lda, b, &ldb, c, &ldc, rwork); + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldc_t = MAX(1,m); + float* a_t = NULL; + lapack_complex_float* b_t = NULL; + lapack_complex_float* c_t = NULL; + /* Check leading dimension(s) */ + if( lda < m ) { + info = -5; + LAPACKE_xerbla( "LAPACKE_clarcm_work", info ); + return info; + } + if( ldb < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_clarcm_work", info ); + return info; + } + if( ldc < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_clarcm_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*) + LAPACKE_malloc(sizeof(float) * lda_t * MAX(1,m)); + b_t = (lapack_complex_float*) + LAPACKE_malloc(sizeof(lapack_complex_float) * ldb_t * MAX(1,n)); + c_t = (lapack_complex_float*) + LAPACKE_malloc((sizeof(lapack_complex_float) * ldc_t * MAX(1,n))); + if (a_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if (b_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + if (c_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_sge_trans(matrix_layout, m, m, a, lda, a_t, lda_t); + LAPACKE_cge_trans(matrix_layout, m, n, b, ldb, b_t, ldb_t); + /* Call LAPACK function */ + LAPACK_clarcm(&m, &n, a_t, &lda_t, b_t, &ldb_t, c_t, &ldc_t, rwork); + /* Transpose output matrices */ + LAPACKE_cge_trans(LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc); + /* Release memory and exit */ + LAPACKE_free(c_t); +exit_level_2: + LAPACKE_free(b_t); +exit_level_1: + LAPACKE_free(a_t); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_clarcm_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla("LAPACKE_clarcm_work", -1); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarfb.c b/lapack-netlib/LAPACKE/src/lapacke_clarfb.c index c1d3130bd..18e24509d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clarfb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function clarfb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -41,7 +41,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct lapack_int ldc ) { lapack_int info = 0; - lapack_int ldwork = ( side=='l')?n:(( side=='r')?m:1); + lapack_int ldwork; lapack_complex_float* work = NULL; lapack_int ncols_v, nrows_v; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -49,57 +49,66 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; - } - if( LAPACKE_cge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + nrows_v = ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } - if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*ldv], ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); - return -8; + if( LAPACKE_cge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { + if( k > nrows_v ) { + LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); + return -8; + } + if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, + &v[(nrows_v-k)*ldv], ldv ) ) + return -9; + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( k > ncols_v ) { + LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); + return -8; + } + if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], + ldv ) ) + return -9; + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) + return -9; } - if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], - ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; } #endif + if( LAPACKE_lsame( side, 'l' ) ) { + ldwork = n; + } else if( LAPACKE_lsame( side, 'r' ) ) { + ldwork = m; + } else { + ldwork = 1; + } /* Allocate memory for working array(s) */ work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * ldwork * MAX(1,k) ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarfg.c b/lapack-netlib/LAPACKE/src/lapacke_clarfg.c index 945b799dd..0381a42bc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clarfg.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clarfg.c @@ -38,12 +38,14 @@ lapack_int LAPACKE_clarfg( lapack_int n, lapack_complex_float* alpha, lapack_complex_float* tau ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_c_nancheck( 1, alpha, 1 ) ) { - return -2; - } - if( LAPACKE_c_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_c_nancheck( 1, alpha, 1 ) ) { + return -2; + } + if( LAPACKE_c_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -3; + } } #endif return LAPACKE_clarfg_work( n, alpha, x, incx, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarft.c b/lapack-netlib/LAPACKE/src/lapacke_clarft.c index cd69bd45c..961d7cbab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clarft.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clarft.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_clarft( int matrix_layout, char direct, char storev, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( LAPACKE_lsame( storev, 'r' ) ? n : 1); - nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1); - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( LAPACKE_lsame( storev, 'r' ) ? n : 1); + nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1); + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -6; + } } #endif return LAPACKE_clarft_work( matrix_layout, direct, storev, n, k, v, ldv, tau, diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarfx.c b/lapack-netlib/LAPACKE/src/lapacke_clarfx.c index bf6e887d3..977e283e1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clarfx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clarfx.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_clarfx( int matrix_layout, char side, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -7; - } - if( LAPACKE_c_nancheck( 1, &tau, 1 ) ) { - return -6; - } - if( LAPACKE_c_nancheck( m, v, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -7; + } + if( LAPACKE_c_nancheck( 1, &tau, 1 ) ) { + return -6; + } + if( LAPACKE_c_nancheck( m, v, 1 ) ) { + return -5; + } } #endif return LAPACKE_clarfx_work( matrix_layout, side, m, n, v, tau, c, ldc, diff --git a/lapack-netlib/LAPACKE/src/lapacke_clascl.c b/lapack-netlib/LAPACKE/src/lapacke_clascl.c index 5e3169551..fdcb02947 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clascl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clascl.c @@ -43,68 +43,70 @@ lapack_int LAPACKE_clascl( int matrix_layout, char type, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - switch (type) { - case 'G': - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + switch (type) { + case 'G': + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -9; + } + break; + case 'L': + // TYPE = 'L' - lower triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_cgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { + return -9; + } + break; + case 'U': + // TYPE = 'U' - upper triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_cgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { + return -9; + } + break; + case 'H': + // TYPE = 'H' - part of upper Hessenberg matrix in general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_cgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { + return -9; + } + case 'B': + // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) + if( LAPACKE_chb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { + return -9; + } + break; + case 'Q': + // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) + if( LAPACKE_chb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { + return -9; + } + break; + case 'Z': + // TYPE = 'Z' - band matrix laid out for ?GBTRF + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { + return -9; + } + break; } - break; - case 'L': - // TYPE = 'L' - lower triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_cgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { - return -9; - } - break; - case 'U': - // TYPE = 'U' - upper triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_cgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { - return -9; - } - break; - case 'H': - // TYPE = 'H' - part of upper Hessenberg matrix in general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_cgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { - return -9; - } - case 'B': - // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) - if( LAPACKE_chb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { - return -9; - } - break; - case 'Q': - // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) - if( LAPACKE_chb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { - return -9; - } - break; - case 'Z': - // TYPE = 'Z' - band matrix laid out for ?GBTRF - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_cgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { - return -9; - } - break; } #endif return LAPACKE_clascl_work( matrix_layout, type, kl, ku, cfrom, cto, m, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_claset.c b/lapack-netlib/LAPACKE/src/lapacke_claset.c index fd18fd127..3884e6a30 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_claset.c +++ b/lapack-netlib/LAPACKE/src/lapacke_claset.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_claset( int matrix_layout, char uplo, lapack_int m, *****************************************************************************/ #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_c_nancheck( 1, &alpha, 1 ) ) { - return -5; - } - if( LAPACKE_c_nancheck( 1, &beta, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_c_nancheck( 1, &alpha, 1 ) ) { + return -5; + } + if( LAPACKE_c_nancheck( 1, &beta, 1 ) ) { + return -6; + } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_classq.c b/lapack-netlib/LAPACKE/src/lapacke_classq.c new file mode 100644 index 000000000..b8f231dbb --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_classq.c @@ -0,0 +1,54 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function classq +* Author: Julien Langou +* Generated February, 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_classq( lapack_int n, lapack_complex_float* x, + lapack_int incx, float* scale, float* sumsq ) +{ +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input vector `x` and in/out scalars `scale` and `sumsq` for NaNs */ + if( LAPACKE_c_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -2; + } + if( LAPACKE_s_nancheck( 1, scale, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, sumsq, 1 ) ) { + return -5; + } + } +#endif + return LAPACKE_classq_work( n, x, incx, scale, sumsq ); +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_classq_work.c b/lapack-netlib/LAPACKE/src/lapacke_classq_work.c new file mode 100644 index 000000000..9e9c3a36e --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_classq_work.c @@ -0,0 +1,41 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function classq +* Author: Julien Langou +* Generated February, 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_classq_work( lapack_int n, lapack_complex_float* x, lapack_int incx, float* scale, float* sumsq ) +{ + lapack_int info = 0; + LAPACK_classq( &n, x, &incx, scale, sumsq ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_claswp.c b/lapack-netlib/LAPACKE/src/lapacke_claswp.c index bc5d4dde8..cba497a28 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_claswp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_claswp.c @@ -43,19 +43,21 @@ lapack_int LAPACKE_claswp( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ -/***************************************************************************** -* Disable the check as is below, the check below was checking for NaN -* from lda to n since there is no (obvious) way to knowing m. This is not -* a good idea. We could get a lower bound of m by scanning from ipiv. Or -* we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable -* the buggy Nan check. -* See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 -*****************************************************************************/ -/* if( LAPACKE_cge_nancheck( matrix_layout, lda, n, a, lda ) ) { -* return -3; -* } -*/ + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + /***************************************************************************** + * Disable the check as is below, the check below was checking for NaN + * from lda to n since there is no (obvious) way to knowing m. This is not + * a good idea. We could get a lower bound of m by scanning from ipiv. Or + * we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable + * the buggy Nan check. + * See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 + *****************************************************************************/ + /* if( LAPACKE_cge_nancheck( matrix_layout, lda, n, a, lda ) ) { + * return -3; + * } + */ + } #endif return LAPACKE_claswp_work( matrix_layout, n, a, lda, k1, k2, ipiv, incx ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_claswp_work.c b/lapack-netlib/LAPACKE/src/lapacke_claswp_work.c index 599e72fcf..1867987e3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_claswp_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_claswp_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function claswp * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_clatms.c b/lapack-netlib/LAPACKE/src/lapacke_clatms.c index 96415ca0d..19a084350 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clatms.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clatms.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_clatms( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -14; - } - if( LAPACKE_s_nancheck( 1, &cond, 1 ) ) { - return -9; - } - if( LAPACKE_s_nancheck( MIN(n,m), d, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &dmax, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -14; + } + if( LAPACKE_s_nancheck( 1, &cond, 1 ) ) { + return -9; + } + if( LAPACKE_s_nancheck( MIN(n,m), d, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &dmax, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_clauum.c b/lapack-netlib/LAPACKE/src/lapacke_clauum.c index 5b34e31b1..1ce7078b0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clauum.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clauum.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_clauum( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_clauum_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbcon.c b/lapack-netlib/LAPACKE/src/lapacke_cpbcon.c index c67909ba0..3bf733ffd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_cpbcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbequ.c b/lapack-netlib/LAPACKE/src/lapacke_cpbequ.c index 4aa5bb474..4468f57f2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbequ.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_cpbequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_cpbequ_work( matrix_layout, uplo, n, kd, ab, ldab, s, scond, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_cpbrfs.c index 241a3d062..64ef7bd13 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbrfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_cpbrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbstf.c b/lapack-netlib/LAPACKE/src/lapacke_cpbstf.c index c8bfa5f32..bb52e9da1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbstf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbstf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cpbstf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -5; + } } #endif return LAPACKE_cpbstf_work( matrix_layout, uplo, n, kb, bb, ldbb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbsv.c b/lapack-netlib/LAPACKE/src/lapacke_cpbsv.c index 04990d0ef..6528cec0c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cpbsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_cpbsv_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_cpbsvx.c index 6fa6b811d..a14d5e150 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbsvx.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_cpbsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -9; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_cpbtrf.c index 28f3528e5..975c08158 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbtrf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cpbtrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_cpbtrf_work( matrix_layout, uplo, n, kd, ab, ldab ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_cpbtrs.c index e3b24678a..9ff63c177 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cpbtrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_cpbtrs_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpftrf.c b/lapack-netlib/LAPACKE/src/lapacke_cpftrf.c index a57d692e0..382dd63d4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpftrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpftrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_cpftrf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_cpftrf_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpftri.c b/lapack-netlib/LAPACKE/src/lapacke_cpftri.c index e74457d5c..57b73d007 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_cpftri( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_cpftri_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpftrs.c b/lapack-netlib/LAPACKE/src/lapacke_cpftrs.c index 4a0c37b80..410ab4d9d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpftrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpftrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cpftrs( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpf_nancheck( n, a ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpf_nancheck( n, a ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_cpftrs_work( matrix_layout, transr, uplo, n, nrhs, a, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpocon.c b/lapack-netlib/LAPACKE/src/lapacke_cpocon.c index 538852660..b2ba482a4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpocon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpocon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_cpocon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpoequ.c b/lapack-netlib/LAPACKE/src/lapacke_cpoequ.c index e7c635e87..9ee976448 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpoequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpoequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cpoequ( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_cpoequ_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpoequb.c b/lapack-netlib/LAPACKE/src/lapacke_cpoequb.c index b8789ecca..3b5b4cfbf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpoequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpoequb.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cpoequb( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_cpoequb_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cporfs.c b/lapack-netlib/LAPACKE/src/lapacke_cporfs.c index 292f2e62c..e8867eb4e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cporfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cporfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_cporfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cporfsx.c b/lapack-netlib/LAPACKE/src/lapacke_cporfsx.c index 08bb22d7b..80c2877bb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cporfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cporfsx.c @@ -52,28 +52,30 @@ lapack_int LAPACKE_cporfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -21; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -10; + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -21; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -10; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -13; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -13; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cposv.c b/lapack-netlib/LAPACKE/src/lapacke_cposv.c index 4b7b854f5..bb9a670eb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cposv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cposv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cposv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_cposv_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cposvx.c b/lapack-netlib/LAPACKE/src/lapacke_cposvx.c index 99383574b..df0d5b8c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cposvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cposvx.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_cposvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cposvxx.c b/lapack-netlib/LAPACKE/src/lapacke_cposvxx.c index 69ed00a8d..2dbb650f8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cposvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cposvxx.c @@ -52,26 +52,28 @@ lapack_int LAPACKE_cposvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpotrf.c b/lapack-netlib/LAPACKE/src/lapacke_cpotrf.c index c5ae7ce7b..ac1805d4c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpotrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpotrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_cpotrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cpotrf_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpotrf2.c b/lapack-netlib/LAPACKE/src/lapacke_cpotrf2.c index dcda27955..b1bf4ba3b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpotrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpotrf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_cpotrf2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cpotrf2_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpotri.c b/lapack-netlib/LAPACKE/src/lapacke_cpotri.c index 24b1a0fa3..bf80ec5ed 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpotri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpotri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_cpotri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_cpotri_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpotrs.c b/lapack-netlib/LAPACKE/src/lapacke_cpotrs.c index dbe86790f..a5f056698 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpotrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpotrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cpotrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_cpotrs_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cppcon.c b/lapack-netlib/LAPACKE/src/lapacke_cppcon.c index c7b7afd1c..d0ec180c7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cppcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cppcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_cppcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -5; - } - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -5; + } + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cppequ.c b/lapack-netlib/LAPACKE/src/lapacke_cppequ.c index c454032dc..f0e546523 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cppequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cppequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_cppequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_cppequ_work( matrix_layout, uplo, n, ap, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpprfs.c b/lapack-netlib/LAPACKE/src/lapacke_cpprfs.c index bd6e842ae..f62c61f6b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpprfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_cpprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cppsv.c b/lapack-netlib/LAPACKE/src/lapacke_cppsv.c index 6af2be3cf..6a182fae9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cppsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cppsv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_cppsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_cppsv_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cppsvx.c b/lapack-netlib/LAPACKE/src/lapacke_cppsvx.c index a552afec6..bf428fb17 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cppsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cppsvx.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_cppsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_cpp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_cpp_nancheck( n, afp ) ) { + return -7; + } } - } - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -9; + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpptrf.c b/lapack-netlib/LAPACKE/src/lapacke_cpptrf.c index 426fefbc4..dad29b6d0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_cpptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_cpptrf_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpptri.c b/lapack-netlib/LAPACKE/src/lapacke_cpptri.c index 5018506e8..5a80ef347 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_cpptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_cpptri_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpptrs.c b/lapack-netlib/LAPACKE/src/lapacke_cpptrs.c index 959d859d3..6eab29d05 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_cpptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_cpptrs_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpstrf.c b/lapack-netlib/LAPACKE/src/lapacke_cpstrf.c index d0c08eea0..f4111a287 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpstrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpstrf.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_cpstrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &tol, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &tol, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cptcon.c b/lapack-netlib/LAPACKE/src/lapacke_cptcon.c index a65da586f..ebaab3b6b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cptcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cptcon.c @@ -40,15 +40,17 @@ lapack_int LAPACKE_cptcon( lapack_int n, const float* d, lapack_int info = 0; float* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpteqr.c b/lapack-netlib/LAPACKE/src/lapacke_cpteqr.c index 6bf8a9ec0..3bc194f7b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpteqr.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_cpteqr( int matrix_layout, char compz, lapack_int n, float* d return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cptrfs.c b/lapack-netlib/LAPACKE/src/lapacke_cptrfs.c index 60126d065..8e69403d4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cptrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cptrfs.c @@ -49,24 +49,26 @@ lapack_int LAPACKE_cptrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, df, 1 ) ) { - return -7; - } - if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_c_nancheck( n-1, ef, 1 ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n, df, 1 ) ) { + return -7; + } + if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_c_nancheck( n-1, ef, 1 ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cptsv.c b/lapack-netlib/LAPACKE/src/lapacke_cptsv.c index f1d5af0e1..7de0ea3d2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cptsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cptsv.c @@ -42,15 +42,17 @@ lapack_int LAPACKE_cptsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif return LAPACKE_cptsv_work( matrix_layout, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cptsvx.c b/lapack-netlib/LAPACKE/src/lapacke_cptsvx.c index ad2458781..2e5185032 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cptsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cptsvx.c @@ -49,24 +49,26 @@ lapack_int LAPACKE_cptsvx( int matrix_layout, char fact, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n, df, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_c_nancheck( n-1, ef, 1 ) ) { - return -8; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n, df, 1 ) ) { + return -7; + } + } + if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_c_nancheck( n-1, ef, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpttrf.c b/lapack-netlib/LAPACKE/src/lapacke_cpttrf.c index 7c4e052d8..ad51c857c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpttrf.c @@ -36,12 +36,14 @@ lapack_int LAPACKE_cpttrf( lapack_int n, float* d, lapack_complex_float* e ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif return LAPACKE_cpttrf_work( n, d, e ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cpttrs.c b/lapack-netlib/LAPACKE/src/lapacke_cpttrs.c index 4bb7ee50b..bc8abf39d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cpttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cpttrs.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_cpttrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n-1, e, 1 ) ) { + return -6; + } } #endif return LAPACKE_cpttrs_work( matrix_layout, uplo, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cspcon.c b/lapack-netlib/LAPACKE/src/lapacke_cspcon.c index f9b89dd49..19c011e7f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cspcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cspcon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_cspcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -6; - } - if( LAPACKE_csp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -6; + } + if( LAPACKE_csp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csprfs.c b/lapack-netlib/LAPACKE/src/lapacke_csprfs.c index f0e7c611c..1c9b801e2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csprfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_csprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_csp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_csp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cspsv.c b/lapack-netlib/LAPACKE/src/lapacke_cspsv.c index 28440533f..c5b1f91d2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cspsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cspsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_cspsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_cspsv_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cspsvx.c b/lapack-netlib/LAPACKE/src/lapacke_cspsvx.c index 6e08c3110..18d6163b1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cspsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cspsvx.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_cspsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_csp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_csp_nancheck( n, afp ) ) { + return -7; + } + } + if( LAPACKE_csp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_csp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csptrf.c b/lapack-netlib/LAPACKE/src/lapacke_csptrf.c index b0de5e45c..d74415ea7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_csptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_csptrf_work( matrix_layout, uplo, n, ap, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_csptri.c b/lapack-netlib/LAPACKE/src/lapacke_csptri.c index 44a08a207..3103d5286 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csptri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_csptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csptrs.c b/lapack-netlib/LAPACKE/src/lapacke_csptrs.c index d93a806ce..bf433b716 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csptrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_csptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_csptrs_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cstedc.c b/lapack-netlib/LAPACKE/src/lapacke_cstedc.c index 22c4a354e..5be3cec70 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cstedc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cstedc.c @@ -51,16 +51,18 @@ lapack_int LAPACKE_cstedc( int matrix_layout, char compz, lapack_int n, float* d return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cstegr.c b/lapack-netlib/LAPACKE/src/lapacke_cstegr.c index 0cd589a74..986702e62 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cstegr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cstegr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cstegr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -51,24 +51,26 @@ lapack_int LAPACKE_cstegr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_cstein.c b/lapack-netlib/LAPACKE/src/lapacke_cstein.c index 5f5cd3649..9f7516c5e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cstein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cstein.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cstein * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,15 +47,17 @@ lapack_int LAPACKE_cstein( int matrix_layout, lapack_int n, const float* d, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, w, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n, w, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cstemr.c b/lapack-netlib/LAPACKE/src/lapacke_cstemr.c index 96a48f842..9b9b84e49 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cstemr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cstemr.c @@ -52,18 +52,20 @@ lapack_int LAPACKE_cstemr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cstemr_work.c b/lapack-netlib/LAPACKE/src/lapacke_cstemr_work.c index b48f7f956..1afb12b3e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cstemr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cstemr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cstemr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -56,7 +56,7 @@ lapack_int LAPACKE_cstemr_work( int matrix_layout, char jobz, char range, lapack_int ldz_t = MAX(1,n); lapack_complex_float* z_t = NULL; /* Check leading dimension(s) */ - if( ldz < n ) { + if( ldz < 1 || ( LAPACKE_lsame( jobz, 'v' ) && ldz < n ) ) { info = -14; LAPACKE_xerbla( "LAPACKE_cstemr_work", info ); return info; diff --git a/lapack-netlib/LAPACKE/src/lapacke_csteqr.c b/lapack-netlib/LAPACKE/src/lapacke_csteqr.c index 57249e7de..7877925d2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csteqr.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_csteqr( int matrix_layout, char compz, lapack_int n, float* d return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_csycon.c b/lapack-netlib/LAPACKE/src/lapacke_csycon.c index 5c5fb576e..e6b02d4d4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csycon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csycon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_csycon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csycon_3.c b/lapack-netlib/LAPACKE/src/lapacke_csycon_3.c index 6425d61ac..6d5dcd646 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csycon_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csycon_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function csycon_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,20 +40,23 @@ lapack_int LAPACKE_csycon_3( int matrix_layout, char uplo, lapack_int n, { lapack_int info = 0; lapack_complex_float* work = NULL; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_csycon_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csycon_3_work.c b/lapack-netlib/LAPACKE/src/lapacke_csycon_3_work.c index ec3a8ae66..47323dc4a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csycon_3_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csycon_3_work.c @@ -28,14 +28,14 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function csycon_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_csycon_3_work( int matrix_layout, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, - const lapack_complex_float* e, + const lapack_complex_float* e, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ) { diff --git a/lapack-netlib/LAPACKE/src/lapacke_csyconv.c b/lapack-netlib/LAPACKE/src/lapacke_csyconv.c index cc1eb84af..2eb942e4e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csyconv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csyconv.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_csyconv( int matrix_layout, char uplo, char way, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Call middle-level interface */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csyequb.c b/lapack-netlib/LAPACKE/src/lapacke_csyequb.c index d87dcd1ef..79921b333 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csyequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csyequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_csyequb( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csyr.c b/lapack-netlib/LAPACKE/src/lapacke_csyr.c index 7dbb33289..54cebff1d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csyr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csyr.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_csyr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -7; - } - if( LAPACKE_c_nancheck( 1, &alpha, 1 ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n, x, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -7; + } + if( LAPACKE_c_nancheck( 1, &alpha, 1 ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n, x, 1 ) ) { + return -5; + } } #endif return LAPACKE_csyr_work( matrix_layout, uplo, n, alpha, x, incx, a, diff --git a/lapack-netlib/LAPACKE/src/lapacke_csyrfs.c b/lapack-netlib/LAPACKE/src/lapacke_csyrfs.c index 7141ac8b3..501ba2bb0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csyrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csyrfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_csyrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csyrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_csyrfsx.c index 3b43eea21..ceaf43776 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csyrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csyrfsx.c @@ -52,28 +52,30 @@ lapack_int LAPACKE_csyrfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -22; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -11; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysv.c b/lapack-netlib/LAPACKE/src/lapacke_csysv.c index 7b6814954..11baa9bb6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csysv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csysv.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_csysv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysv_aa.c b/lapack-netlib/LAPACKE/src/lapacke_csysv_aa.c index 579bfd8a4..9e012b9a3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csysv_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csysv_aa.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function csysv_aa * Author: Intel Corporation -* Generated December 2016 +* Generated November 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,12 +47,14 @@ lapack_int LAPACKE_csysv_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally csyck input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage.c new file mode 100644 index 000000000..c93bb9c8a --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage.c @@ -0,0 +1,89 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function csysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_csysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_float* b, lapack_int ldb ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_float* work = NULL; + lapack_complex_float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_csysv_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_csysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_csysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_csysv_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage_work.c new file mode 100644 index 000000000..dabe31512 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_csysv_aa_2stage_work.c @@ -0,0 +1,124 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function csysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_csysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_csysv_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_float* a_t = NULL; + lapack_complex_float* tb_t = NULL; + lapack_complex_float* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_csysv_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_csysv_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_csysv_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_csysv_aa_2stage( &uplo, &n, &nrhs, a, &lda_t, + tb, <b, ipiv, ipiv2, b, &ldb_t, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_csy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_csysv_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_csy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_csysv_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_csysv_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysv_rk.c b/lapack-netlib/LAPACKE/src/lapacke_csysv_rk.c index 34e95f203..b2950c1e3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csysv_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csysv_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function csysv_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,15 +48,14 @@ lapack_int LAPACKE_csysv_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n, e, 1) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysv_rook.c b/lapack-netlib/LAPACKE/src/lapacke_csysv_rook.c index 1038efa23..a9edbe706 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csysv_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csysv_rook.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_csysv_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysvx.c b/lapack-netlib/LAPACKE/src/lapacke_csysvx.c index 60517b2bd..b64aab954 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csysvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csysvx.c @@ -51,17 +51,19 @@ lapack_int LAPACKE_csysvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csysvxx.c b/lapack-netlib/LAPACKE/src/lapacke_csysvxx.c index 1f44cbea1..fffac2b41 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csysvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csysvxx.c @@ -53,26 +53,28 @@ lapack_int LAPACKE_csysvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -24; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -24; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_csyswapr.c b/lapack-netlib/LAPACKE/src/lapacke_csyswapr.c index 78577aada..690ed4718 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csyswapr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csyswapr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_csyswapr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_csyswapr_work( matrix_layout, uplo, n, a, lda, i1, i2 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrf.c b/lapack-netlib/LAPACKE/src/lapacke_csytrf.c index 42846cde0..ccde4f48f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_csytrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa.c b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa.c index 29d737cc0..0448a1f2e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_csytrf_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally csyck input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally csyck input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage.c new file mode 100644 index 000000000..1ccad12f9 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage.c @@ -0,0 +1,86 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function csytrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_csytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2 ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_float* work = NULL; + lapack_complex_float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_csytrf_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_csytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_csytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_csytrf_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage_work.c new file mode 100644 index 000000000..665291c1d --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_2stage_work.c @@ -0,0 +1,109 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function csytrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_csytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_csytrf_aa_2stage( &uplo, &n, a, &lda, tb, + <b, ipiv, ipiv2, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_complex_float* a_t = NULL; + lapack_complex_float* tb_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_csytrf_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_csytrf_aa_2stage_work", info ); + return info; + } + + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_csytrf_aa_2stage( &uplo, &n, a, &lda_t, + tb, <b, ipiv, ipiv2, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_csy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_csytrf_aa_2stage( &uplo, &n, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_csy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_csytrf_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_csytrf_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrf_rk.c b/lapack-netlib/LAPACKE/src/lapacke_csytrf_rk.c index eac1e6c45..52b7d2483 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrf_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrf_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function csytrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,12 +46,11 @@ lapack_int LAPACKE_csytrf_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrf_rook.c b/lapack-netlib/LAPACKE/src/lapacke_csytrf_rook.c index a2b0906b3..a010800db 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrf_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrf_rook.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_csytrf_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytri.c b/lapack-netlib/LAPACKE/src/lapacke_csytri.c index 9e030ce9b..309af8596 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytri.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_csytri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytri2.c b/lapack-netlib/LAPACKE/src/lapacke_csytri2.c index 6a95660aa..cacd6db92 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytri2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytri2.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_csytri2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytri2x.c b/lapack-netlib/LAPACKE/src/lapacke_csytri2x.c index 0fdd55a2e..5c7e0be13 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytri2x.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytri2x.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_csytri2x( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytri_3.c b/lapack-netlib/LAPACKE/src/lapacke_csytri_3.c index 543d408de..75ec8f024 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytri_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytri_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function csytri_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -41,17 +41,20 @@ lapack_int LAPACKE_csytri_3( int matrix_layout, char uplo, lapack_int n, lapack_int lwork = -1; lapack_complex_float* work = NULL; lapack_complex_float work_query; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_csytri_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrs.c b/lapack-netlib/LAPACKE/src/lapacke_csytrs.c index 9e1800822..5b9282fe5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_csytrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_csytrs_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrs2.c b/lapack-netlib/LAPACKE/src/lapacke_csytrs2.c index 45b20b755..f4a0a4334 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrs2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrs2.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_csytrs2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrs_3.c b/lapack-netlib/LAPACKE/src/lapacke_csytrs_3.c index 795fb2c9d..962416c00 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrs_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrs_3.c @@ -44,15 +44,17 @@ lapack_int LAPACKE_csytrs_3( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n, e ,1 ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n, e ,1 ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_csytrs_3_work( matrix_layout, uplo, n, nrhs, a, lda, diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa.c b/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa.c index 83ef44e11..ff9ba03b4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_csytrs_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally csyck input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally csyck input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage.c new file mode 100644 index 000000000..2edab2f9f --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage.c @@ -0,0 +1,66 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function csytrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_csytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_float* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_csytrs_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Call middle-level interface */ + info = LAPACKE_csytrs_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb ); + + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage_work.c new file mode 100644 index 000000000..7ef73971b --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrs_aa_2stage_work.c @@ -0,0 +1,114 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function csytrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_csytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_float* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_csytrs_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_float* a_t = NULL; + lapack_complex_float* tb_t = NULL; + lapack_complex_float* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_csytrs_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_csytrs_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_csytrs_aa_2stage_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_csy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_csytrs_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_csy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_csytrs_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_csytrs_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrs_rook.c b/lapack-netlib/LAPACKE/src/lapacke_csytrs_rook.c index 2841c9559..1970bc91a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrs_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrs_rook.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_csytrs_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_csytrs_rook_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctbcon.c b/lapack-netlib/LAPACKE/src/lapacke_ctbcon.c index 6c223a1ce..21b86a0da 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctbcon.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_ctbcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_ctbrfs.c index a32f8b8d7..e17e623b1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctbrfs.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_ctbrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_ctbtrs.c index 5b9212645..b97f12029 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_ctbtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_ctbtrs_work( matrix_layout, uplo, trans, diag, n, kd, nrhs, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctfsm.c b/lapack-netlib/LAPACKE/src/lapacke_ctfsm.c index 7f1cf1e75..1c97c7b06 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctfsm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctfsm.c @@ -44,18 +44,20 @@ lapack_int LAPACKE_ctfsm( int matrix_layout, char transr, char side, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( IS_C_NONZERO(alpha) ) { - if( LAPACKE_ctf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( IS_C_NONZERO(alpha) ) { + if( LAPACKE_ctf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -10; + } } - } - if( LAPACKE_c_nancheck( 1, &alpha, 1 ) ) { - return -9; - } - if( IS_C_NONZERO(alpha) ) { - if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -11; + if( LAPACKE_c_nancheck( 1, &alpha, 1 ) ) { + return -9; + } + if( IS_C_NONZERO(alpha) ) { + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctftri.c b/lapack-netlib/LAPACKE/src/lapacke_ctftri.c index 8885578e1..2c2043314 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ctftri( int matrix_layout, char transr, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -6; + } } #endif return LAPACKE_ctftri_work( matrix_layout, transr, uplo, diag, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctfttp.c b/lapack-netlib/LAPACKE/src/lapacke_ctfttp.c index 83f6019f1..66c69c795 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctfttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctfttp.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ctfttp( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_ctfttp_work( matrix_layout, transr, uplo, n, arf, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctfttr.c b/lapack-netlib/LAPACKE/src/lapacke_ctfttr.c index 4bcf51f9e..0f7b3d7f9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctfttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctfttr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ctfttr( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_ctfttr_work( matrix_layout, transr, uplo, n, arf, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctgevc.c b/lapack-netlib/LAPACKE/src/lapacke_ctgevc.c index f5126dab6..647e23884 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctgevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctgevc.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_ctgevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, p, ldp ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, s, lds ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, p, ldp ) ) { + return -8; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_cge_nancheck( matrix_layout, n, n, s, lds ) ) { + return -6; + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctgexc.c b/lapack-netlib/LAPACKE/src/lapacke_ctgexc.c index 49e8f7cff..0b88ed9f4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctgexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctgexc.c @@ -46,21 +46,23 @@ lapack_int LAPACKE_ctgexc( int matrix_layout, lapack_logical wantq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; - } - if( wantq ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; } - } - if( wantz ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -11; + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } + if( wantq ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -9; + } + } + if( wantz ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctgsen.c b/lapack-netlib/LAPACKE/src/lapacke_ctgsen.c index 4d21a0b94..e2f38c87b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctgsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctgsen.c @@ -56,21 +56,23 @@ lapack_int LAPACKE_ctgsen( int matrix_layout, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( wantq ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( wantz ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -15; + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( wantq ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -13; + } + } + if( wantz ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -15; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctgsja.c b/lapack-netlib/LAPACKE/src/lapacke_ctgsja.c index 620cd4404..b14f3b4c8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctgsja.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctgsja.c @@ -50,32 +50,34 @@ lapack_int LAPACKE_ctgsja( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; } - } - if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { - return -14; - } - if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, m, m, u, ldu ) ) { - return -18; + if( LAPACKE_cge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; } - } - if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, p, p, v, ldv ) ) { - return -20; + if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -22; + } + } + if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { + return -14; + } + if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, m, m, u, ldu ) ) { + return -18; + } + } + if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, p, p, v, ldv ) ) { + return -20; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctgsna.c b/lapack-netlib/LAPACKE/src/lapacke_ctgsna.c index 0041143c8..fab7b9f69 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctgsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctgsna.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_ctgsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctgsyl.c b/lapack-netlib/LAPACKE/src/lapacke_ctgsyl.c index 7b95b2098..d34080c90 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctgsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctgsyl.c @@ -53,24 +53,26 @@ lapack_int LAPACKE_ctgsyl( int matrix_layout, char trans, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, m, d, ldd ) ) { - return -12; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, e, lde ) ) { - return -14; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, f, ldf ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, m, d, ldd ) ) { + return -12; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, e, lde ) ) { + return -14; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -16; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctpcon.c b/lapack-netlib/LAPACKE/src/lapacke_ctpcon.c index abfc8c5c3..15826ece3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctpcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctpcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ctpcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctpmqrt.c b/lapack-netlib/LAPACKE/src/lapacke_ctpmqrt.c index b38fa0313..ee153e25a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctpmqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctpmqrt.c @@ -51,29 +51,31 @@ lapack_int LAPACKE_ctpmqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_a = LAPACKE_lsame( side, 'L' ) ? n : - ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); - nrows_a = LAPACKE_lsame( side, 'L' ) ? k : - ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_cge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { - return -13; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -15; - } - if( LAPACKE_cge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_a = LAPACKE_lsame( side, 'L' ) ? n : + ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); + nrows_a = LAPACKE_lsame( side, 'L' ) ? k : + ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_cge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { + return -13; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -15; + } + if( LAPACKE_cge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ lwork = LAPACKE_lsame( side, 'L' ) ? MAX(1,nb) * MAX(1,n) : - ( LAPACKE_lsame( side, 'R' ) ? MAX(1,m) * MAX(1,nb) : 0 ); + ( LAPACKE_lsame( side, 'R' ) ? MAX(1,m) * MAX(1,nb) : 0 ); work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); if( work == NULL ) { diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctpqrt.c b/lapack-netlib/LAPACKE/src/lapacke_ctpqrt.c index 6158f4a33..24097024b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctpqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctpqrt.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_ctpqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctpqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_ctpqrt2.c index 1bcb61320..9ca49a5e9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctpqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctpqrt2.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_ctpqrt2( int matrix_layout, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_ctpqrt2_work( matrix_layout, m, n, l, a, lda, b, ldb, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctprfb.c b/lapack-netlib/LAPACKE/src/lapacke_ctprfb.c index f66469a2b..9d2684e4c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctprfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctprfb.c @@ -51,30 +51,32 @@ lapack_int LAPACKE_ctprfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( storev, 'C' ) ) { - ncols_v = k; - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - } else if( LAPACKE_lsame( storev, 'R' ) ) { - ncols_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - nrows_v = k; - } else { - ncols_v = 0; - nrows_v = 0; - } - if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -14; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -16; - } - if( LAPACKE_cge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -12; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( storev, 'C' ) ) { + ncols_v = k; + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + } else if( LAPACKE_lsame( storev, 'R' ) ) { + ncols_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + nrows_v = k; + } else { + ncols_v = 0; + nrows_v = 0; + } + if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -14; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -16; + } + if( LAPACKE_cge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -12; + } + if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -10; + } } #endif if (side=='l' || side=='L') { diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctprfs.c b/lapack-netlib/LAPACKE/src/lapacke_ctprfs.c index 350ad2dd1..b869c2020 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctprfs.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_ctprfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctptri.c b/lapack-netlib/LAPACKE/src/lapacke_ctptri.c index eb43b3d88..720c5dadb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ctptri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -5; + } } #endif return LAPACKE_ctptri_work( matrix_layout, uplo, diag, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctptrs.c b/lapack-netlib/LAPACKE/src/lapacke_ctptrs.c index fabb41287..5b6a9141c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctptrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_ctptrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_ctptrs_work( matrix_layout, uplo, trans, diag, n, nrhs, ap, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctpttf.c b/lapack-netlib/LAPACKE/src/lapacke_ctpttf.c index 7d2e64103..793e9ba11 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctpttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctpttf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ctpttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -5; + } } #endif return LAPACKE_ctpttf_work( matrix_layout, transr, uplo, n, ap, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctpttr.c b/lapack-netlib/LAPACKE/src/lapacke_ctpttr.c index 34f728cc9..679c661b8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctpttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctpttr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ctpttr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_ctpttr_work( matrix_layout, uplo, n, ap, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrcon.c b/lapack-netlib/LAPACKE/src/lapacke_ctrcon.c index 863c583be..159fb43a5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ctrcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrevc.c b/lapack-netlib/LAPACKE/src/lapacke_ctrevc.c index bce6c3571..8c0b3d13b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrevc.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_ctrevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrexc.c b/lapack-netlib/LAPACKE/src/lapacke_ctrexc.c index 1b7627ea8..426e8486b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrexc.c @@ -43,14 +43,16 @@ lapack_int LAPACKE_ctrexc( int matrix_layout, char compq, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -6; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -4; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -4; } #endif return LAPACKE_ctrexc_work( matrix_layout, compq, n, t, ldt, q, ldq, ifst, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrrfs.c b/lapack-netlib/LAPACKE/src/lapacke_ctrrfs.c index 1a49ca2ed..164845356 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrrfs.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_ctrrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrsen.c b/lapack-netlib/LAPACKE/src/lapacke_ctrsen.c index e37734628..0ea1feb46 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrsen.c @@ -49,14 +49,16 @@ lapack_int LAPACKE_ctrsen( int matrix_layout, char job, char compq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -8; + } + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrsna.c b/lapack-netlib/LAPACKE/src/lapacke_ctrsna.c index 1eefaaea8..3e3b8d894 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrsna.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_ctrsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_cge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrsyl.c b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl.c index a8708f79a..d17d1eda6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl.c @@ -45,15 +45,17 @@ lapack_int LAPACKE_ctrsyl( int matrix_layout, char trana, char tranb, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } } #endif return LAPACKE_ctrsyl_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrtri.c b/lapack-netlib/LAPACKE/src/lapacke_ctrtri.c index 3e582b014..4fc39cae7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrtri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrtri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ctrtri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_ctrtri_work( matrix_layout, uplo, diag, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrtrs.c b/lapack-netlib/LAPACKE/src/lapacke_ctrtrs.c index 7b3248df6..6669f3f7a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_ctrtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_ctrtrs_work( matrix_layout, uplo, trans, diag, n, nrhs, a, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrttf.c b/lapack-netlib/LAPACKE/src/lapacke_ctrttf.c index 869aee620..fd0a40c17 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrttf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ctrttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_ctrttf_work( matrix_layout, transr, uplo, n, a, lda, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrttp.c b/lapack-netlib/LAPACKE/src/lapacke_ctrttp.c index e115872de..c4ea703af 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctrttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrttp.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ctrttp( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_ctrttp_work( matrix_layout, uplo, n, a, lda, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctzrzf.c b/lapack-netlib/LAPACKE/src/lapacke_ctzrzf.c index cbbc3f101..7ec2778ab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ctzrzf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ctzrzf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_ctzrzf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunbdb.c b/lapack-netlib/LAPACKE/src/lapacke_cunbdb.c index 44f97db10..562f253d9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunbdb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunbdb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cunbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -49,28 +49,31 @@ lapack_int LAPACKE_cunbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; lapack_complex_float* work = NULL; lapack_complex_float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cunbdb", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -9; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -11; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -13; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -9; + } + if( LAPACKE_cge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -11; + } + if( LAPACKE_cge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -13; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunbdb_work.c b/lapack-netlib/LAPACKE/src/lapacke_cunbdb_work.c index 7f0c47f86..8ae40e6d4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunbdb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunbdb_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cunbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,119 +47,35 @@ lapack_int LAPACKE_cunbdb_work( int matrix_layout, char trans, char signs, lapack_complex_float* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_cunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_cunbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_float* x11_t = NULL; - lapack_complex_float* x12_t = NULL; - lapack_complex_float* x21_t = NULL; - lapack_complex_float* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_cunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_cunbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cuncsd.c b/lapack-netlib/LAPACKE/src/lapacke_cuncsd.c index 76f92b70e..4dc5b97f9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cuncsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cuncsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cuncsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -54,28 +54,31 @@ lapack_int LAPACKE_cuncsd( int matrix_layout, char jobu1, char jobu2, lapack_complex_float* work = NULL; float rwork_query; lapack_complex_float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cuncsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -11; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -13; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -15; - } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -17; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -11; + } + if( LAPACKE_cge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -13; + } + if( LAPACKE_cge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -15; + } + if( LAPACKE_cge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -17; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cuncsd2by1.c b/lapack-netlib/LAPACKE/src/lapacke_cuncsd2by1.c index e6749c1bc..339ed8d19 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cuncsd2by1.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cuncsd2by1.c @@ -55,17 +55,18 @@ lapack_int LAPACKE_cuncsd2by1( int matrix_layout, char jobu1, char jobu2, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = p ; - nrows_x21 = m-p ; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -8; - } + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_x11 = p; + nrows_x21 = m-p; + if( LAPACKE_cge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + return -8; + } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -9; + if( LAPACKE_cge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + return -9; + } } - #endif /* Allocate memory for working array(s) */ iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * MAX(1,m-MIN(MIN(p,m-p),MIN(q,m-q))) ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cuncsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_cuncsd_work.c index 60fbd9ab3..0ab02e046 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cuncsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cuncsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cuncsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -50,224 +50,36 @@ lapack_int LAPACKE_cuncsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, rwork, &lrwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_float* x11_t = NULL; - lapack_complex_float* x12_t = NULL; - lapack_complex_float* x21_t = NULL; - lapack_complex_float* x22_t = NULL; - lapack_complex_float* u1_t = NULL; - lapack_complex_float* u2_t = NULL; - lapack_complex_float* v1t_t = NULL; - lapack_complex_float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 || lwork == -1 ) { - LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); + LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, rwork, &lrwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_cungbr.c b/lapack-netlib/LAPACKE/src/lapacke_cungbr.c index ad505913f..5bef3cb65 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cungbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cungbr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cungbr( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_c_nancheck( MIN(m,k), tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_c_nancheck( MIN(m,k), tau, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunghr.c b/lapack-netlib/LAPACKE/src/lapacke_cunghr.c index 5cce308a8..4159e86b6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunghr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunghr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cunghr( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunglq.c b/lapack-netlib/LAPACKE/src/lapacke_cunglq.c index 751e6f043..a614694c1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunglq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunglq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cunglq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cungql.c b/lapack-netlib/LAPACKE/src/lapacke_cungql.c index 41fa1eeb9..c80f92a8e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cungql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cungql.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cungql( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cungqr.c b/lapack-netlib/LAPACKE/src/lapacke_cungqr.c index 2f9f76017..9247dad20 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cungqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cungqr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cungqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cungrq.c b/lapack-netlib/LAPACKE/src/lapacke_cungrq.c index 026e1a8a7..82fea68ad 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cungrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cungrq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cungrq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cungtr.c b/lapack-netlib/LAPACKE/src/lapacke_cungtr.c index 536ca5e48..ddae70345 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cungtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cungtr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_cungtr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmbr.c b/lapack-netlib/LAPACKE/src/lapacke_cunmbr.c index 4fb278059..d80bc526e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmbr.c @@ -49,17 +49,19 @@ lapack_int LAPACKE_cunmbr( int matrix_layout, char vect, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nq = LAPACKE_lsame( side, 'l' ) ? m : n; - r = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); - if( LAPACKE_cge_nancheck( matrix_layout, r, MIN(nq,k), a, lda ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_c_nancheck( MIN(nq,k), tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nq = LAPACKE_lsame( side, 'l' ) ? m : n; + r = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); + if( LAPACKE_cge_nancheck( matrix_layout, r, MIN(nq,k), a, lda ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_c_nancheck( MIN(nq,k), tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmhr.c b/lapack-netlib/LAPACKE/src/lapacke_cunmhr.c index 7b307dc6f..592c6de45 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmhr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmhr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_cunmhr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_cge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_c_nancheck( m-1, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_cge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_c_nancheck( m-1, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmlq.c b/lapack-netlib/LAPACKE/src/lapacke_cunmlq.c index 7f2a8098a..f340c8c93 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmlq.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_cunmlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmql.c b/lapack-netlib/LAPACKE/src/lapacke_cunmql.c index ab2e069f9..92c988131 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmql.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_cunmql( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_cge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_cge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmqr.c b/lapack-netlib/LAPACKE/src/lapacke_cunmqr.c index 21756ca4d..1ad2afa3b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmqr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_cunmqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_cge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_cge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmrq.c b/lapack-netlib/LAPACKE/src/lapacke_cunmrq.c index 2fee55ced..ec96052b9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmrq.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_cunmrq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmrz.c b/lapack-netlib/LAPACKE/src/lapacke_cunmrz.c index b33ecb6b6..81d788d71 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmrz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmrz.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_cunmrz( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -8; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_c_nancheck( k, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -8; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_c_nancheck( k, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cunmtr.c b/lapack-netlib/LAPACKE/src/lapacke_cunmtr.c index 5f3d598cd..1864c4121 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cunmtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cunmtr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_cunmtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_cge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_c_nancheck( m-1, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_cge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_c_nancheck( m-1, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cupgtr.c b/lapack-netlib/LAPACKE/src/lapacke_cupgtr.c index 38a9e20ff..6c96735ae 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cupgtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cupgtr.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_cupgtr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_cpp_nancheck( n, ap ) ) { - return -4; - } - if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cpp_nancheck( n, ap ) ) { + return -4; + } + if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cupmtr.c b/lapack-netlib/LAPACKE/src/lapacke_cupmtr.c index c159c6447..51f6d8276 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cupmtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cupmtr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_cupmtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_cpp_nancheck( r, ap ) ) { - return -7; - } - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -9; - } - if( LAPACKE_c_nancheck( m-1, tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_cpp_nancheck( r, ap ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -9; + } + if( LAPACKE_c_nancheck( m-1, tau, 1 ) ) { + return -8; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dbbcsd.c b/lapack-netlib/LAPACKE/src/lapacke_dbbcsd.c index 23cb7cc4f..960d62dd6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dbbcsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dbbcsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,41 +47,44 @@ lapack_int LAPACKE_dbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lwork = -1; double* work = NULL; double work_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dbbcsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - if( LAPACKE_d_nancheck( q-1, phi, 1 ) ) { - return -11; - } - if( LAPACKE_d_nancheck( q, theta, 1 ) ) { - return -10; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { - return -12; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( q-1, phi, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { - return -14; + if( LAPACKE_d_nancheck( q, theta, 1 ) ) { + return -10; } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { - return -16; + if( LAPACKE_lsame( jobu1, 'y' ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { + return -12; + } } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { - return -18; + if( LAPACKE_lsame( jobu2, 'y' ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { + return -14; + } + } + if( LAPACKE_lsame( jobv1t, 'y' ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { + return -16; + } + } + if( LAPACKE_lsame( jobv2t, 'y' ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { + return -18; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dbbcsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_dbbcsd_work.c index c4f072a45..6d3b12bfa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dbbcsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dbbcsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -45,152 +45,36 @@ lapack_int LAPACKE_dbbcsd_work( int matrix_layout, char jobu1, char jobu2, double* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - double* u1_t = NULL; - double* u2_t = NULL; - double* v1t_t = NULL; - double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dbdsdc.c b/lapack-netlib/LAPACKE/src/lapacke_dbdsdc.c index a18d507a5..849b06ce7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dbdsdc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dbdsdc.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dbdsdc * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,12 +48,14 @@ lapack_int LAPACKE_dbdsdc( int matrix_layout, char uplo, char compq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dbdsqr.c b/lapack-netlib/LAPACKE/src/lapacke_dbdsqr.c index 717038dbc..3b27f86cf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dbdsqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dbdsqr.c @@ -46,26 +46,28 @@ lapack_int LAPACKE_dbdsqr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( ncc != 0 ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( ncc != 0 ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { + return -13; + } } - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -8; - } - if( nru != 0 ) { - if( LAPACKE_dge_nancheck( matrix_layout, nru, n, u, ldu ) ) { - return -11; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -7; } - } - if( ncvt != 0 ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { - return -9; + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -8; + } + if( nru != 0 ) { + if( LAPACKE_dge_nancheck( matrix_layout, nru, n, u, ldu ) ) { + return -11; + } + } + if( ncvt != 0 ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx.c b/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx.c index f61e11c41..b32e79b93 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_dbdsvdx( int matrix_layout, char uplo, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( n - 1, e, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( n - 1, e, 1 ) ) { + return -7; + } } #endif /* Allocate memory for work arrays */ @@ -71,8 +73,8 @@ lapack_int LAPACKE_dbdsvdx( int matrix_layout, char uplo, char jobz, char range, } /* Call middle-level interface */ info = LAPACKE_dbdsvdx_work( matrix_layout, uplo, jobz, range, - n, d, e, vl, vu, il, iu, ns, s, z, - ldz, work, iwork); + n, d, e, vl, vu, il, iu, ns, s, z, + ldz, work, iwork); /* Backup significant data from working array(s) */ for( i=0; i<12*n-1; i++ ) { superb[i] = iwork[i+1]; diff --git a/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx_work.c b/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx_work.c index a795110f9..fdee5cbbe 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dbdsvdx_work.c @@ -34,17 +34,17 @@ #include "lapacke_utils.h" lapack_int LAPACKE_dbdsvdx_work( int matrix_layout, char uplo, char jobz, char range, - lapack_int n, double* d, double* e, - double vl, double vu, - lapack_int il, lapack_int iu, lapack_int* ns, - double* s, double* z, lapack_int ldz, - double* work, lapack_int* iwork ) + lapack_int n, double* d, double* e, + double vl, double vu, + lapack_int il, lapack_int iu, lapack_int* ns, + double* s, double* z, lapack_int ldz, + double* work, lapack_int* iwork ) { lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_dbdsvdx( &uplo, &jobz, &range, &n, d, e, &vl, &vu, - &il, &iu, ns, s, z, &ldz, + &il, &iu, ns, s, z, &ldz, work, iwork, &info ); if( info < 0 ) { info = info - 1; @@ -64,7 +64,7 @@ lapack_int LAPACKE_dbdsvdx_work( int matrix_layout, char uplo, char jobz, char r /* Allocate memory for temporary array(s) */ if( LAPACKE_lsame( jobz, 'v' ) ) { z_t = (double*) - LAPACKE_malloc( sizeof(double) * ldz_t * MAX(2*n,1) ); + LAPACKE_malloc( sizeof(double) * ldz_t * MAX(ncols_z,1) ); if( z_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; @@ -72,8 +72,8 @@ lapack_int LAPACKE_dbdsvdx_work( int matrix_layout, char uplo, char jobz, char r } /* Call LAPACK function and adjust info */ LAPACK_dbdsvdx( &uplo, &jobz, &range, &n, d, e, &vl, &vu, - &il, &iu, ns, s, z_t, &ldz_t, work, - iwork, &info ); + &il, &iu, ns, s, z_t, &ldz_t, work, + iwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_ddisna.c b/lapack-netlib/LAPACKE/src/lapacke_ddisna.c index b5cb340ce..e5a8b8240 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ddisna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ddisna.c @@ -37,9 +37,11 @@ lapack_int LAPACKE_ddisna( char job, lapack_int m, lapack_int n, const double* d, double* sep ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( MIN(m,n), d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( MIN(m,n), d, 1 ) ) { + return -4; + } } #endif return LAPACKE_ddisna_work( job, m, n, d, sep ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbbrd.c b/lapack-netlib/LAPACKE/src/lapacke_dgbbrd.c index ff7914c74..89be123f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbbrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbbrd.c @@ -47,13 +47,15 @@ lapack_int LAPACKE_dgbbrd( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( ncc != 0 ) { - if( LAPACKE_dge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -8; + } + if( ncc != 0 ) { + if( LAPACKE_dge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbcon.c b/lapack-netlib/LAPACKE/src/lapacke_dgbcon.c index 0b54a1e1e..40e694e7e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbcon.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dgbcon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbequ.c b/lapack-netlib/LAPACKE/src/lapacke_dgbequ.c index 1f72c74f7..171177843 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbequ.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dgbequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_dgbequ_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbequb.c b/lapack-netlib/LAPACKE/src/lapacke_dgbequb.c index 230bf0bff..da6bf077b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbequb.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dgbequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_dgbequb_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_dgbrfs.c index d21fe414f..29612815a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbrfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_dgbrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -9; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -9; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_dgbrfsx.c index df0380aa4..7fceeb6bc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbrfsx.c @@ -52,33 +52,35 @@ lapack_int LAPACKE_dgbrfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -15; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -10; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -13; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -15; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -14; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -13; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -17; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -17; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbsv.c b/lapack-netlib/LAPACKE/src/lapacke_dgbsv.c index 8cf4243fe..f2e868c5f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_dgbsv( int matrix_layout, lapack_int n, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_dgbsv_work( matrix_layout, n, kl, ku, nrhs, ab, ldab, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_dgbsvx.c index d4998d7d8..7cae13022 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbsvx.c @@ -50,29 +50,31 @@ lapack_int LAPACKE_dgbsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbsvxx.c b/lapack-netlib/LAPACKE/src/lapacke_dgbsvxx.c index 0db2eb28b..a991bef46 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbsvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbsvxx.c @@ -52,34 +52,36 @@ lapack_int LAPACKE_dgbsvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -27; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -27; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_dgbtrf.c index 2aac46704..a08116fea 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbtrf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dgbtrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_dgbtrf_work( matrix_layout, m, n, kl, ku, ab, ldab, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_dgbtrs.c index 34742c7d6..27e5c18cf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_dgbtrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_dgbtrs_work( matrix_layout, trans, n, kl, ku, nrhs, ab, ldab, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgebak.c b/lapack-netlib/LAPACKE/src/lapacke_dgebak.c index 1cc4d861a..9363602f8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgebak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgebak.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dgebak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, scale, 1 ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, scale, 1 ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -9; + } } #endif return LAPACKE_dgebak_work( matrix_layout, job, side, n, ilo, ihi, scale, m, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgebal.c b/lapack-netlib/LAPACKE/src/lapacke_dgebal.c index a463400b9..35eae3eb6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgebal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgebal.c @@ -42,11 +42,13 @@ lapack_int LAPACKE_dgebal( int matrix_layout, char job, lapack_int n, double* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || - LAPACKE_lsame( job, 's' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || + LAPACKE_lsame( job, 's' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgebrd.c b/lapack-netlib/LAPACKE/src/lapacke_dgebrd.c index c4221f388..5ef6c62fb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgebrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgebrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_dgebrd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgecon.c b/lapack-netlib/LAPACKE/src/lapacke_dgecon.c index 3b0fdecee..4cc6cbcab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgecon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgecon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dgecon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeequ.c b/lapack-netlib/LAPACKE/src/lapacke_dgeequ.c index 24a0163db..e96de3742 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeequ.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dgeequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dgeequ_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeequb.c b/lapack-netlib/LAPACKE/src/lapacke_dgeequb.c index 61296fed2..a80e2bd7f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeequb.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dgeequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dgeequb_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgees.c b/lapack-netlib/LAPACKE/src/lapacke_dgees.c index 538fc071a..f80c41f44 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgees.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgees.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_dgees( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeesx.c b/lapack-netlib/LAPACKE/src/lapacke_dgeesx.c index ff3868eb7..27647954b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeesx.c @@ -52,9 +52,11 @@ lapack_int LAPACKE_dgeesx( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeev.c b/lapack-netlib/LAPACKE/src/lapacke_dgeev.c index bddde4adc..460155d34 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeev.c @@ -47,9 +47,11 @@ lapack_int LAPACKE_dgeev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeevx.c b/lapack-netlib/LAPACKE/src/lapacke_dgeevx.c index 227cb03db..97612c1cd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeevx.c @@ -50,9 +50,11 @@ lapack_int LAPACKE_dgeevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgehrd.c b/lapack-netlib/LAPACKE/src/lapacke_dgehrd.c index 96c2f43f0..a3de86a97 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgehrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgehrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_dgehrd( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgejsv.c b/lapack-netlib/LAPACKE/src/lapacke_dgejsv.c index 85416056e..444a07b35 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgejsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgejsv.c @@ -80,11 +80,13 @@ lapack_int LAPACKE_dgejsv( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; - nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; + nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgelq.c b/lapack-netlib/LAPACKE/src/lapacke_dgelq.c index bd2be2c1c..016a45f6a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgelq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgelq.c @@ -28,14 +28,14 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dgelq * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" -lapack_int LAPACKE_dgelq_work( int matrix_layout, lapack_int m, lapack_int n, - double* a, lapack_int lda, - double* t, lapack_int tsize ) +lapack_int LAPACKE_dgelq( int matrix_layout, lapack_int m, lapack_int n, + double* a, lapack_int lda, + double* t, lapack_int tsize ) { lapack_int info = 0; lapack_int lwork = -1; @@ -46,9 +46,11 @@ lapack_int LAPACKE_dgelq_work( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgelq2.c b/lapack-netlib/LAPACKE/src/lapacke_dgelq2.c index 2651a1ad1..ce1690ee2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgelq2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgelq2.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dgelq2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgelqf.c b/lapack-netlib/LAPACKE/src/lapacke_dgelqf.c index 572fff23d..e5ce2e699 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgelqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgelqf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dgelqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgels.c b/lapack-netlib/LAPACKE/src/lapacke_dgels.c index 071e45742..186273351 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgels.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgels.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dgels( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgelsd.c b/lapack-netlib/LAPACKE/src/lapacke_dgelsd.c index 6dd96ab46..6750597bb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgelsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgelsd.c @@ -51,15 +51,17 @@ lapack_int LAPACKE_dgelsd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgelss.c b/lapack-netlib/LAPACKE/src/lapacke_dgelss.c index 12a8b31e4..ca888f1ea 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgelss.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgelss.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_dgelss( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgelsy.c b/lapack-netlib/LAPACKE/src/lapacke_dgelsy.c index f609168b3..6d318236e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgelsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgelsy.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_dgelsy( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgemlq.c b/lapack-netlib/LAPACKE/src/lapacke_dgemlq.c index fd62cad79..02b08df35 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgemlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgemlq.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_dgemlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_d_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgemqr.c b/lapack-netlib/LAPACKE/src/lapacke_dgemqr.c index 86566d9c4..b4be9ea9c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgemqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgemqr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_dgemqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_dge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_dge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_d_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgemqrt.c b/lapack-netlib/LAPACKE/src/lapacke_dgemqrt.c index 3fb17f0bf..4b8c661d5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgemqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgemqrt.c @@ -47,17 +47,19 @@ lapack_int LAPACKE_dgemqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -12; - } - if( LAPACKE_dge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -12; + } + if( LAPACKE_dge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqlf.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqlf.c index a5902bc84..cb2854c79 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqlf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqlf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dgeqlf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqp3.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqp3.c index 7a9717fa1..52f7d07c6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqp3.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_dgeqp3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqpf.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqpf.c index bb5a8fd15..63098b232 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqpf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqpf.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dgeqpf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqr.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqr.c index 7f9f9d29b..4c26cb7b5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqr.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_dgeqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqr2.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqr2.c index 7356117ed..ac4b85847 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqr2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqr2.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dgeqr2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqrf.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqrf.c index 2a7ae0156..1fbccd88a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqrf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dgeqrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqrfp.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqrfp.c index b49fe021b..856b6d1fd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqrfp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqrfp.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dgeqrfp( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqrt.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqrt.c index e17be2aa8..4d45fd929 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqrt.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dgeqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqrt2.c index dc34cc938..894f47025 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqrt2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dgeqrt2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dgeqrt2_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgeqrt3.c b/lapack-netlib/LAPACKE/src/lapacke_dgeqrt3.c index b1e581ec9..01a90771c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgeqrt3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgeqrt3.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dgeqrt3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dgeqrt3_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgerfs.c b/lapack-netlib/LAPACKE/src/lapacke_dgerfs.c index ee64f2b05..8034676c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgerfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgerfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_dgerfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgerfsx.c b/lapack-netlib/LAPACKE/src/lapacke_dgerfsx.c index b021f88ed..57e8bfd4a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgerfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgerfsx.c @@ -51,33 +51,35 @@ lapack_int LAPACKE_dgerfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -11; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -12; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -11; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgerqf.c b/lapack-netlib/LAPACKE/src/lapacke_dgerqf.c index efdcd33ae..c2a6e79f3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgerqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgerqf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dgerqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesdd.c b/lapack-netlib/LAPACKE/src/lapacke_dgesdd.c index c7b6cc9b0..61bd91c6d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesdd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesdd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_dgesdd( int matrix_layout, char jobz, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesv.c b/lapack-netlib/LAPACKE/src/lapacke_dgesv.c index cc3b1f4bc..57f9208f6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dgesv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_dgesv_work( matrix_layout, n, nrhs, a, lda, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvd.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvd.c index 25e8a9b9a..1801e86a5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_dgesvd( int matrix_layout, char jobu, char jobvt, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvdx.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvdx.c index ca82202df..b17c03776 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvdx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvdx.c @@ -52,14 +52,16 @@ lapack_int LAPACKE_dgesvdx( int matrix_layout, char jobu, char jobvt, char range return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ info = LAPACKE_dgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, + m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, &work_query, lwork, iwork ); if( info != 0 ) { goto exit_level_0; @@ -78,8 +80,8 @@ lapack_int LAPACKE_dgesvdx( int matrix_layout, char jobu, char jobvt, char range } /* Call middle-level interface */ info = LAPACKE_dgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, - ldu, vt, ldvt, work, lwork, iwork ); + m, n, a, lda, vl, vu, il, iu, ns, s, u, + ldu, vt, ldvt, work, lwork, iwork ); /* Backup significant data from working array(s) */ for( i=0; i<12*MIN(m,n)-1; i++ ) { superb[i] = iwork[i+1]; diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvdx_work.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvdx_work.c index a5a2cb65e..b326cca9d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvdx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvdx_work.c @@ -34,18 +34,18 @@ #include "lapacke_utils.h" lapack_int LAPACKE_dgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, double* a, - lapack_int lda, double vl, double vu, - lapack_int il, lapack_int iu, lapack_int* ns, - double* s, double* u, lapack_int ldu, - double* vt, lapack_int ldvt, - double* work, lapack_int lwork, lapack_int* iwork ) + lapack_int m, lapack_int n, double* a, + lapack_int lda, double vl, double vu, + lapack_int il, lapack_int iu, lapack_int* ns, + double* s, double* u, lapack_int ldu, + double* vt, lapack_int ldvt, + double* work, lapack_int lwork, lapack_int* iwork ) { lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_dgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda, &vl, &vu, - &il, &iu, ns, s, u, &ldu, vt, &ldvt, + &il, &iu, ns, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; @@ -84,7 +84,7 @@ lapack_int LAPACKE_dgesvdx_work( int matrix_layout, char jobu, char jobvt, char /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { LAPACK_dgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda_t, &vl, &vu, - &il, &iu, ns, s, u, &ldu_t, vt, + &il, &iu, ns, s, u, &ldu_t, vt, &ldvt_t, work, &lwork, iwork, &info ); return (info < 0) ? (info - 1) : info; } @@ -114,8 +114,8 @@ lapack_int LAPACKE_dgesvdx_work( int matrix_layout, char jobu, char jobvt, char LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_dgesvdx( &jobu, &jobvt, &range, &m, &n, a_t, &lda_t, &vl, &vu, - &il, &iu, ns, s, u_t, &ldu_t, vt_t, - &ldvt_t, work, &lwork, iwork, &info ); + &il, &iu, ns, s, u_t, &ldu_t, vt_t, + &ldvt_t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvj.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvj.c index 542e52f79..c375d2503 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvj.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvj.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_dgesvj( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : - ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0); - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -7; - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : + ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0); + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvx.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvx.c index 48dcf7360..95554b732 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvx.c @@ -49,28 +49,30 @@ lapack_int LAPACKE_dgesvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvxx.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvxx.c index 6022d3b4f..716ba1fa4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvxx.c @@ -51,33 +51,35 @@ lapack_int LAPACKE_dgesvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetf2.c b/lapack-netlib/LAPACKE/src/lapacke_dgetf2.c index 3cf976dc0..b4e2b1668 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgetf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dgetf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dgetf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetrf.c b/lapack-netlib/LAPACKE/src/lapacke_dgetrf.c index d5451c652..ef2c0e256 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgetrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dgetrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dgetrf_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetrf2.c b/lapack-netlib/LAPACKE/src/lapacke_dgetrf2.c index fe1919d4d..4c808be22 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgetrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetrf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dgetrf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dgetrf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetri.c b/lapack-netlib/LAPACKE/src/lapacke_dgetri.c index 900fe30d6..c8b427e2c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgetri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetri.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dgetri( int matrix_layout, lapack_int n, double* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetrs.c b/lapack-netlib/LAPACKE/src/lapacke_dgetrs.c index 90d8f160e..8af10825d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgetrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dgetrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_dgetrs_work( matrix_layout, trans, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetsls.c b/lapack-netlib/LAPACKE/src/lapacke_dgetsls.c index 57563f5e5..9adc61d0d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgetsls.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetsls.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dgetsls( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggbak.c b/lapack-netlib/LAPACKE/src/lapacke_dggbak.c index a3f128408..af890562d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggbak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggbak.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_dggbak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, lscale, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n, rscale, 1 ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, lscale, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( n, rscale, 1 ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -10; + } } #endif return LAPACKE_dggbak_work( matrix_layout, job, side, n, ilo, ihi, lscale, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggbal.c b/lapack-netlib/LAPACKE/src/lapacke_dggbal.c index 19be89bd2..00e2397c5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggbal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggbal.c @@ -47,17 +47,19 @@ lapack_int LAPACKE_dggbal( int matrix_layout, char job, lapack_int n, double* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } - } - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -6; + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgges.c b/lapack-netlib/LAPACKE/src/lapacke_dgges.c index 7aaed6a79..5c4a12f79 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgges.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgges.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_dgges( int matrix_layout, char jobvsl, char jobvsr, char sort return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgges3.c b/lapack-netlib/LAPACKE/src/lapacke_dgges3.c index 9aae144c7..1d87cac20 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgges3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgges3.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_dgges3( int matrix_layout, char jobvsl, char jobvsr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggesx.c b/lapack-netlib/LAPACKE/src/lapacke_dggesx.c index 62ee7b1cb..36addda74 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggesx.c @@ -54,12 +54,14 @@ lapack_int LAPACKE_dggesx( int matrix_layout, char jobvsl, char jobvsr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggev.c b/lapack-netlib/LAPACKE/src/lapacke_dggev.c index 2ed6c508c..4aa2e679c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggev.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_dggev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggev3.c b/lapack-netlib/LAPACKE/src/lapacke_dggev3.c index 180b70d6c..80a115eaa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggev3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggev3.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_dggev3( int matrix_layout, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggevx.c b/lapack-netlib/LAPACKE/src/lapacke_dggevx.c index d62e94462..ee9e0f5d3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggevx.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_dggevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggglm.c b/lapack-netlib/LAPACKE/src/lapacke_dggglm.c index 2be615d77..9f2c223fa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggglm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggglm.c @@ -46,15 +46,17 @@ lapack_int LAPACKE_dggglm( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgghd3.c b/lapack-netlib/LAPACKE/src/lapacke_dgghd3.c index 106f937ca..478a41d8e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgghd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgghd3.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_dgghd3( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgghrd.c b/lapack-netlib/LAPACKE/src/lapacke_dgghrd.c index af019b9d8..5092ba3b5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgghrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgghrd.c @@ -44,21 +44,23 @@ lapack_int LAPACKE_dgghrd( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgglse.c b/lapack-netlib/LAPACKE/src/lapacke_dgglse.c index ecb005b2f..27e0281d9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgglse.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgglse.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_dgglse( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( m, c, 1 ) ) { - return -9; - } - if( LAPACKE_d_nancheck( p, d, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( m, c, 1 ) ) { + return -9; + } + if( LAPACKE_d_nancheck( p, d, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggqrf.c b/lapack-netlib/LAPACKE/src/lapacke_dggqrf.c index 9e3c035cc..292548d9f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggqrf.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_dggqrf( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggrqf.c b/lapack-netlib/LAPACKE/src/lapacke_dggrqf.c index b6fddf414..42f8133b0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggrqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggrqf.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_dggrqf( int matrix_layout, lapack_int m, lapack_int p, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggsvd.c b/lapack-netlib/LAPACKE/src/lapacke_dggsvd.c index 8647d9a70..e7e06c6c6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggsvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggsvd.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_dggsvd( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggsvd3.c b/lapack-netlib/LAPACKE/src/lapacke_dggsvd3.c index ee3bcfc0a..69dce19f1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggsvd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggsvd3.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_dggsvd3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif info = LAPACKE_dggsvd3_work( matrix_layout, jobu, jobv, jobq, m, n, p, k, l, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggsvp.c b/lapack-netlib/LAPACKE/src/lapacke_dggsvp.c index 9ca32805c..86e4b9cce 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggsvp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggsvp.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_dggsvp( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dggsvp3.c b/lapack-netlib/LAPACKE/src/lapacke_dggsvp3.c index feb640cea..60a679f5a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dggsvp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dggsvp3.c @@ -51,18 +51,20 @@ lapack_int LAPACKE_dggsvp3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Query optimal size for working array */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgtcon.c b/lapack-netlib/LAPACKE/src/lapacke_dgtcon.c index b8f32965f..ff61d9ea7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgtcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgtcon.c @@ -41,21 +41,23 @@ lapack_int LAPACKE_dgtcon( char norm, lapack_int n, const double* dl, lapack_int* iwork = NULL; double* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -8; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { - return -3; - } - if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -8; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { + return -3; + } + if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgtrfs.c b/lapack-netlib/LAPACKE/src/lapacke_dgtrfs.c index 9645276f8..b7ff5b3ac 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgtrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgtrfs.c @@ -49,33 +49,35 @@ lapack_int LAPACKE_dgtrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( n, df, 1 ) ) { - return -9; - } - if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, dlf, 1 ) ) { - return -8; - } - if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { - return -11; - } - if( LAPACKE_d_nancheck( n-1, duf, 1 ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( n, df, 1 ) ) { + return -9; + } + if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, dlf, 1 ) ) { + return -8; + } + if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { + return -11; + } + if( LAPACKE_d_nancheck( n-1, duf, 1 ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgtsv.c b/lapack-netlib/LAPACKE/src/lapacke_dgtsv.c index fbdea793d..b89be4668 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgtsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgtsv.c @@ -42,18 +42,20 @@ lapack_int LAPACKE_dgtsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { + return -6; + } } #endif return LAPACKE_dgtsv_work( matrix_layout, n, nrhs, dl, d, du, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgtsvx.c b/lapack-netlib/LAPACKE/src/lapacke_dgtsvx.c index c10bb1273..add4d8a61 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgtsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgtsvx.c @@ -49,37 +49,39 @@ lapack_int LAPACKE_dgtsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n, df, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n-1, dlf, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -7; } - } - if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n, df, 1 ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n-1, duf, 1 ) ) { - return -11; + if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n-1, dlf, 1 ) ) { + return -9; + } + } + if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { + return -8; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { + return -12; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n-1, duf, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgttrf.c b/lapack-netlib/LAPACKE/src/lapacke_dgttrf.c index 056b8cd8a..64a71521e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgttrf.c @@ -37,15 +37,17 @@ lapack_int LAPACKE_dgttrf( lapack_int n, double* dl, double* d, double* du, double* du2, lapack_int* ipiv ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { - return -2; - } - if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { + return -2; + } + if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { + return -4; + } } #endif return LAPACKE_dgttrf_work( n, dl, d, du, du2, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgttrs.c b/lapack-netlib/LAPACKE/src/lapacke_dgttrs.c index 8825e2dce..1dd61dcea 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgttrs.c @@ -43,21 +43,23 @@ lapack_int LAPACKE_dgttrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( n-2, du2, 1 ) ) { + return -8; + } } #endif return LAPACKE_dgttrs_work( matrix_layout, trans, n, nrhs, dl, d, du, du2, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dhgeqz.c b/lapack-netlib/LAPACKE/src/lapacke_dhgeqz.c index 8c23bdf4b..d1f355b01 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dhgeqz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dhgeqz.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_dhgeqz( int matrix_layout, char job, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -8; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -15; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -8; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -10; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -17; + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -15; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -10; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -17; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dhsein.c b/lapack-netlib/LAPACKE/src/lapacke_dhsein.c index 5f574e109..182348689 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dhsein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dhsein.c @@ -48,25 +48,27 @@ lapack_int LAPACKE_dhsein( int matrix_layout, char job, char eigsrc, char initv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -13; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -11; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -13; + } + } + if( LAPACKE_d_nancheck( n, wi, 1 ) ) { + return -10; + } + if( LAPACKE_d_nancheck( n, wr, 1 ) ) { + return -9; } - } - if( LAPACKE_d_nancheck( n, wi, 1 ) ) { - return -10; - } - if( LAPACKE_d_nancheck( n, wr, 1 ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dhseqr.c b/lapack-netlib/LAPACKE/src/lapacke_dhseqr.c index 59beade01..9672dc641 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dhseqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dhseqr.c @@ -47,13 +47,15 @@ lapack_int LAPACKE_dhseqr( int matrix_layout, char job, char compz, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlacn2.c b/lapack-netlib/LAPACKE/src/lapacke_dlacn2.c index 1ccf0a9da..7f58b237e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlacn2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlacn2.c @@ -37,12 +37,14 @@ lapack_int LAPACKE_dlacn2( lapack_int n, double* v, double* x, lapack_int* isgn, double* est, lapack_int* kase, lapack_int* isave ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, est, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, x, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, est, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n, x, 1 ) ) { + return -3; + } } #endif return LAPACKE_dlacn2_work( n, v, x, isgn, est, kase, isave ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlacpy.c b/lapack-netlib/LAPACKE/src/lapacke_dlacpy.c index 8839d8cf0..32b5e4bec 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlacpy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlacpy.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dlacpy( int matrix_layout, char uplo, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_dlacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlag2s.c b/lapack-netlib/LAPACKE/src/lapacke_dlag2s.c index 6b2b8b31e..e9f9aef74 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlag2s.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlag2s.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dlag2s( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dlag2s_work( matrix_layout, m, n, a, lda, sa, ldsa ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlagge.c b/lapack-netlib/LAPACKE/src/lapacke_dlagge.c index 78be9adeb..6ca8739ec 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlagge.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlagge.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dlagge( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( MIN(m,n), d, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( MIN(m,n), d, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlagsy.c b/lapack-netlib/LAPACKE/src/lapacke_dlagsy.c index da5fe37c3..1293fc7d4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlagsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlagsy.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dlagsy( int matrix_layout, lapack_int n, lapack_int k, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlange.c b/lapack-netlib/LAPACKE/src/lapacke_dlange.c index 4d0cea1bf..0186768e1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlange.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlange.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dlange * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -37,16 +37,18 @@ double LAPACKE_dlange( int matrix_layout, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda ) { lapack_int info = 0; - double res = 0.; + double res = 0.; double* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dlange", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlange_work.c b/lapack-netlib/LAPACKE/src/lapacke_dlange_work.c index d3a478c14..43d48c816 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlange_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlange_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dlange * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,37 +38,42 @@ double LAPACKE_dlange_work( int matrix_layout, char norm, lapack_int m, double* work ) { lapack_int info = 0; - double res = 0.; + double res = 0.; + char norm_lapack; if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ + /* Call LAPACK function */ res = LAPACK_dlange( &norm, &m, &n, a, &lda, work ); - if( info < 0 ) { - info = info - 1; - } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,m); - double* a_t = NULL; + double* work_lapack = NULL; /* Check leading dimension(s) */ if( lda < n ) { info = -6; LAPACKE_xerbla( "LAPACKE_dlange_work", info ); return info; } - /* Allocate memory for temporary array(s) */ - a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; + if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) { + norm_lapack = 'i'; + } else if( LAPACKE_lsame( norm, 'i' ) ) { + norm_lapack = '1'; + } else { + norm_lapack = norm; + } + /* Allocate memory for work array(s) */ + if( LAPACKE_lsame( norm_lapack, 'i' ) ) { + work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) ); + if( work_lapack == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } } - /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); - /* Call LAPACK function and adjust info */ - res = LAPACK_dlange( &norm, &m, &n, a_t, &lda_t, work ); - info = 0; /* LAPACK call is ok! */ + /* Call LAPACK function */ + res = LAPACK_dlange( &norm_lapack, &n, &m, a, &lda, work_lapack ); /* Release memory and exit */ - LAPACKE_free( a_t ); + if( work_lapack ) { + LAPACKE_free( work_lapack ); + } exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_dlange_work", info ); } } else { diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlansy.c b/lapack-netlib/LAPACKE/src/lapacke_dlansy.c index e970fec09..931e83f43 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlansy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlansy.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dlansy * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -37,16 +37,18 @@ double LAPACKE_dlansy( int matrix_layout, char norm, char uplo, lapack_int n, const double* a, lapack_int lda ) { lapack_int info = 0; - double res = 0.; + double res = 0.; double* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dlansy", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlansy_work.c b/lapack-netlib/LAPACKE/src/lapacke_dlansy_work.c index 6468c91aa..b3a6872b8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlansy_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlansy_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dlansy * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,7 +38,7 @@ double LAPACKE_dlansy_work( int matrix_layout, char norm, char uplo, double* work ) { lapack_int info = 0; - double res = 0.; + double res = 0.; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ res = LAPACK_dlansy( &norm, &uplo, &n, a, &lda, work ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlantr.c b/lapack-netlib/LAPACKE/src/lapacke_dlantr.c index 480f31d91..ce625c7aa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlantr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlantr.c @@ -38,16 +38,18 @@ double LAPACKE_dlantr( int matrix_layout, char norm, char uplo, char diag, lapack_int lda ) { lapack_int info = 0; - double res = 0.; + double res = 0.; double* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dlantr", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlapmr.c b/lapack-netlib/LAPACKE/src/lapacke_dlapmr.c index 7e837e8ee..97068bb34 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlapmr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlapmr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dlapmr( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_dlapmr_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlapmt.c b/lapack-netlib/LAPACKE/src/lapacke_dlapmt.c index 40ab46c57..6c381bc7e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlapmt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlapmt.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dlapmt( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_dlapmt_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlapy2.c b/lapack-netlib/LAPACKE/src/lapacke_dlapy2.c index 4d636bb0e..7e9fe72d1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlapy2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlapy2.c @@ -36,12 +36,14 @@ double LAPACKE_dlapy2( double x, double y ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &x, 1 ) ) { - return -1; - } - if( LAPACKE_d_nancheck( 1, &y, 1 ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &x, 1 ) ) { + return -1; + } + if( LAPACKE_d_nancheck( 1, &y, 1 ) ) { + return -2; + } } #endif return LAPACKE_dlapy2_work( x, y ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlapy3.c b/lapack-netlib/LAPACKE/src/lapacke_dlapy3.c index 45bbaba36..005e59ea4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlapy3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlapy3.c @@ -36,15 +36,17 @@ double LAPACKE_dlapy3( double x, double y, double z ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &x, 1 ) ) { - return -1; - } - if( LAPACKE_d_nancheck( 1, &y, 1 ) ) { - return -2; - } - if( LAPACKE_d_nancheck( 1, &z, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &x, 1 ) ) { + return -1; + } + if( LAPACKE_d_nancheck( 1, &y, 1 ) ) { + return -2; + } + if( LAPACKE_d_nancheck( 1, &z, 1 ) ) { + return -3; + } } #endif return LAPACKE_dlapy3_work( x, y, z ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c b/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c index b1bc5d485..55c26f4b6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dlarfb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,7 +40,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct lapack_int ldc ) { lapack_int info = 0; - lapack_int ldwork = ( side=='l')?n:(( side=='r')?m:1); + lapack_int ldwork; double* work = NULL; lapack_int ncols_v, nrows_v; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -48,57 +48,66 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; - } - if( LAPACKE_dge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + nrows_v = ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } - if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*ldv], ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); - return -8; + if( LAPACKE_dge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { + if( k > nrows_v ) { + LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); + return -8; + } + if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, + &v[(nrows_v-k)*ldv], ldv ) ) + return -9; + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( k > ncols_v ) { + LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); + return -8; + } + if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], + ldv ) ) + return -9; + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) + return -9; } - if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], - ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; } #endif + if( LAPACKE_lsame( side, 'l' ) ) { + ldwork = n; + } else if( LAPACKE_lsame( side, 'r' ) ) { + ldwork = m; + } else { + ldwork = 1; + } /* Allocate memory for working array(s) */ work = (double*)LAPACKE_malloc( sizeof(double) * ldwork * MAX(1,k) ); if( work == NULL ) { diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlarfg.c b/lapack-netlib/LAPACKE/src/lapacke_dlarfg.c index 33e4b2941..0f627b323 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlarfg.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlarfg.c @@ -37,12 +37,14 @@ lapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x, lapack_int incx, double* tau ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, alpha, 1 ) ) { - return -2; - } - if( LAPACKE_d_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, alpha, 1 ) ) { + return -2; + } + if( LAPACKE_d_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -3; + } } #endif return LAPACKE_dlarfg_work( n, alpha, x, incx, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlarft.c b/lapack-netlib/LAPACKE/src/lapacke_dlarft.c index 6c3a4e2b0..82cc7272b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlarft.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlarft.c @@ -44,16 +44,18 @@ lapack_int LAPACKE_dlarft( int matrix_layout, char direct, char storev, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( LAPACKE_lsame( storev, 'r' ) ? n : 1); - nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1); - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( LAPACKE_lsame( storev, 'r' ) ? n : 1); + nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1); + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -6; + } } #endif return LAPACKE_dlarft_work( matrix_layout, direct, storev, n, k, v, ldv, tau, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlarfx.c b/lapack-netlib/LAPACKE/src/lapacke_dlarfx.c index ee94cc337..ab4a58e76 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlarfx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlarfx.c @@ -42,15 +42,17 @@ lapack_int LAPACKE_dlarfx( int matrix_layout, char side, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &tau, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( m, v, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &tau, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( m, v, 1 ) ) { + return -5; + } } #endif return LAPACKE_dlarfx_work( matrix_layout, side, m, n, v, tau, c, ldc, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlartgp.c b/lapack-netlib/LAPACKE/src/lapacke_dlartgp.c index 2613305db..63913ba75 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlartgp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlartgp.c @@ -37,12 +37,14 @@ lapack_int LAPACKE_dlartgp( double f, double g, double* cs, double* sn, double* r ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &f, 1 ) ) { - return -1; - } - if( LAPACKE_d_nancheck( 1, &g, 1 ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &f, 1 ) ) { + return -1; + } + if( LAPACKE_d_nancheck( 1, &g, 1 ) ) { + return -2; + } } #endif return LAPACKE_dlartgp_work( f, g, cs, sn, r ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlartgs.c b/lapack-netlib/LAPACKE/src/lapacke_dlartgs.c index 2c0c0315e..de106cb8d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlartgs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlartgs.c @@ -37,15 +37,17 @@ lapack_int LAPACKE_dlartgs( double x, double y, double sigma, double* cs, double* sn ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &sigma, 1 ) ) { - return -3; - } - if( LAPACKE_d_nancheck( 1, &x, 1 ) ) { - return -1; - } - if( LAPACKE_d_nancheck( 1, &y, 1 ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &sigma, 1 ) ) { + return -3; + } + if( LAPACKE_d_nancheck( 1, &x, 1 ) ) { + return -1; + } + if( LAPACKE_d_nancheck( 1, &y, 1 ) ) { + return -2; + } } #endif return LAPACKE_dlartgs_work( x, y, sigma, cs, sn ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlascl.c b/lapack-netlib/LAPACKE/src/lapacke_dlascl.c index b35b9b289..5b579a5d1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlascl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlascl.c @@ -43,68 +43,70 @@ lapack_int LAPACKE_dlascl( int matrix_layout, char type, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - switch (type) { - case 'G': - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + switch (type) { + case 'G': + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -9; + } + break; + case 'L': + // TYPE = 'L' - lower triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_dgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { + return -9; + } + break; + case 'U': + // TYPE = 'U' - upper triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_dgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { + return -9; + } + break; + case 'H': + // TYPE = 'H' - part of upper Hessenberg matrix in general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_dgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { + return -9; + } + case 'B': + // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) + if( LAPACKE_dsb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { + return -9; + } + break; + case 'Q': + // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) + if( LAPACKE_dsb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { + return -9; + } + break; + case 'Z': + // TYPE = 'Z' - band matrix laid out for ?GBTRF + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { + return -9; + } + break; } - break; - case 'L': - // TYPE = 'L' - lower triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_dgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { - return -9; - } - break; - case 'U': - // TYPE = 'U' - upper triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_dgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { - return -9; - } - break; - case 'H': - // TYPE = 'H' - part of upper Hessenberg matrix in general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_dgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { - return -9; - } - case 'B': - // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) - if( LAPACKE_dsb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { - return -9; - } - break; - case 'Q': - // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) - if( LAPACKE_dsb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { - return -9; - } - break; - case 'Z': - // TYPE = 'Z' - band matrix laid out for ?GBTRF - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_dgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { - return -9; - } - break; } #endif return LAPACKE_dlascl_work( matrix_layout, type, kl, ku, cfrom, cto, m, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlaset.c b/lapack-netlib/LAPACKE/src/lapacke_dlaset.c index cbd52b22b..c286c18be 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlaset.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlaset.c @@ -49,11 +49,13 @@ lapack_int LAPACKE_dlaset( int matrix_layout, char uplo, lapack_int m, *****************************************************************************/ #ifndef LAPACK_DISABLE_NAN_CHECK - if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( 1, &beta, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( 1, &beta, 1 ) ) { + return -6; + } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlasrt.c b/lapack-netlib/LAPACKE/src/lapacke_dlasrt.c index ba8a24414..844343afe 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlasrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlasrt.c @@ -36,9 +36,11 @@ lapack_int LAPACKE_dlasrt( char id, lapack_int n, double* d ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -3; + } } #endif return LAPACKE_dlasrt_work( id, n, d ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlassq.c b/lapack-netlib/LAPACKE/src/lapacke_dlassq.c new file mode 100644 index 000000000..a564240d4 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dlassq.c @@ -0,0 +1,53 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dlassq +* Author: Julien langou +* Generated February 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dlassq( lapack_int n, double* x, lapack_int incx, double* scale, double* sumsq ) +{ +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input vector `x` and in/out scalars `scale` and `sumsq` for NaNs */ + if( LAPACKE_d_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -2; + } + if( LAPACKE_d_nancheck( 1, scale, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, sumsq, 1 ) ) { + return -5; + } + } +#endif + return LAPACKE_dlassq_work( n, x, incx, scale, sumsq ); +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlassq_work.c b/lapack-netlib/LAPACKE/src/lapacke_dlassq_work.c new file mode 100644 index 000000000..9d04bfc86 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dlassq_work.c @@ -0,0 +1,41 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dlassq +* Author: Julien Langou +* Generated February, 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dlassq_work( lapack_int n, double* x, lapack_int incx, double* scale, double* sumsq ) +{ + lapack_int info = 0; + LAPACK_dlassq( &n, x, &incx, scale, sumsq ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlaswp.c b/lapack-netlib/LAPACKE/src/lapacke_dlaswp.c index b988bba43..bd07b3568 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlaswp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlaswp.c @@ -42,19 +42,21 @@ lapack_int LAPACKE_dlaswp( int matrix_layout, lapack_int n, double* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ -/***************************************************************************** -* Disable the check as is below, the check below was checking for NaN -* from lda to n since there is no (obvious) way to knowing m. This is not -* a good idea. We could get a lower bound of m by scanning from ipiv. Or -* we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable -* the buggy Nan check. -* See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 -*****************************************************************************/ -/* if( LAPACKE_dge_nancheck( matrix_layout, lda, n, a, lda ) ) { -* return -3; -* } -*/ + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + /***************************************************************************** + * Disable the check as is below, the check below was checking for NaN + * from lda to n since there is no (obvious) way to knowing m. This is not + * a good idea. We could get a lower bound of m by scanning from ipiv. Or + * we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable + * the buggy Nan check. + * See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 + *****************************************************************************/ + /* if( LAPACKE_dge_nancheck( matrix_layout, lda, n, a, lda ) ) { + * return -3; + * } + */ + } #endif return LAPACKE_dlaswp_work( matrix_layout, n, a, lda, k1, k2, ipiv, incx ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlaswp_work.c b/lapack-netlib/LAPACKE/src/lapacke_dlaswp_work.c index 027c25fc0..b2b9c07f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlaswp_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlaswp_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dlaswp * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlatms.c b/lapack-netlib/LAPACKE/src/lapacke_dlatms.c index 6b3f83ea4..a762bd215 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlatms.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlatms.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_dlatms( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -14; - } - if( LAPACKE_d_nancheck( 1, &cond, 1 ) ) { - return -9; - } - if( LAPACKE_d_nancheck( MIN(n,m), d, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &dmax, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -14; + } + if( LAPACKE_d_nancheck( 1, &cond, 1 ) ) { + return -9; + } + if( LAPACKE_d_nancheck( MIN(n,m), d, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &dmax, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlauum.c b/lapack-netlib/LAPACKE/src/lapacke_dlauum.c index 89cefa42d..d873f046c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlauum.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlauum.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dlauum( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dlauum_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dopgtr.c b/lapack-netlib/LAPACKE/src/lapacke_dopgtr.c index d59c4ea34..8d5a32986 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dopgtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dopgtr.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_dopgtr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dopmtr.c b/lapack-netlib/LAPACKE/src/lapacke_dopmtr.c index e0f87b2d5..93d3d3d30 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dopmtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dopmtr.c @@ -47,16 +47,18 @@ lapack_int LAPACKE_dopmtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_dsp_nancheck( r, ap ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -9; - } - if( LAPACKE_d_nancheck( m-1, tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_dsp_nancheck( r, ap ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -9; + } + if( LAPACKE_d_nancheck( m-1, tau, 1 ) ) { + return -8; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorbdb.c b/lapack-netlib/LAPACKE/src/lapacke_dorbdb.c index a32feb5f6..82dbdebc8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorbdb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorbdb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dorbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -45,28 +45,31 @@ lapack_int LAPACKE_dorbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; double* work = NULL; double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dorbdb", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -9; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -11; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -13; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -9; + } + if( LAPACKE_dge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -11; + } + if( LAPACKE_dge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -13; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorbdb_work.c b/lapack-netlib/LAPACKE/src/lapacke_dorbdb_work.c index c33cc8f83..febe9d918 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorbdb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorbdb_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dorbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -43,115 +43,35 @@ lapack_int LAPACKE_dorbdb_work( int matrix_layout, char trans, char signs, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_dorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_dorbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - double* x11_t = NULL; - double* x12_t = NULL; - double* x21_t = NULL; - double* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_dorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (double*)LAPACKE_malloc( sizeof(double) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (double*)LAPACKE_malloc( sizeof(double) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_dorbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorcsd.c b/lapack-netlib/LAPACKE/src/lapacke_dorcsd.c index a240ef136..a103bf052 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorcsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorcsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dorcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,28 +48,31 @@ lapack_int LAPACKE_dorcsd( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork = NULL; double* work = NULL; double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dorcsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -11; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -13; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -15; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -17; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -11; + } + if( LAPACKE_dge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -13; + } + if( LAPACKE_dge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -15; + } + if( LAPACKE_dge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -17; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorcsd2by1.c b/lapack-netlib/LAPACKE/src/lapacke_dorcsd2by1.c index 43ca0b6ca..ec298cee4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorcsd2by1.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorcsd2by1.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dorcsd2by1 * Author: Intel Corporation -* Generated December 2016 +* Generated November 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -50,17 +50,18 @@ lapack_int LAPACKE_dorcsd2by1( int matrix_layout, char jobu1, char jobu2, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = p ; - nrows_x21 = m-p ; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_x11 = p; + nrows_x21 = m-p; + if( LAPACKE_dge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + return -8; + } + + if( LAPACKE_dge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + return -9; + } } - - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -9; - } - #endif /* Allocate memory for working array(s) */ iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * MAX(1,m-MIN(MIN(p,m-p),MIN(q,m-q))) ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorcsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_dorcsd_work.c index 44e3c930a..d8f9e1a46 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorcsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorcsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dorcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,216 +46,36 @@ lapack_int LAPACKE_dorcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - double* x11_t = NULL; - double* x12_t = NULL; - double* x21_t = NULL; - double* x22_t = NULL; - double* u1_t = NULL; - double* u2_t = NULL; - double* v1t_t = NULL; - double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, iwork, - &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (double*)LAPACKE_malloc( sizeof(double) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (double*)LAPACKE_malloc( sizeof(double) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, iwork, - &info ); + LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorgbr.c b/lapack-netlib/LAPACKE/src/lapacke_dorgbr.c index 9d941724c..5547c4707 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorgbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorgbr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dorgbr( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( MIN(m,k), tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_d_nancheck( MIN(m,k), tau, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorghr.c b/lapack-netlib/LAPACKE/src/lapacke_dorghr.c index 42475900c..5faaeb194 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorghr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorghr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dorghr( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorglq.c b/lapack-netlib/LAPACKE/src/lapacke_dorglq.c index 5e7f182c8..702f0636c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorglq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorglq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dorglq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorgql.c b/lapack-netlib/LAPACKE/src/lapacke_dorgql.c index 2263c65b4..05d4e7fe0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorgql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorgql.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dorgql( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorgqr.c b/lapack-netlib/LAPACKE/src/lapacke_dorgqr.c index 82c221023..078062b20 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorgqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorgqr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dorgqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorgrq.c b/lapack-netlib/LAPACKE/src/lapacke_dorgrq.c index 09232da1d..5387672c2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorgrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorgrq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dorgrq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorgtr.c b/lapack-netlib/LAPACKE/src/lapacke_dorgtr.c index d9ab10d1c..86184b784 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dorgtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dorgtr.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormbr.c b/lapack-netlib/LAPACKE/src/lapacke_dormbr.c index bee4b1ae0..9904ec53f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormbr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dormbr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,18 +48,20 @@ lapack_int LAPACKE_dormbr( int matrix_layout, char vect, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nq = LAPACKE_lsame( side, 'l' ) ? m : n; - ar = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); - ac = LAPACKE_lsame( vect, 'q' ) ? MIN(nq,k) : nq; - if( LAPACKE_dge_nancheck( matrix_layout, ar, ac, a, lda ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_d_nancheck( MIN(nq,k), tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nq = LAPACKE_lsame( side, 'l' ) ? m : n; + ar = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); + ac = LAPACKE_lsame( vect, 'q' ) ? MIN(nq,k) : nq; + if( LAPACKE_dge_nancheck( matrix_layout, ar, ac, a, lda ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_d_nancheck( MIN(nq,k), tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormhr.c b/lapack-netlib/LAPACKE/src/lapacke_dormhr.c index 1b6d34c04..de4355a74 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormhr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormhr.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_dormhr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_dge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_d_nancheck( m-1, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_dge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_d_nancheck( m-1, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormlq.c b/lapack-netlib/LAPACKE/src/lapacke_dormlq.c index e4bf0c14f..1e50d18c1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormlq.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dormlq * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,16 +47,18 @@ lapack_int LAPACKE_dormlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - lapack_int r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_dge_nancheck( matrix_layout, k, r, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + lapack_int r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_dge_nancheck( matrix_layout, k, r, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormlq_work.c b/lapack-netlib/LAPACKE/src/lapacke_dormlq_work.c index 7b383d0ed..a326feffd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormlq_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormlq_work.c @@ -51,8 +51,8 @@ lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans, lapack_int r = LAPACKE_lsame( side, 'l' ) ? m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); - double *a_t = NULL; - double *c_t = NULL; + double *a_t = NULL; + double *c_t = NULL; /* Check leading dimension(s) */ if( lda < r ) { info = -8; diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormql.c b/lapack-netlib/LAPACKE/src/lapacke_dormql.c index 32f6281a9..11e9f07d1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormql.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_dormql( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_dge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_dge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormqr.c b/lapack-netlib/LAPACKE/src/lapacke_dormqr.c index 1c997a7b5..99d5a85db 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormqr.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_dormqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_dge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_dge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormrq.c b/lapack-netlib/LAPACKE/src/lapacke_dormrq.c index d784c2fce..a1d659860 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormrq.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_dormrq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormrz.c b/lapack-netlib/LAPACKE/src/lapacke_dormrz.c index 6e4eed26b..d1be94461 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormrz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormrz.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_dormrz( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_d_nancheck( k, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_d_nancheck( k, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dormtr.c b/lapack-netlib/LAPACKE/src/lapacke_dormtr.c index 92b9e08b7..05e4c57c8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dormtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dormtr.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_dormtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_dge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( m-1, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_dge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_d_nancheck( m-1, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbcon.c b/lapack-netlib/LAPACKE/src/lapacke_dpbcon.c index 0e5a15649..382f19493 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dpbcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbequ.c b/lapack-netlib/LAPACKE/src/lapacke_dpbequ.c index e407c8103..8bc8a9f33 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dpbequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_dpbequ_work( matrix_layout, uplo, n, kd, ab, ldab, s, scond, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_dpbrfs.c index 5acfb5c84..71b1655ed 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbrfs.c @@ -47,18 +47,20 @@ lapack_int LAPACKE_dpbrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbstf.c b/lapack-netlib/LAPACKE/src/lapacke_dpbstf.c index ec2173928..b0342cb42 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbstf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbstf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpbstf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -5; + } } #endif return LAPACKE_dpbstf_work( matrix_layout, uplo, n, kb, bb, ldbb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbsv.c b/lapack-netlib/LAPACKE/src/lapacke_dpbsv.c index 96ceab9cc..e77bba883 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbsv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dpbsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_dpbsv_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_dpbsvx.c index e3af32270..0baf50368 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbsvx.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_dpbsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -9; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_dpbtrf.c index 05f1e40a4..256557fb9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbtrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpbtrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_dpbtrf_work( matrix_layout, uplo, n, kd, ab, ldab ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_dpbtrs.c index c78e43c4d..f624d2927 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpbtrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dpbtrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_dpbtrs_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpftrf.c b/lapack-netlib/LAPACKE/src/lapacke_dpftrf.c index 31fb842cb..85148ba04 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpftrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpftrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpftrf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_dpftrf_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpftri.c b/lapack-netlib/LAPACKE/src/lapacke_dpftri.c index 75b93b93b..5bcdac65f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpftri( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_dpftri_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpftrs.c b/lapack-netlib/LAPACKE/src/lapacke_dpftrs.c index 210b76fa2..fef567fa4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpftrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpftrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dpftrs( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpf_nancheck( n, a ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpf_nancheck( n, a ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_dpftrs_work( matrix_layout, transr, uplo, n, nrhs, a, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpocon.c b/lapack-netlib/LAPACKE/src/lapacke_dpocon.c index 72ed8a84f..34ac297bb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpocon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpocon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dpocon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpoequ.c b/lapack-netlib/LAPACKE/src/lapacke_dpoequ.c index 756dc10ad..ba7ad1da3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpoequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpoequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dpoequ( int matrix_layout, lapack_int n, const double* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_dpoequ_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpoequb.c b/lapack-netlib/LAPACKE/src/lapacke_dpoequb.c index 2ffefb330..ce6378b84 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpoequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpoequb.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dpoequb( int matrix_layout, lapack_int n, const double* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_dpoequb_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dporfs.c b/lapack-netlib/LAPACKE/src/lapacke_dporfs.c index b8057ed67..a2cf8f0b9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dporfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dporfs.c @@ -47,18 +47,20 @@ lapack_int LAPACKE_dporfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dporfsx.c b/lapack-netlib/LAPACKE/src/lapacke_dporfsx.c index 81dd86c98..d9674d2ee 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dporfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dporfsx.c @@ -50,28 +50,30 @@ lapack_int LAPACKE_dporfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -21; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -10; + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -21; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -10; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -13; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -13; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dposv.c b/lapack-netlib/LAPACKE/src/lapacke_dposv.c index 021ae42d8..a8602a450 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dposv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dposv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dposv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_dposv_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dposvx.c b/lapack-netlib/LAPACKE/src/lapacke_dposvx.c index 81024dfd2..25ddb42fd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dposvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dposvx.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_dposvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dposvxx.c b/lapack-netlib/LAPACKE/src/lapacke_dposvxx.c index 54d33e758..27fc3ba95 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dposvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dposvxx.c @@ -50,26 +50,28 @@ lapack_int LAPACKE_dposvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpotrf.c b/lapack-netlib/LAPACKE/src/lapacke_dpotrf.c index 1d60ebe8f..e69d39986 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpotrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpotrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpotrf( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dpotrf_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpotrf2.c b/lapack-netlib/LAPACKE/src/lapacke_dpotrf2.c index de3a926d8..06390dcad 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpotrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpotrf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpotrf2( int matrix_layout, char uplo, lapack_int n, double* return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dpotrf2_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpotri.c b/lapack-netlib/LAPACKE/src/lapacke_dpotri.c index d4940d25b..1bacec9a2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpotri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpotri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpotri( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dpotri_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpotrs.c b/lapack-netlib/LAPACKE/src/lapacke_dpotrs.c index 357369716..17fa8c82f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpotrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpotrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dpotrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_dpotrs_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dppcon.c b/lapack-netlib/LAPACKE/src/lapacke_dppcon.c index be382bb73..985a705c1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dppcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dppcon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_dppcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -5; - } - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -5; + } + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dppequ.c b/lapack-netlib/LAPACKE/src/lapacke_dppequ.c index 503e18a6f..f444e7c4c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dppequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dppequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dppequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_dppequ_work( matrix_layout, uplo, n, ap, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpprfs.c b/lapack-netlib/LAPACKE/src/lapacke_dpprfs.c index bd04a55e8..36f1c0c88 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpprfs.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_dpprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dppsv.c b/lapack-netlib/LAPACKE/src/lapacke_dppsv.c index 57782f3e7..90d12728d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dppsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dppsv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dppsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_dppsv_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dppsvx.c b/lapack-netlib/LAPACKE/src/lapacke_dppsvx.c index 5b9832e8f..c427840aa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dppsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dppsvx.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_dppsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dpp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dpp_nancheck( n, afp ) ) { + return -7; + } } - } - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -9; + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpptrf.c b/lapack-netlib/LAPACKE/src/lapacke_dpptrf.c index 1cdc3acc2..2b5c7a746 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_dpptrf_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpptri.c b/lapack-netlib/LAPACKE/src/lapacke_dpptri.c index 601a4a15e..06d25947c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dpptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_dpptri_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpptrs.c b/lapack-netlib/LAPACKE/src/lapacke_dpptrs.c index 317e9cc48..9336f39ff 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dpptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_dpptrs_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpstrf.c b/lapack-netlib/LAPACKE/src/lapacke_dpstrf.c index 57a507224..4c2f6ca56 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpstrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpstrf.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_dpstrf( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &tol, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &tol, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dptcon.c b/lapack-netlib/LAPACKE/src/lapacke_dptcon.c index 7b5def46f..48b49c24a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dptcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dptcon.c @@ -39,15 +39,17 @@ lapack_int LAPACKE_dptcon( lapack_int n, const double* d, const double* e, lapack_int info = 0; double* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpteqr.c b/lapack-netlib/LAPACKE/src/lapacke_dpteqr.c index d1d85ccb4..9f6972486 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpteqr.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_dpteqr( int matrix_layout, char compz, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dptrfs.c b/lapack-netlib/LAPACKE/src/lapacke_dptrfs.c index 97b220bb9..3c0c967db 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dptrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dptrfs.c @@ -46,24 +46,26 @@ lapack_int LAPACKE_dptrfs( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, df, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, ef, 1 ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n, df, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, ef, 1 ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dptsv.c b/lapack-netlib/LAPACKE/src/lapacke_dptsv.c index 60862a84a..df309dab2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dptsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dptsv.c @@ -41,15 +41,17 @@ lapack_int LAPACKE_dptsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif return LAPACKE_dptsv_work( matrix_layout, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dptsvx.c b/lapack-netlib/LAPACKE/src/lapacke_dptsvx.c index e6964af6b..6d9c030f4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dptsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dptsvx.c @@ -46,24 +46,26 @@ lapack_int LAPACKE_dptsvx( int matrix_layout, char fact, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n, df, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n-1, ef, 1 ) ) { - return -8; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n, df, 1 ) ) { + return -7; + } + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n-1, ef, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpttrf.c b/lapack-netlib/LAPACKE/src/lapacke_dpttrf.c index d66527006..c48755377 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpttrf.c @@ -36,12 +36,14 @@ lapack_int LAPACKE_dpttrf( lapack_int n, double* d, double* e ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif return LAPACKE_dpttrf_work( n, d, e ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dpttrs.c b/lapack-netlib/LAPACKE/src/lapacke_dpttrs.c index 79a2bed80..88a962a0f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dpttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dpttrs.c @@ -42,15 +42,17 @@ lapack_int LAPACKE_dpttrs( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif return LAPACKE_dpttrs_work( matrix_layout, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbev.c b/lapack-netlib/LAPACKE/src/lapacke_dsbev.c index 8b0bffbf8..ac2b05391 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbev.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dsbev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsbev_2stage.c index 3ad34a660..79cd25386 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbev_2stage.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_dsbev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbevd.c b/lapack-netlib/LAPACKE/src/lapacke_dsbevd.c index 96f0fcbc2..4ecd1b522 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbevd.c @@ -49,9 +49,11 @@ lapack_int LAPACKE_dsbevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsbevd_2stage.c index a197c3f9e..b0ccc0b1e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbevd_2stage.c @@ -49,9 +49,11 @@ lapack_int LAPACKE_dsbevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbevx.c b/lapack-netlib/LAPACKE/src/lapacke_dsbevx.c index 5f8c37472..cd84a0869 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbevx.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_dsbevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsbevx_2stage.c index 7188efc76..256bf729e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbevx_2stage.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_dsbevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbgst.c b/lapack-netlib/LAPACKE/src/lapacke_dsbgst.c index 017cb189e..81002930d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbgst.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dsbgst( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbgv.c b/lapack-netlib/LAPACKE/src/lapacke_dsbgv.c index 6206e2e89..c313031ab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbgv.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dsbgv( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbgvd.c b/lapack-netlib/LAPACKE/src/lapacke_dsbgvd.c index cc33f2364..36f912ee5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbgvd.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_dsbgvd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbgvx.c b/lapack-netlib/LAPACKE/src/lapacke_dsbgvx.c index 8e9b3d3f3..3a28a794c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbgvx.c @@ -49,24 +49,26 @@ lapack_int LAPACKE_dsbgvx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -8; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -18; - } - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -10; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -15; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -18; + } + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -10; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -14; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -15; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsbtrd.c b/lapack-netlib/LAPACKE/src/lapacke_dsbtrd.c index c1c9c03c5..c7ad8ce7b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsbtrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsbtrd.c @@ -44,13 +44,15 @@ lapack_int LAPACKE_dsbtrd( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_lsame( vect, 'u' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_lsame( vect, 'u' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsfrk.c b/lapack-netlib/LAPACKE/src/lapacke_dsfrk.c index 2342e92aa..603e0931e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsfrk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsfrk.c @@ -44,20 +44,22 @@ lapack_int LAPACKE_dsfrk( int matrix_layout, char transr, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ka = LAPACKE_lsame( trans, 'n' ) ? k : n; - na = LAPACKE_lsame( trans, 'n' ) ? n : k; - if( LAPACKE_dge_nancheck( matrix_layout, na, ka, a, lda ) ) { - return -8; - } - if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &beta, 1 ) ) { - return -10; - } - if( LAPACKE_dpf_nancheck( n, c ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ka = LAPACKE_lsame( trans, 'n' ) ? k : n; + na = LAPACKE_lsame( trans, 'n' ) ? n : k; + if( LAPACKE_dge_nancheck( matrix_layout, na, ka, a, lda ) ) { + return -8; + } + if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &beta, 1 ) ) { + return -10; + } + if( LAPACKE_dpf_nancheck( n, c ) ) { + return -11; + } } #endif return LAPACKE_dsfrk_work( matrix_layout, transr, uplo, trans, n, k, alpha, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsgesv.c b/lapack-netlib/LAPACKE/src/lapacke_dsgesv.c index be1e41442..9d6c39d64 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsgesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsgesv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsgesv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspcon.c b/lapack-netlib/LAPACKE/src/lapacke_dspcon.c index cc65fd026..217e5bde7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dspcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -6; - } - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -6; + } + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspev.c b/lapack-netlib/LAPACKE/src/lapacke_dspev.c index d3a07e9ff..c662cfed1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspev.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dspev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspevd.c b/lapack-netlib/LAPACKE/src/lapacke_dspevd.c index dca74645c..3b6b25d5e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspevd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_dspevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspevx.c b/lapack-netlib/LAPACKE/src/lapacke_dspevx.c index aa7e38c3b..172976357 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspevx.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_dspevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspgst.c b/lapack-netlib/LAPACKE/src/lapacke_dspgst.c index 1ba890a5e..20ea58a05 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspgst.c @@ -41,12 +41,14 @@ lapack_int LAPACKE_dspgst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_dsp_nancheck( n, bp ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_dsp_nancheck( n, bp ) ) { + return -6; + } } #endif return LAPACKE_dspgst_work( matrix_layout, itype, uplo, n, ap, bp ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspgv.c b/lapack-netlib/LAPACKE/src/lapacke_dspgv.c index b7e32af25..818f131ce 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspgv.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_dspgv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_dsp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_dsp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspgvd.c b/lapack-netlib/LAPACKE/src/lapacke_dspgvd.c index 9c1861b48..8ca478ed1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspgvd.c @@ -49,12 +49,14 @@ lapack_int LAPACKE_dspgvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_dsp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_dsp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspgvx.c b/lapack-netlib/LAPACKE/src/lapacke_dspgvx.c index cc2cab753..82a6217e2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspgvx.c @@ -48,24 +48,26 @@ lapack_int LAPACKE_dspgvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -13; - } - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -7; - } - if( LAPACKE_dsp_nancheck( n, bp ) ) { - return -8; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -13; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -10; + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -7; + } + if( LAPACKE_dsp_nancheck( n, bp ) ) { + return -8; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -9; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsposv.c b/lapack-netlib/LAPACKE/src/lapacke_dsposv.c index 46b062f5a..68ad48b6e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsposv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsposv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsposv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsprfs.c b/lapack-netlib/LAPACKE/src/lapacke_dsprfs.c index 7b3079f6f..ef0feee6a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsprfs.c @@ -47,18 +47,20 @@ lapack_int LAPACKE_dsprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspsv.c b/lapack-netlib/LAPACKE/src/lapacke_dspsv.c index 548000a6b..48d94c44b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspsv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dspsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_dspsv_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dspsvx.c b/lapack-netlib/LAPACKE/src/lapacke_dspsvx.c index afa5907ac..6e2d9a481 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dspsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dspsvx.c @@ -47,17 +47,19 @@ lapack_int LAPACKE_dspsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dsp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dsp_nancheck( n, afp ) ) { + return -7; + } + } + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsptrd.c b/lapack-netlib/LAPACKE/src/lapacke_dsptrd.c index 8d01ecdf2..232f33f65 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsptrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsptrd.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dsptrd( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_dsptrd_work( matrix_layout, uplo, n, ap, d, e, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsptrf.c b/lapack-netlib/LAPACKE/src/lapacke_dsptrf.c index 4ba7bd890..9c3a89813 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dsptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_dsptrf_work( matrix_layout, uplo, n, ap, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsptri.c b/lapack-netlib/LAPACKE/src/lapacke_dsptri.c index b13f7c61c..728e0b854 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsptri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dsptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsptrs.c b/lapack-netlib/LAPACKE/src/lapacke_dsptrs.c index 492bf2131..1d7f313ca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dsptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_dsptrs_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstebz.c b/lapack-netlib/LAPACKE/src/lapacke_dstebz.c index fb1646c50..011770be7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstebz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstebz.c @@ -43,24 +43,26 @@ lapack_int LAPACKE_dstebz( char range, char order, lapack_int n, double vl, lapack_int* iwork = NULL; double* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -8; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -9; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -10; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -8; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -5; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -9; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -10; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -4; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -5; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstedc.c b/lapack-netlib/LAPACKE/src/lapacke_dstedc.c index a974f4563..4f88a04c4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstedc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstedc.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_dstedc( int matrix_layout, char compz, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstegr.c b/lapack-netlib/LAPACKE/src/lapacke_dstegr.c index 1d63e12bc..9191f0a9f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstegr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstegr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dstegr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -51,24 +51,26 @@ lapack_int LAPACKE_dstegr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstein.c b/lapack-netlib/LAPACKE/src/lapacke_dstein.c index 140a1f556..44479939b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstein.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dstein * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,15 +46,17 @@ lapack_int LAPACKE_dstein( int matrix_layout, lapack_int n, const double* d, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, w, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n, w, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstemr.c b/lapack-netlib/LAPACKE/src/lapacke_dstemr.c index 7e72f4464..8dc2bd237 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstemr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstemr.c @@ -52,18 +52,20 @@ lapack_int LAPACKE_dstemr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstemr_work.c b/lapack-netlib/LAPACKE/src/lapacke_dstemr_work.c index 947e883e7..6a25fefc6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstemr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstemr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dstemr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -55,7 +55,7 @@ lapack_int LAPACKE_dstemr_work( int matrix_layout, char jobz, char range, lapack_int ldz_t = MAX(1,n); double* z_t = NULL; /* Check leading dimension(s) */ - if( ldz < n ) { + if( ldz < 1 || ( LAPACKE_lsame( jobz, 'v' ) && ldz < n ) ) { info = -14; LAPACKE_xerbla( "LAPACKE_dstemr_work", info ); return info; diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsteqr.c b/lapack-netlib/LAPACKE/src/lapacke_dsteqr.c index 81f325e66..8984fdf90 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsteqr.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_dsteqr( int matrix_layout, char compz, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsterf.c b/lapack-netlib/LAPACKE/src/lapacke_dsterf.c index 29f46cf4b..35537278d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsterf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsterf.c @@ -36,12 +36,14 @@ lapack_int LAPACKE_dsterf( lapack_int n, double* d, double* e ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif return LAPACKE_dsterf_work( n, d, e ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstev.c b/lapack-netlib/LAPACKE/src/lapacke_dstev.c index 6e575b181..6811d578f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstev.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dstev * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -43,12 +43,14 @@ lapack_int LAPACKE_dstev( int matrix_layout, char jobz, lapack_int n, double* d, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstevd.c b/lapack-netlib/LAPACKE/src/lapacke_dstevd.c index be2f09290..e824a164b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstevd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dstevd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,12 +48,14 @@ lapack_int LAPACKE_dstevd( int matrix_layout, char jobz, lapack_int n, double* d return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstevr.c b/lapack-netlib/LAPACKE/src/lapacke_dstevr.c index db1a99c1c..fd53e0ac0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstevr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstevr.c @@ -51,24 +51,26 @@ lapack_int LAPACKE_dstevr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dstevx.c b/lapack-netlib/LAPACKE/src/lapacke_dstevx.c index 45f161d4f..9c89f9467 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dstevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dstevx.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dstevx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,24 +47,26 @@ lapack_int LAPACKE_dstevx( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsycon.c b/lapack-netlib/LAPACKE/src/lapacke_dsycon.c index 584600540..ba68bf796 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsycon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsycon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dsycon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsycon_3.c b/lapack-netlib/LAPACKE/src/lapacke_dsycon_3.c index 40503d117..f846bb408 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsycon_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsycon_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dsycon_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,20 +40,23 @@ lapack_int LAPACKE_dsycon_3( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; lapack_int* iwork = NULL; double* work = NULL; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dsycon_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyconv.c b/lapack-netlib/LAPACKE/src/lapacke_dsyconv.c index 1e3f78ee6..cca9be489 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyconv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyconv.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dsyconv( int matrix_layout, char uplo, char way, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Call middle-level interface */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyequb.c b/lapack-netlib/LAPACKE/src/lapacke_dsyequb.c index 1daa80dd1..cfa045285 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dsyequb( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyev.c b/lapack-netlib/LAPACKE/src/lapacke_dsyev.c index d3ca12953..baac99481 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyev.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dsyev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsyev_2stage.c index 1bac91884..8d168990e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyev_2stage.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dsyev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevd.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevd.c index 36afe2cb0..870148b31 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_dsyevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevd_2stage.c index 627605a5c..a5507394c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevd_2stage.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_dsyevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevr.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevr.c index 62f944478..bae72f6c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevr.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_dsyevr( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevr_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevr_2stage.c index ee191d024..dad20209e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevr_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevr_2stage.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_dsyevr_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevr_work.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevr_work.c index 74cabe5ea..f3e3ea675 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dsyevr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -52,8 +52,9 @@ lapack_int LAPACKE_dsyevr_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevx.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevx.c index ea11abfbe..13980e109 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevx.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_dsyevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevx_2stage.c index 29b6fa533..caaff5a98 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevx_2stage.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_dsyevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyevx_work.c b/lapack-netlib/LAPACKE/src/lapacke_dsyevx_work.c index 65f68484f..f4ded1df1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyevx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyevx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dsyevx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -51,8 +51,9 @@ lapack_int LAPACKE_dsyevx_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsygst.c b/lapack-netlib/LAPACKE/src/lapacke_dsygst.c index 6a8793512..800a30b24 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsygst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsygst.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dsygst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_dsygst_work( matrix_layout, itype, uplo, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsygv.c b/lapack-netlib/LAPACKE/src/lapacke_dsygv.c index 9d99a879c..533b6a446 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsygv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsygv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsygv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsygv_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsygv_2stage.c index 018e038b7..974b63e54 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsygv_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsygv_2stage.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsygv_2stage( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsygvd.c b/lapack-netlib/LAPACKE/src/lapacke_dsygvd.c index 910b3b68e..907ad50bd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsygvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsygvd.c @@ -49,12 +49,14 @@ lapack_int LAPACKE_dsygvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsygvx.c b/lapack-netlib/LAPACKE/src/lapacke_dsygvx.c index f76e54838..02d54d7fa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsygvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsygvx.c @@ -50,24 +50,26 @@ lapack_int LAPACKE_dsygvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsygvx_work.c b/lapack-netlib/LAPACKE/src/lapacke_dsygvx_work.c index b5f41c773..b0829c975 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsygvx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsygvx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function dsygvx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyrfs.c b/lapack-netlib/LAPACKE/src/lapacke_dsyrfs.c index 44e4fb986..78fce7766 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyrfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_dsyrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_dsyrfsx.c index 7525f7b10..63d4c68fa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyrfsx.c @@ -51,28 +51,30 @@ lapack_int LAPACKE_dsyrfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -22; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -11; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysv.c b/lapack-netlib/LAPACKE/src/lapacke_dsysv.c index e721b0a1b..ca8347f0b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsysv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsysv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa.c b/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa.c index ea8d4f9f6..114794b78 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsysv_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage.c new file mode 100644 index 000000000..54a3788d7 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage.c @@ -0,0 +1,88 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dsysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, double* b, lapack_int ldb ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + double* work = NULL; + double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dsysv_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dsysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_dsysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsysv_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage_work.c new file mode 100644 index 000000000..d74e1ce42 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysv_aa_2stage_work.c @@ -0,0 +1,124 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dsysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, double* b, lapack_int ldb, + double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dsysv_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + double* a_t = NULL; + double* tb_t = NULL; + double* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_dsysv_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_dsysv_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_dsysv_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_dsysv_aa_2stage( &uplo, &n, &nrhs, a, &lda_t, + tb, <b, ipiv, ipiv2, b, &ldb_t, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (double*)LAPACKE_malloc( sizeof(double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_dsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dsysv_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsysv_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dsysv_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysv_rk.c b/lapack-netlib/LAPACKE/src/lapacke_dsysv_rk.c index ba9eaa3a8..df34aba62 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsysv_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysv_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dsysv_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,15 +46,14 @@ lapack_int LAPACKE_dsysv_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, e, 1) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysv_rook.c b/lapack-netlib/LAPACKE/src/lapacke_dsysv_rook.c index 9c77733ae..266cc53a8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsysv_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysv_rook.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsysv_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysvx.c b/lapack-netlib/LAPACKE/src/lapacke_dsysvx.c index 22af16d7e..e2853508f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsysvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysvx.c @@ -50,17 +50,19 @@ lapack_int LAPACKE_dsysvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsysvxx.c b/lapack-netlib/LAPACKE/src/lapacke_dsysvxx.c index 6fb6143ad..d4edaf4b4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsysvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsysvxx.c @@ -51,26 +51,28 @@ lapack_int LAPACKE_dsysvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -24; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -24; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsyswapr.c b/lapack-netlib/LAPACKE/src/lapacke_dsyswapr.c index eed600c60..02fa6befe 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsyswapr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsyswapr.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dsyswapr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dsyswapr_work( matrix_layout, uplo, n, a, lda, i1, i2 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrd.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrd.c index e00da39f8..40a704c88 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrd.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dsytrd( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrf.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrf.c index 49c7723a2..d6bcaa693 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dsytrf( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa.c index 1e81f6a2c..046621dab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dsytrf_aa( int matrix_layout, char uplo, lapack_int n, double return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage.c new file mode 100644 index 000000000..82e733562 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage.c @@ -0,0 +1,85 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dsytrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2 ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + double* work = NULL; + double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dsytrf_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dsytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_dsytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsytrf_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage_work.c new file mode 100644 index 000000000..2cc7b9ad2 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_2stage_work.c @@ -0,0 +1,109 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dsytrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + double* a, lapack_int lda, + double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dsytrf_aa_2stage( &uplo, &n, a, &lda, tb, + <b, ipiv, ipiv2, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + double* a_t = NULL; + double* tb_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_dsytrf_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_dsytrf_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_dsytrf_aa_2stage( &uplo, &n, a, &lda_t, + tb, <b, ipiv, ipiv2, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (double*)LAPACKE_malloc( sizeof(double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_dsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dsytrf_aa_2stage( &uplo, &n, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsytrf_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dsytrf_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rk.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rk.c index 0d9cade8e..a2d433c62 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dsytrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -45,12 +45,11 @@ lapack_int LAPACKE_dsytrf_rk( int matrix_layout, char uplo, lapack_int n, double return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rook.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rook.c index 99aa50ad4..197141741 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_rook.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dsytrf_rook( int matrix_layout, char uplo, lapack_int n, doub return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytri.c b/lapack-netlib/LAPACKE/src/lapacke_dsytri.c index b91249452..fcda55d03 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_dsytri( int matrix_layout, char uplo, lapack_int n, double* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytri2.c b/lapack-netlib/LAPACKE/src/lapacke_dsytri2.c index 8ab5d82fd..1c5662cda 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytri2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytri2.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dsytri2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytri2x.c b/lapack-netlib/LAPACKE/src/lapacke_dsytri2x.c index 3e5766a33..bdf639576 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytri2x.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytri2x.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dsytri2x( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytri_3.c b/lapack-netlib/LAPACKE/src/lapacke_dsytri_3.c index b0960dada..acf2c5542 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytri_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytri_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dsytri_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,17 +40,20 @@ lapack_int LAPACKE_dsytri_3( int matrix_layout, char uplo, lapack_int n, double* lapack_int lwork = -1; double* work = NULL; double work_query; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dsytri_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrs.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrs.c index 8b7624afa..b2a0149cd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dsytrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_dsytrs_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrs2.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrs2.c index e0124eea8..46c90190f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrs2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrs2.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_dsytrs2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_3.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_3.c index 59b83f9ae..a701b9536 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dsytrs_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -43,17 +43,19 @@ lapack_int LAPACKE_dsytrs_3( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, e ,1 ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n, e ,1 ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_dsytrs_3_work( matrix_layout, uplo, n, nrhs, a, lda, - e, ipiv, b, ldb ); + e, ipiv, b, ldb ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa.c index 6adb343a4..2d3c5a371 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function dsytrs_aa * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,12 +46,14 @@ lapack_int LAPACKE_dsytrs_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ @@ -60,7 +62,7 @@ lapack_int LAPACKE_dsytrs_aa( int matrix_layout, char uplo, lapack_int n, if( info != 0 ) { goto exit_level_0; } - lwork = (lapack_int)work_query; + lwork = (lapack_int)work_query; /* Allocate memory for work arrays */ work = (double*) LAPACKE_malloc( sizeof(double) * lwork ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage.c new file mode 100644 index 000000000..49411d0fa --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage.c @@ -0,0 +1,65 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dsytrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, double* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dsytrs_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Call middle-level interface */ + info = LAPACKE_dsytrs_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage_work.c new file mode 100644 index 000000000..9d76d0a7c --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_aa_2stage_work.c @@ -0,0 +1,114 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dsytrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, double* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dsytrs_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + double* a_t = NULL; + double* tb_t = NULL; + double* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_dsytrs_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_dsytrs_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_dsytrs_aa_2stage_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (double*)LAPACKE_malloc( sizeof(double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_dsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dsytrs_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsytrs_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dsytrs_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_rook.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_rook.c index 85c4e5fa1..42002a5f6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrs_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrs_rook.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dsytrs_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_dsytrs_rook_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtbcon.c b/lapack-netlib/LAPACKE/src/lapacke_dtbcon.c index f029046e2..fbcf42bd8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtbcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dtbcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_dtbrfs.c index 0776721fa..719e2385b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtbrfs.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_dtbrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_dtbtrs.c index 3740be986..f868e18d9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_dtbtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_dtbtrs_work( matrix_layout, uplo, trans, diag, n, kd, nrhs, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtfsm.c b/lapack-netlib/LAPACKE/src/lapacke_dtfsm.c index 4600cbf9a..4c9896beb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtfsm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtfsm.c @@ -43,18 +43,20 @@ lapack_int LAPACKE_dtfsm( int matrix_layout, char transr, char side, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( IS_D_NONZERO(alpha) ) { - if( LAPACKE_dtf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( IS_D_NONZERO(alpha) ) { + if( LAPACKE_dtf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -10; + } } - } - if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { - return -9; - } - if( IS_D_NONZERO(alpha) ) { - if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -11; + if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { + return -9; + } + if( IS_D_NONZERO(alpha) ) { + if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtftri.c b/lapack-netlib/LAPACKE/src/lapacke_dtftri.c index b60db92a5..fbe7a9eec 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dtftri( int matrix_layout, char transr, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -6; + } } #endif return LAPACKE_dtftri_work( matrix_layout, transr, uplo, diag, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtfttp.c b/lapack-netlib/LAPACKE/src/lapacke_dtfttp.c index 1c68759fd..2b8822960 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtfttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtfttp.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dtfttp( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_dtfttp_work( matrix_layout, transr, uplo, n, arf, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtfttr.c b/lapack-netlib/LAPACKE/src/lapacke_dtfttr.c index b6440cd03..748b31f5d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtfttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtfttr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dtfttr( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_dtfttr_work( matrix_layout, transr, uplo, n, arf, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtgevc.c b/lapack-netlib/LAPACKE/src/lapacke_dtgevc.c index e796db171..c59583100 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtgevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtgevc.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_dtgevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, p, ldp ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, s, lds ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, p, ldp ) ) { + return -8; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_dge_nancheck( matrix_layout, n, n, s, lds ) ) { + return -6; + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtgexc.c b/lapack-netlib/LAPACKE/src/lapacke_dtgexc.c index 88789957e..b5ceb6d6e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtgexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtgexc.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_dtgexc( int matrix_layout, lapack_logical wantq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; - } - if( wantq ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; } - } - if( wantz ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -11; + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } + if( wantq ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -9; + } + } + if( wantz ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtgsen.c b/lapack-netlib/LAPACKE/src/lapacke_dtgsen.c index b654e2f22..2cb7fce4b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtgsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtgsen.c @@ -53,21 +53,23 @@ lapack_int LAPACKE_dtgsen( int matrix_layout, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( wantq ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( wantz ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -16; + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( wantq ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -14; + } + } + if( wantz ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtgsja.c b/lapack-netlib/LAPACKE/src/lapacke_dtgsja.c index d57361910..7cc8cf950 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtgsja.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtgsja.c @@ -49,32 +49,34 @@ lapack_int LAPACKE_dtgsja( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; } - } - if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { - return -14; - } - if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, m, m, u, ldu ) ) { - return -18; + if( LAPACKE_dge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; } - } - if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, p, p, v, ldv ) ) { - return -20; + if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -22; + } + } + if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { + return -14; + } + if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, m, m, u, ldu ) ) { + return -18; + } + } + if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, p, p, v, ldv ) ) { + return -20; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtgsna.c b/lapack-netlib/LAPACKE/src/lapacke_dtgsna.c index bbcc1fd1e..1dda79c76 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtgsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtgsna.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_dtgsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtgsyl.c b/lapack-netlib/LAPACKE/src/lapacke_dtgsyl.c index e8a3b3687..feacd3b44 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtgsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtgsyl.c @@ -51,24 +51,26 @@ lapack_int LAPACKE_dtgsyl( int matrix_layout, char trans, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, m, d, ldd ) ) { - return -12; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, e, lde ) ) { - return -14; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, f, ldf ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, m, d, ldd ) ) { + return -12; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, e, lde ) ) { + return -14; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -16; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtpcon.c b/lapack-netlib/LAPACKE/src/lapacke_dtpcon.c index a5c136017..67cedacde 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtpcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtpcon.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_dtpcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtpmqrt.c b/lapack-netlib/LAPACKE/src/lapacke_dtpmqrt.c index 140890464..0b00dccf4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtpmqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtpmqrt.c @@ -50,24 +50,26 @@ lapack_int LAPACKE_dtpmqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_a = LAPACKE_lsame( side, 'L' ) ? n : - ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); - nrows_a = LAPACKE_lsame( side, 'L' ) ? k : - ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_dge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { - return -13; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -15; - } - if( LAPACKE_dge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_a = LAPACKE_lsame( side, 'L' ) ? n : + ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); + nrows_a = LAPACKE_lsame( side, 'L' ) ? k : + ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_dge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { + return -13; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -15; + } + if( LAPACKE_dge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtpqrt.c b/lapack-netlib/LAPACKE/src/lapacke_dtpqrt.c index ea4202ad9..99b764c65 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtpqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtpqrt.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_dtpqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtpqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_dtpqrt2.c index 0e18330ba..299089e62 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtpqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtpqrt2.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_dtpqrt2( int matrix_layout, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_dtpqrt2_work( matrix_layout, m, n, l, a, lda, b, ldb, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtprfb.c b/lapack-netlib/LAPACKE/src/lapacke_dtprfb.c index 7c4ce924d..5191f79bb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtprfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtprfb.c @@ -49,30 +49,32 @@ lapack_int LAPACKE_dtprfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( storev, 'C' ) ) { - ncols_v = k; - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - } else if( LAPACKE_lsame( storev, 'R' ) ) { - ncols_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - nrows_v = k; - } else { - ncols_v = 0; - nrows_v = 0; - } - if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -14; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -16; - } - if( LAPACKE_dge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -12; - } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( storev, 'C' ) ) { + ncols_v = k; + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + } else if( LAPACKE_lsame( storev, 'R' ) ) { + ncols_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + nrows_v = k; + } else { + ncols_v = 0; + nrows_v = 0; + } + if( LAPACKE_dge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -14; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -16; + } + if( LAPACKE_dge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -12; + } + if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -10; + } } #endif if (side=='l' || side=='L') { diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtprfs.c b/lapack-netlib/LAPACKE/src/lapacke_dtprfs.c index 6ebb5c88f..43a13215e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtprfs.c @@ -46,15 +46,17 @@ lapack_int LAPACKE_dtprfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtptri.c b/lapack-netlib/LAPACKE/src/lapacke_dtptri.c index a643a8646..4e84c5712 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dtptri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -5; + } } #endif return LAPACKE_dtptri_work( matrix_layout, uplo, diag, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtptrs.c b/lapack-netlib/LAPACKE/src/lapacke_dtptrs.c index ffbd91671..2b804b89e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dtptrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_dtptrs_work( matrix_layout, uplo, trans, diag, n, nrhs, ap, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtpttf.c b/lapack-netlib/LAPACKE/src/lapacke_dtpttf.c index 0e59224b1..df2ae2c5d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtpttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtpttf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dtpttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -5; + } } #endif return LAPACKE_dtpttf_work( matrix_layout, transr, uplo, n, ap, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtpttr.c b/lapack-netlib/LAPACKE/src/lapacke_dtpttr.c index e9de5087e..11194dfb6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtpttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtpttr.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dtpttr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_dtpttr_work( matrix_layout, uplo, n, ap, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrcon.c b/lapack-netlib/LAPACKE/src/lapacke_dtrcon.c index 40e44385c..421d17d26 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dtrcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrevc.c b/lapack-netlib/LAPACKE/src/lapacke_dtrevc.c index 29078d6fb..a76e3b4af 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrevc.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_dtrevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrexc.c b/lapack-netlib/LAPACKE/src/lapacke_dtrexc.c index 695c497ae..4808a7577 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrexc.c @@ -44,14 +44,16 @@ lapack_int LAPACKE_dtrexc( int matrix_layout, char compq, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -6; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -4; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -4; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrrfs.c b/lapack-netlib/LAPACKE/src/lapacke_dtrrfs.c index cc464bbfd..9c651fe45 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrrfs.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_dtrrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrsen.c b/lapack-netlib/LAPACKE/src/lapacke_dtrsen.c index b671a35f1..521bc2701 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrsen.c @@ -51,14 +51,16 @@ lapack_int LAPACKE_dtrsen( int matrix_layout, char job, char compq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -8; + } + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrsna.c b/lapack-netlib/LAPACKE/src/lapacke_dtrsna.c index 01a124a5b..b7e710e9b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrsna.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_dtrsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_dge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrsyl.c b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl.c index 48e9993c3..3fe0813bf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl.c @@ -44,15 +44,17 @@ lapack_int LAPACKE_dtrsyl( int matrix_layout, char trana, char tranb, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } } #endif return LAPACKE_dtrsyl_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrtri.c b/lapack-netlib/LAPACKE/src/lapacke_dtrtri.c index d8fa426c8..cdb92a638 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrtri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrtri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dtrtri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_dtrtri_work( matrix_layout, uplo, diag, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrtrs.c b/lapack-netlib/LAPACKE/src/lapacke_dtrtrs.c index 805641031..ae75b0209 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrtrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_dtrtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_dtrtrs_work( matrix_layout, uplo, trans, diag, n, nrhs, a, diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrttf.c b/lapack-netlib/LAPACKE/src/lapacke_dtrttf.c index ca2916cb6..66d1e5a2c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrttf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_dtrttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_dtrttf_work( matrix_layout, transr, uplo, n, a, lda, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrttp.c b/lapack-netlib/LAPACKE/src/lapacke_dtrttp.c index 0386d09a1..89f01dc95 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtrttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrttp.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_dtrttp( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_dtrttp_work( matrix_layout, uplo, n, a, lda, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtzrzf.c b/lapack-netlib/LAPACKE/src/lapacke_dtzrzf.c index b63d662d5..60ed28e08 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dtzrzf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dtzrzf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_dtzrzf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_nancheck.c b/lapack-netlib/LAPACKE/src/lapacke_nancheck.c new file mode 100644 index 000000000..c63e8e84b --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_nancheck.c @@ -0,0 +1,60 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native C interface to control NaN checking +* Author: Intel Corporation +* Generated July, 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +static int nancheck_flag = -1; + +void LAPACKE_set_nancheck( int flag ) +{ + nancheck_flag = ( flag ) ? 1 : 0; +} + +int LAPACKE_get_nancheck( ) +{ + char* env; + if ( nancheck_flag != -1 ) { + return nancheck_flag; + } + + /* Check environment variable, once and only once */ + env = getenv( "LAPACKE_NANCHECK" ); + if ( !env ) { + /* By default, NaN checking is enabled */ + nancheck_flag = 1; + } else { + nancheck_flag = atoi( env ) ? 1 : 0; + } + + return nancheck_flag; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_sbbcsd.c b/lapack-netlib/LAPACKE/src/lapacke_sbbcsd.c index 05dbea8ea..54bc045be 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sbbcsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sbbcsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,41 +46,44 @@ lapack_int LAPACKE_sbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lwork = -1; float* work = NULL; float work_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_sbbcsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - if( LAPACKE_s_nancheck( q-1, phi, 1 ) ) { - return -11; - } - if( LAPACKE_s_nancheck( q, theta, 1 ) ) { - return -10; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { - return -12; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( q-1, phi, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { - return -14; + if( LAPACKE_s_nancheck( q, theta, 1 ) ) { + return -10; } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { - return -16; + if( LAPACKE_lsame( jobu1, 'y' ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { + return -12; + } } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { - return -18; + if( LAPACKE_lsame( jobu2, 'y' ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { + return -14; + } + } + if( LAPACKE_lsame( jobv1t, 'y' ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { + return -16; + } + } + if( LAPACKE_lsame( jobv2t, 'y' ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { + return -18; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sbbcsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_sbbcsd_work.c index 9d840be18..41312b9d9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sbbcsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sbbcsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function sbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -45,151 +45,36 @@ lapack_int LAPACKE_sbbcsd_work( int matrix_layout, char jobu1, char jobu2, float* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - float* u1_t = NULL; - float* u2_t = NULL; - float* v1t_t = NULL; - float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (float*)LAPACKE_malloc( sizeof(float) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (float*) - LAPACKE_malloc( sizeof(float) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sbdsdc.c b/lapack-netlib/LAPACKE/src/lapacke_sbdsdc.c index 431768049..cd0318206 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sbdsdc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sbdsdc.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sbdsdc * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,12 +48,14 @@ lapack_int LAPACKE_sbdsdc( int matrix_layout, char uplo, char compq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sbdsqr.c b/lapack-netlib/LAPACKE/src/lapacke_sbdsqr.c index 02196087a..8ff181ca3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sbdsqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sbdsqr.c @@ -45,26 +45,28 @@ lapack_int LAPACKE_sbdsqr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( ncc != 0 ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( ncc != 0 ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { + return -13; + } } - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -8; - } - if( nru != 0 ) { - if( LAPACKE_sge_nancheck( matrix_layout, nru, n, u, ldu ) ) { - return -11; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -7; } - } - if( ncvt != 0 ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { - return -9; + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -8; + } + if( nru != 0 ) { + if( LAPACKE_sge_nancheck( matrix_layout, nru, n, u, ldu ) ) { + return -11; + } + } + if( ncvt != 0 ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx.c b/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx.c index 127417fc4..575c2b159 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_sbdsvdx( int matrix_layout, char uplo, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( n - 1, e, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( n - 1, e, 1 ) ) { + return -7; + } } #endif /* Allocate memory for work arrays */ @@ -71,8 +73,8 @@ lapack_int LAPACKE_sbdsvdx( int matrix_layout, char uplo, char jobz, char range, } /* Call middle-level interface */ info = LAPACKE_sbdsvdx_work( matrix_layout, uplo, jobz, range, - n, d, e, vl, vu, il, iu, ns, s, z, - ldz, work, iwork); + n, d, e, vl, vu, il, iu, ns, s, z, + ldz, work, iwork); /* Backup significant data from working array(s) */ for( i=0; i<12*n-1; i++ ) { superb[i] = iwork[i+1]; diff --git a/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx_work.c b/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx_work.c index f632b3ea9..69d970b7b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sbdsvdx_work.c @@ -34,17 +34,17 @@ #include "lapacke_utils.h" lapack_int LAPACKE_sbdsvdx_work( int matrix_layout, char uplo, char jobz, char range, - lapack_int n, float* d, float* e, - float vl, float vu, - lapack_int il, lapack_int iu, lapack_int* ns, - float* s, float* z, lapack_int ldz, - float* work, lapack_int* iwork ) + lapack_int n, float* d, float* e, + float vl, float vu, + lapack_int il, lapack_int iu, lapack_int* ns, + float* s, float* z, lapack_int ldz, + float* work, lapack_int* iwork ) { lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_sbdsvdx( &uplo, &jobz, &range, &n, d, e, &vl, &vu, - &il, &iu, ns, s, z, &ldz, + &il, &iu, ns, s, z, &ldz, work, iwork, &info ); if( info < 0 ) { info = info - 1; @@ -64,7 +64,7 @@ lapack_int LAPACKE_sbdsvdx_work( int matrix_layout, char uplo, char jobz, char r /* Allocate memory for temporary array(s) */ if( LAPACKE_lsame( jobz, 'v' ) ) { z_t = (float*) - LAPACKE_malloc( sizeof(float) * ldz_t * MAX(2*n,1) ); + LAPACKE_malloc( sizeof(float) * ldz_t * MAX(ncols_z,1) ); if( z_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; @@ -72,8 +72,8 @@ lapack_int LAPACKE_sbdsvdx_work( int matrix_layout, char uplo, char jobz, char r } /* Call LAPACK function and adjust info */ LAPACK_sbdsvdx( &uplo, &jobz, &range, &n, d, e, &vl, &vu, - &il, &iu, ns, s, z_t, &ldz_t, work, - iwork, &info ); + &il, &iu, ns, s, z_t, &ldz_t, work, + iwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_sdisna.c b/lapack-netlib/LAPACKE/src/lapacke_sdisna.c index 07be8c569..84b4914b2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sdisna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sdisna.c @@ -37,9 +37,11 @@ lapack_int LAPACKE_sdisna( char job, lapack_int m, lapack_int n, const float* d, float* sep ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( MIN(m,n), d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( MIN(m,n), d, 1 ) ) { + return -4; + } } #endif return LAPACKE_sdisna_work( job, m, n, d, sep ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbbrd.c b/lapack-netlib/LAPACKE/src/lapacke_sgbbrd.c index 8aaf195b1..0963c774f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbbrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbbrd.c @@ -46,13 +46,15 @@ lapack_int LAPACKE_sgbbrd( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( ncc != 0 ) { - if( LAPACKE_sge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -8; + } + if( ncc != 0 ) { + if( LAPACKE_sge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbcon.c b/lapack-netlib/LAPACKE/src/lapacke_sgbcon.c index 4ed5c08b0..f5e251027 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbcon.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sgbcon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbequ.c b/lapack-netlib/LAPACKE/src/lapacke_sgbequ.c index 46bd13b68..4c02db84d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbequ.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_sgbequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_sgbequ_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbequb.c b/lapack-netlib/LAPACKE/src/lapacke_sgbequb.c index 826917307..2210e5138 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbequb.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_sgbequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_sgbequb_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_sgbrfs.c index 8af03deab..ce35b1318 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbrfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_sgbrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -9; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -9; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_sgbrfsx.c index 5f76295ba..02f786bbf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbrfsx.c @@ -52,33 +52,35 @@ lapack_int LAPACKE_sgbrfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -15; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -10; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -13; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -15; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -14; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -13; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -17; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -17; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbsv.c b/lapack-netlib/LAPACKE/src/lapacke_sgbsv.c index faaa179f3..c7375ec39 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_sgbsv( int matrix_layout, lapack_int n, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_sgbsv_work( matrix_layout, n, kl, ku, nrhs, ab, ldab, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_sgbsvx.c index 725ea971e..6b667d240 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbsvx.c @@ -50,29 +50,31 @@ lapack_int LAPACKE_sgbsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbsvxx.c b/lapack-netlib/LAPACKE/src/lapacke_sgbsvxx.c index f8a0a00f0..3264a1d12 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbsvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbsvxx.c @@ -52,34 +52,36 @@ lapack_int LAPACKE_sgbsvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -27; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -27; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_sgbtrf.c index 2e2aa8db2..8752144a0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbtrf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_sgbtrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_sgbtrf_work( matrix_layout, m, n, kl, ku, ab, ldab, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_sgbtrs.c index dbd390dc4..ecaa459ea 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_sgbtrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_sgbtrs_work( matrix_layout, trans, n, kl, ku, nrhs, ab, ldab, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgebak.c b/lapack-netlib/LAPACKE/src/lapacke_sgebak.c index 7a1be37d0..d4c507ea0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgebak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgebak.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_sgebak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, scale, 1 ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, scale, 1 ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -9; + } } #endif return LAPACKE_sgebak_work( matrix_layout, job, side, n, ilo, ihi, scale, m, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgebal.c b/lapack-netlib/LAPACKE/src/lapacke_sgebal.c index d1bc09519..10e3ff16b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgebal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgebal.c @@ -42,11 +42,13 @@ lapack_int LAPACKE_sgebal( int matrix_layout, char job, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || - LAPACKE_lsame( job, 's' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || + LAPACKE_lsame( job, 's' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgebrd.c b/lapack-netlib/LAPACKE/src/lapacke_sgebrd.c index 6f2170f54..422bb5e9a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgebrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgebrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_sgebrd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgecon.c b/lapack-netlib/LAPACKE/src/lapacke_sgecon.c index c2289e27e..78426f9d0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgecon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgecon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_sgecon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeequ.c b/lapack-netlib/LAPACKE/src/lapacke_sgeequ.c index 28690e6ba..75095e9ad 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_sgeequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_sgeequ_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeequb.c b/lapack-netlib/LAPACKE/src/lapacke_sgeequb.c index 60de696dd..d98cce32d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeequb.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_sgeequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_sgeequb_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgees.c b/lapack-netlib/LAPACKE/src/lapacke_sgees.c index 5b88538d1..f87005afc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgees.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgees.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_sgees( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeesx.c b/lapack-netlib/LAPACKE/src/lapacke_sgeesx.c index 04c08ffab..91cfc4fa5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeesx.c @@ -52,9 +52,11 @@ lapack_int LAPACKE_sgeesx( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeev.c b/lapack-netlib/LAPACKE/src/lapacke_sgeev.c index 41581145d..fe79121f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeev.c @@ -47,9 +47,11 @@ lapack_int LAPACKE_sgeev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeevx.c b/lapack-netlib/LAPACKE/src/lapacke_sgeevx.c index 5447734f2..732328fc0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeevx.c @@ -50,9 +50,11 @@ lapack_int LAPACKE_sgeevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgehrd.c b/lapack-netlib/LAPACKE/src/lapacke_sgehrd.c index 7a0fb4fbe..b073abbd7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgehrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgehrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_sgehrd( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgejsv.c b/lapack-netlib/LAPACKE/src/lapacke_sgejsv.c index 2dc52090c..aa0eeb746 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgejsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgejsv.c @@ -80,11 +80,13 @@ lapack_int LAPACKE_sgejsv( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; - nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; + nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgelq.c b/lapack-netlib/LAPACKE/src/lapacke_sgelq.c index 3fb1d9f85..d0c14d494 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgelq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgelq.c @@ -28,14 +28,14 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sgelq * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" -lapack_int LAPACKE_sgelq_work( int matrix_layout, lapack_int m, lapack_int n, - float* a, lapack_int lda, - float* t, lapack_int tsize ) +lapack_int LAPACKE_sgelq( int matrix_layout, lapack_int m, lapack_int n, + float* a, lapack_int lda, + float* t, lapack_int tsize ) { lapack_int info = 0; lapack_int lwork = -1; @@ -46,9 +46,11 @@ lapack_int LAPACKE_sgelq_work( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgelq2.c b/lapack-netlib/LAPACKE/src/lapacke_sgelq2.c index bab4775cd..90da3a6c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgelq2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgelq2.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_sgelq2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgelqf.c b/lapack-netlib/LAPACKE/src/lapacke_sgelqf.c index 7590b688d..a017001ba 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgelqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgelqf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_sgelqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgels.c b/lapack-netlib/LAPACKE/src/lapacke_sgels.c index 43a4097a5..f9cac6c5b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgels.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgels.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sgels( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgelsd.c b/lapack-netlib/LAPACKE/src/lapacke_sgelsd.c index 08502bbfc..fc42b1eec 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgelsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgelsd.c @@ -51,15 +51,17 @@ lapack_int LAPACKE_sgelsd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgelss.c b/lapack-netlib/LAPACKE/src/lapacke_sgelss.c index 6fd39c474..8b9eff322 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgelss.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgelss.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_sgelss( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgelsy.c b/lapack-netlib/LAPACKE/src/lapacke_sgelsy.c index 956c649ae..435b76ecb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgelsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgelsy.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_sgelsy( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgemlq.c b/lapack-netlib/LAPACKE/src/lapacke_sgemlq.c index 1ed3f0485..708b04fde 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgemlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgemlq.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sgemlq * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,15 +48,17 @@ lapack_int LAPACKE_sgemlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_s_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgemqr.c b/lapack-netlib/LAPACKE/src/lapacke_sgemqr.c index 4619d927a..1eb7b75be 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgemqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgemqr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_sgemqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_sge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_s_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_sge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_s_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgemqrt.c b/lapack-netlib/LAPACKE/src/lapacke_sgemqrt.c index 1fa1c82d0..0745bbfba 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgemqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgemqrt.c @@ -47,17 +47,19 @@ lapack_int LAPACKE_sgemqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -12; - } - if( LAPACKE_sge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -12; + } + if( LAPACKE_sge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqlf.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqlf.c index 6b1983bac..2881981c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqlf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqlf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_sgeqlf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqp3.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqp3.c index 9a3f2f664..c35c4b898 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqp3.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_sgeqp3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqpf.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqpf.c index 1ad648973..65a06a809 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqpf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqpf.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_sgeqpf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqr.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqr.c index 60323f53c..5606f3ed7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqr.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_sgeqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqr2.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqr2.c index 818a2aeeb..5c044f56f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqr2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqr2.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_sgeqr2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqrf.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqrf.c index 69168b2ea..17929fc28 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqrf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_sgeqrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqrfp.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqrfp.c index 83b8d4cb0..39901fdbe 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqrfp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqrfp.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_sgeqrfp( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqrt.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqrt.c index a3363781e..04997cda5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqrt.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_sgeqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqrt2.c index 238f7283b..a46d0d07f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqrt2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_sgeqrt2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_sgeqrt2_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgeqrt3.c b/lapack-netlib/LAPACKE/src/lapacke_sgeqrt3.c index 26f76211d..7722d6283 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgeqrt3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgeqrt3.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_sgeqrt3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_sgeqrt3_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgerfs.c b/lapack-netlib/LAPACKE/src/lapacke_sgerfs.c index 39c317b29..73c7a023e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgerfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgerfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_sgerfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgerfsx.c b/lapack-netlib/LAPACKE/src/lapacke_sgerfsx.c index 3a3df411c..bf0bd74b1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgerfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgerfsx.c @@ -51,33 +51,35 @@ lapack_int LAPACKE_sgerfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -11; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -12; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -11; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgerqf.c b/lapack-netlib/LAPACKE/src/lapacke_sgerqf.c index cab074ce9..c6de00616 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgerqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgerqf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_sgerqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesdd.c b/lapack-netlib/LAPACKE/src/lapacke_sgesdd.c index 80dd3124e..1c555411e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesdd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesdd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_sgesdd( int matrix_layout, char jobz, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesv.c b/lapack-netlib/LAPACKE/src/lapacke_sgesv.c index 6771407fe..0ef406ea8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_sgesv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_sgesv_work( matrix_layout, n, nrhs, a, lda, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvd.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvd.c index 0ae005788..bf401ddb2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_sgesvd( int matrix_layout, char jobu, char jobvt, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvdx.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvdx.c index 6387451af..e6d2a7dda 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvdx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvdx.c @@ -52,14 +52,16 @@ lapack_int LAPACKE_sgesvdx( int matrix_layout, char jobu, char jobvt, char range return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ info = LAPACKE_sgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, + m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, &work_query, lwork, iwork ); if( info != 0 ) { goto exit_level_0; @@ -78,8 +80,8 @@ lapack_int LAPACKE_sgesvdx( int matrix_layout, char jobu, char jobvt, char range } /* Call middle-level interface */ info = LAPACKE_sgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, - ldu, vt, ldvt, work, lwork, iwork ); + m, n, a, lda, vl, vu, il, iu, ns, s, u, + ldu, vt, ldvt, work, lwork, iwork ); /* Backup significant data from working array(s) */ for( i=0; i<12*MIN(m,n)-1; i++ ) { superb[i] = iwork[i+1]; diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvdx_work.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvdx_work.c index f7973f016..8f4739d56 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvdx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvdx_work.c @@ -34,18 +34,18 @@ #include "lapacke_utils.h" lapack_int LAPACKE_sgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, float* a, - lapack_int lda, float vl, float vu, - lapack_int il, lapack_int iu, lapack_int* ns, - float* s, float* u, lapack_int ldu, - float* vt, lapack_int ldvt, - float* work, lapack_int lwork, lapack_int* iwork ) + lapack_int m, lapack_int n, float* a, + lapack_int lda, float vl, float vu, + lapack_int il, lapack_int iu, lapack_int* ns, + float* s, float* u, lapack_int ldu, + float* vt, lapack_int ldvt, + float* work, lapack_int lwork, lapack_int* iwork ) { lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_sgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda, &vl, &vu, - &il, &iu, ns, s, u, &ldu, vt, &ldvt, + &il, &iu, ns, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; @@ -84,7 +84,7 @@ lapack_int LAPACKE_sgesvdx_work( int matrix_layout, char jobu, char jobvt, char /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { LAPACK_sgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda_t, &vl, &vu, - &il, &iu, ns, s, u, &ldu_t, vt, + &il, &iu, ns, s, u, &ldu_t, vt, &ldvt_t, work, &lwork, iwork, &info ); return (info < 0) ? (info - 1) : info; } @@ -114,8 +114,8 @@ lapack_int LAPACKE_sgesvdx_work( int matrix_layout, char jobu, char jobvt, char LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_sgesvdx( &jobu, &jobvt, &range, &m, &n, a_t, &lda_t, &vl, &vu, - &il, &iu, ns, s, u_t, &ldu_t, vt_t, - &ldvt_t, work, &lwork, iwork, &info ); + &il, &iu, ns, s, u_t, &ldu_t, vt_t, + &ldvt_t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvj.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvj.c index c49c5412c..f1b48a02d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvj.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvj.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_sgesvj( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : - ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0 ); - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -7; - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : + ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0 ); + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvx.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvx.c index 9e198f508..34203df72 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvx.c @@ -49,28 +49,30 @@ lapack_int LAPACKE_sgesvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvxx.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvxx.c index c22453508..a064dd9d0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvxx.c @@ -51,33 +51,35 @@ lapack_int LAPACKE_sgesvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_s_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_s_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_s_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_s_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetf2.c b/lapack-netlib/LAPACKE/src/lapacke_sgetf2.c index f7e566081..dbfeb9fe9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgetf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_sgetf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_sgetf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetrf.c b/lapack-netlib/LAPACKE/src/lapacke_sgetrf.c index ef76e4fb0..4b97ea1f7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgetrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_sgetrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_sgetrf_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetrf2.c b/lapack-netlib/LAPACKE/src/lapacke_sgetrf2.c index 3bb6a865b..81d487b32 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgetrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetrf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_sgetrf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_sgetrf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetri.c b/lapack-netlib/LAPACKE/src/lapacke_sgetri.c index 21cef74e2..d62221d54 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgetri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetri.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_sgetri( int matrix_layout, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetrs.c b/lapack-netlib/LAPACKE/src/lapacke_sgetrs.c index 095c5c533..bedd4653a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgetrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_sgetrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_sgetrs_work( matrix_layout, trans, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetsls.c b/lapack-netlib/LAPACKE/src/lapacke_sgetsls.c index 1a1d8f3a8..4ee7eeed5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgetsls.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetsls.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sgetsls( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggbak.c b/lapack-netlib/LAPACKE/src/lapacke_sggbak.c index 23a1d6d1e..b58171289 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggbak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggbak.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_sggbak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, lscale, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n, rscale, 1 ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, lscale, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( n, rscale, 1 ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -10; + } } #endif return LAPACKE_sggbak_work( matrix_layout, job, side, n, ilo, ihi, lscale, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggbal.c b/lapack-netlib/LAPACKE/src/lapacke_sggbal.c index 628aec897..74cd85cf5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggbal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggbal.c @@ -47,17 +47,19 @@ lapack_int LAPACKE_sggbal( int matrix_layout, char job, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } - } - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -6; + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgges.c b/lapack-netlib/LAPACKE/src/lapacke_sgges.c index 16baa67cf..d2e32408b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgges.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgges.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_sgges( int matrix_layout, char jobvsl, char jobvsr, char sort return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgges3.c b/lapack-netlib/LAPACKE/src/lapacke_sgges3.c index 00bd01acb..e02a302dc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgges3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgges3.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_sgges3( int matrix_layout, char jobvsl, char jobvsr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggesx.c b/lapack-netlib/LAPACKE/src/lapacke_sggesx.c index 282d88678..f0acb70a4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggesx.c @@ -54,12 +54,14 @@ lapack_int LAPACKE_sggesx( int matrix_layout, char jobvsl, char jobvsr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggev.c b/lapack-netlib/LAPACKE/src/lapacke_sggev.c index e2a69ef03..aaec6c512 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggev.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_sggev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggev3.c b/lapack-netlib/LAPACKE/src/lapacke_sggev3.c index 0b32ffa80..b6a16d913 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggev3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggev3.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_sggev3( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggevx.c b/lapack-netlib/LAPACKE/src/lapacke_sggevx.c index b52dab364..0e028547d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggevx.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_sggevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggglm.c b/lapack-netlib/LAPACKE/src/lapacke_sggglm.c index d18f80082..418dae1bf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggglm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggglm.c @@ -46,15 +46,17 @@ lapack_int LAPACKE_sggglm( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgghd3.c b/lapack-netlib/LAPACKE/src/lapacke_sgghd3.c index 00a967c39..ffc7d9cae 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgghd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgghd3.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_sgghd3( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgghrd.c b/lapack-netlib/LAPACKE/src/lapacke_sgghrd.c index a26461b66..e438c01a2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgghrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgghrd.c @@ -43,21 +43,23 @@ lapack_int LAPACKE_sgghrd( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgglse.c b/lapack-netlib/LAPACKE/src/lapacke_sgglse.c index f126dcef6..8c10eda9c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgglse.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgglse.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_sgglse( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( m, c, 1 ) ) { - return -9; - } - if( LAPACKE_s_nancheck( p, d, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( m, c, 1 ) ) { + return -9; + } + if( LAPACKE_s_nancheck( p, d, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggqrf.c b/lapack-netlib/LAPACKE/src/lapacke_sggqrf.c index d1dd32483..6f1c73f60 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggqrf.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sggqrf( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggrqf.c b/lapack-netlib/LAPACKE/src/lapacke_sggrqf.c index f5bc8d2cc..9d5799df1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggrqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggrqf.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sggrqf( int matrix_layout, lapack_int m, lapack_int p, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggsvd.c b/lapack-netlib/LAPACKE/src/lapacke_sggsvd.c index 35661cbf9..fed6d3d04 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggsvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggsvd.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_sggsvd( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggsvd3.c b/lapack-netlib/LAPACKE/src/lapacke_sggsvd3.c index 5a2703a5f..7b209e497 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggsvd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggsvd3.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_sggsvd3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif /* Query optimal working array(s) size if requested */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggsvp.c b/lapack-netlib/LAPACKE/src/lapacke_sggsvp.c index 7911c39fc..ad656a063 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggsvp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggsvp.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_sggsvp( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sggsvp3.c b/lapack-netlib/LAPACKE/src/lapacke_sggsvp3.c index 1cb1d337b..16417fbf7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sggsvp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sggsvp3.c @@ -51,18 +51,20 @@ lapack_int LAPACKE_sggsvp3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Query optimal size for working array */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgtcon.c b/lapack-netlib/LAPACKE/src/lapacke_sgtcon.c index f3818c57c..013f8a66c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgtcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgtcon.c @@ -41,21 +41,23 @@ lapack_int LAPACKE_sgtcon( char norm, lapack_int n, const float* dl, lapack_int* iwork = NULL; float* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -8; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { - return -3; - } - if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -8; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { + return -3; + } + if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgtrfs.c b/lapack-netlib/LAPACKE/src/lapacke_sgtrfs.c index 4741542a1..9d27c8f07 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgtrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgtrfs.c @@ -49,33 +49,35 @@ lapack_int LAPACKE_sgtrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( n, df, 1 ) ) { - return -9; - } - if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, dlf, 1 ) ) { - return -8; - } - if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { - return -11; - } - if( LAPACKE_s_nancheck( n-1, duf, 1 ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( n, df, 1 ) ) { + return -9; + } + if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, dlf, 1 ) ) { + return -8; + } + if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { + return -11; + } + if( LAPACKE_s_nancheck( n-1, duf, 1 ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgtsv.c b/lapack-netlib/LAPACKE/src/lapacke_sgtsv.c index 1b80b4621..af692c52f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgtsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgtsv.c @@ -42,18 +42,20 @@ lapack_int LAPACKE_sgtsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { + return -6; + } } #endif return LAPACKE_sgtsv_work( matrix_layout, n, nrhs, dl, d, du, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgtsvx.c b/lapack-netlib/LAPACKE/src/lapacke_sgtsvx.c index f6fb15147..76239c211 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgtsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgtsvx.c @@ -49,37 +49,39 @@ lapack_int LAPACKE_sgtsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n, df, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n-1, dlf, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -7; } - } - if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n, df, 1 ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n-1, duf, 1 ) ) { - return -11; + if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n-1, dlf, 1 ) ) { + return -9; + } + } + if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { + return -8; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { + return -12; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n-1, duf, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgttrf.c b/lapack-netlib/LAPACKE/src/lapacke_sgttrf.c index 3d418a5d6..5814d9dc2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgttrf.c @@ -37,15 +37,17 @@ lapack_int LAPACKE_sgttrf( lapack_int n, float* dl, float* d, float* du, float* du2, lapack_int* ipiv ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { - return -2; - } - if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { + return -2; + } + if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { + return -4; + } } #endif return LAPACKE_sgttrf_work( n, dl, d, du, du2, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgttrs.c b/lapack-netlib/LAPACKE/src/lapacke_sgttrs.c index 3ae674b72..de60a20ee 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgttrs.c @@ -43,21 +43,23 @@ lapack_int LAPACKE_sgttrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( n-2, du2, 1 ) ) { + return -8; + } } #endif return LAPACKE_sgttrs_work( matrix_layout, trans, n, nrhs, dl, d, du, du2, diff --git a/lapack-netlib/LAPACKE/src/lapacke_shgeqz.c b/lapack-netlib/LAPACKE/src/lapacke_shgeqz.c index 911f944dd..164af10be 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_shgeqz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_shgeqz.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_shgeqz( int matrix_layout, char job, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -8; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -15; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -8; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -10; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -17; + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -15; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -10; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -17; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_shsein.c b/lapack-netlib/LAPACKE/src/lapacke_shsein.c index c1107f8aa..f5ea56d47 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_shsein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_shsein.c @@ -47,25 +47,27 @@ lapack_int LAPACKE_shsein( int matrix_layout, char job, char eigsrc, char initv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -13; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -11; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -13; + } + } + if( LAPACKE_s_nancheck( n, wi, 1 ) ) { + return -10; + } + if( LAPACKE_s_nancheck( n, wr, 1 ) ) { + return -9; } - } - if( LAPACKE_s_nancheck( n, wi, 1 ) ) { - return -10; - } - if( LAPACKE_s_nancheck( n, wr, 1 ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_shseqr.c b/lapack-netlib/LAPACKE/src/lapacke_shseqr.c index e0b5f69b7..a6ab57881 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_shseqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_shseqr.c @@ -47,13 +47,15 @@ lapack_int LAPACKE_shseqr( int matrix_layout, char job, char compz, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_slacn2.c b/lapack-netlib/LAPACKE/src/lapacke_slacn2.c index 15d06d551..4d1df9e4c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slacn2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slacn2.c @@ -37,12 +37,14 @@ lapack_int LAPACKE_slacn2( lapack_int n, float* v, float* x, lapack_int* isgn, float* est, lapack_int* kase, lapack_int* isave ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, est, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, x, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, est, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n, x, 1 ) ) { + return -3; + } } #endif return LAPACKE_slacn2_work( n, v, x, isgn, est, kase, isave ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slacpy.c b/lapack-netlib/LAPACKE/src/lapacke_slacpy.c index b7c0796a7..83ba93be5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slacpy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slacpy.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_slacpy( int matrix_layout, char uplo, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_slacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slag2d.c b/lapack-netlib/LAPACKE/src/lapacke_slag2d.c index 7f25d3b2b..76485c106 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slag2d.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slag2d.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_slag2d( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, sa, ldsa ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, sa, ldsa ) ) { + return -4; + } } #endif return LAPACKE_slag2d_work( matrix_layout, m, n, sa, ldsa, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slagge.c b/lapack-netlib/LAPACKE/src/lapacke_slagge.c index 30879527d..050f6cbe9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slagge.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slagge.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_slagge( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( MIN(m,n), d, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( MIN(m,n), d, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_slagsy.c b/lapack-netlib/LAPACKE/src/lapacke_slagsy.c index 6ff1160ac..d21051038 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slagsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slagsy.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_slagsy( int matrix_layout, lapack_int n, lapack_int k, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_slange.c b/lapack-netlib/LAPACKE/src/lapacke_slange.c index 8d1e172f6..f5f8272f3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slange.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slange.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function slange * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -37,16 +37,18 @@ float LAPACKE_slange( int matrix_layout, char norm, lapack_int m, lapack_int n, const float* a, lapack_int lda ) { lapack_int info = 0; - float res = 0.; + float res = 0.; float* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_slange", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_slange_work.c b/lapack-netlib/LAPACKE/src/lapacke_slange_work.c index dc03fdfb3..6a5dfacfe 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slange_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slange_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function slange * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,37 +38,42 @@ float LAPACKE_slange_work( int matrix_layout, char norm, lapack_int m, float* work ) { lapack_int info = 0; - float res = 0.; + float res = 0.; + char norm_lapack; if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ + /* Call LAPACK function */ res = LAPACK_slange( &norm, &m, &n, a, &lda, work ); - if( info < 0 ) { - info = info - 1; - } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,m); - float* a_t = NULL; + float* work_lapack = NULL; /* Check leading dimension(s) */ if( lda < n ) { info = -6; LAPACKE_xerbla( "LAPACKE_slange_work", info ); return info; } - /* Allocate memory for temporary array(s) */ - a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; + if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) { + norm_lapack = 'i'; + } else if( LAPACKE_lsame( norm, 'i' ) ) { + norm_lapack = '1'; + } else { + norm_lapack = norm; + } + /* Allocate memory for work array(s) */ + if( LAPACKE_lsame( norm_lapack, 'i' ) ) { + work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) ); + if( work_lapack == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } } - /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); - /* Call LAPACK function and adjust info */ - res = LAPACK_slange( &norm, &m, &n, a_t, &lda_t, work ); - info = 0; /* LAPACK call is ok! */ + /* Call LAPACK function */ + res = LAPACK_slange( &norm_lapack, &n, &m, a, &lda, work_lapack ); /* Release memory and exit */ - LAPACKE_free( a_t ); + if( work_lapack ) { + LAPACKE_free( work_lapack ); + } exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_slange_work", info ); } } else { diff --git a/lapack-netlib/LAPACKE/src/lapacke_slansy.c b/lapack-netlib/LAPACKE/src/lapacke_slansy.c index a2711fa82..bbe3688a4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slansy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slansy.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function slansy * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -37,16 +37,18 @@ float LAPACKE_slansy( int matrix_layout, char norm, char uplo, lapack_int n, const float* a, lapack_int lda ) { lapack_int info = 0; - float res = 0.; + float res = 0.; float* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_slansy", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_slansy_work.c b/lapack-netlib/LAPACKE/src/lapacke_slansy_work.c index 9e14a5f97..3c0909ab9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slansy_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slansy_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function slansy * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,7 +38,7 @@ float LAPACKE_slansy_work( int matrix_layout, char norm, char uplo, float* work ) { lapack_int info = 0; - float res = 0.; + float res = 0.; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ res = LAPACK_slansy( &norm, &uplo, &n, a, &lda, work ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slantr.c b/lapack-netlib/LAPACKE/src/lapacke_slantr.c index e92dc62ff..9e67fcc40 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slantr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slantr.c @@ -45,9 +45,11 @@ float LAPACKE_slantr( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_slapmr.c b/lapack-netlib/LAPACKE/src/lapacke_slapmr.c index e020cc49a..139703cb6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slapmr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slapmr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_slapmr( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_slapmr_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slapmt.c b/lapack-netlib/LAPACKE/src/lapacke_slapmt.c index 7d9a7abe6..55d2f4730 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slapmt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slapmt.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_slapmt( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_slapmt_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slapy2.c b/lapack-netlib/LAPACKE/src/lapacke_slapy2.c index 841086270..54e3d3ed8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slapy2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slapy2.c @@ -36,12 +36,14 @@ float LAPACKE_slapy2( float x, float y ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &x, 1 ) ) { - return -1; - } - if( LAPACKE_s_nancheck( 1, &y, 1 ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &x, 1 ) ) { + return -1; + } + if( LAPACKE_s_nancheck( 1, &y, 1 ) ) { + return -2; + } } #endif return LAPACKE_slapy2_work( x, y ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slapy3.c b/lapack-netlib/LAPACKE/src/lapacke_slapy3.c index c6eea251f..4fb4d097d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slapy3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slapy3.c @@ -36,15 +36,17 @@ float LAPACKE_slapy3( float x, float y, float z ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &x, 1 ) ) { - return -1; - } - if( LAPACKE_s_nancheck( 1, &y, 1 ) ) { - return -2; - } - if( LAPACKE_s_nancheck( 1, &z, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &x, 1 ) ) { + return -1; + } + if( LAPACKE_s_nancheck( 1, &y, 1 ) ) { + return -2; + } + if( LAPACKE_s_nancheck( 1, &z, 1 ) ) { + return -3; + } } #endif return LAPACKE_slapy3_work( x, y, z ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slarfb.c b/lapack-netlib/LAPACKE/src/lapacke_slarfb.c index c64fc1f0a..72fa75ef1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slarfb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function slarfb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,7 +40,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct lapack_int ldc ) { lapack_int info = 0; - lapack_int ldwork = ( side=='l')?n:(( side=='r')?m:1); + lapack_int ldwork; float* work = NULL; lapack_int ncols_v, nrows_v; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -48,57 +48,66 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; - } - if( LAPACKE_sge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + nrows_v = ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } - if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*ldv], ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); - return -8; + if( LAPACKE_sge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { + if( k > nrows_v ) { + LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); + return -8; + } + if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, + &v[(nrows_v-k)*ldv], ldv ) ) + return -9; + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( k > ncols_v ) { + LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); + return -8; + } + if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], + ldv ) ) + return -9; + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) + return -9; } - if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], - ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; } #endif + if( LAPACKE_lsame( side, 'l' ) ) { + ldwork = n; + } else if( LAPACKE_lsame( side, 'r' ) ) { + ldwork = m; + } else { + ldwork = 1; + } /* Allocate memory for working array(s) */ work = (float*)LAPACKE_malloc( sizeof(float) * ldwork * MAX(1,k) ); if( work == NULL ) { diff --git a/lapack-netlib/LAPACKE/src/lapacke_slarfg.c b/lapack-netlib/LAPACKE/src/lapacke_slarfg.c index f5a4eb1db..295277387 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slarfg.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slarfg.c @@ -37,12 +37,14 @@ lapack_int LAPACKE_slarfg( lapack_int n, float* alpha, float* x, lapack_int incx, float* tau ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, alpha, 1 ) ) { - return -2; - } - if( LAPACKE_s_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, alpha, 1 ) ) { + return -2; + } + if( LAPACKE_s_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -3; + } } #endif return LAPACKE_slarfg_work( n, alpha, x, incx, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slarft.c b/lapack-netlib/LAPACKE/src/lapacke_slarft.c index 277752650..ae8b3a966 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slarft.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slarft.c @@ -44,16 +44,18 @@ lapack_int LAPACKE_slarft( int matrix_layout, char direct, char storev, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( LAPACKE_lsame( storev, 'r' ) ? n : 1); - nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1); - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( LAPACKE_lsame( storev, 'r' ) ? n : 1); + nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1); + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -6; + } } #endif return LAPACKE_slarft_work( matrix_layout, direct, storev, n, k, v, ldv, tau, diff --git a/lapack-netlib/LAPACKE/src/lapacke_slarfx.c b/lapack-netlib/LAPACKE/src/lapacke_slarfx.c index f6df46d0e..426137815 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slarfx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slarfx.c @@ -42,15 +42,17 @@ lapack_int LAPACKE_slarfx( int matrix_layout, char side, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &tau, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( m, v, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &tau, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( m, v, 1 ) ) { + return -5; + } } #endif return LAPACKE_slarfx_work( matrix_layout, side, m, n, v, tau, c, ldc, diff --git a/lapack-netlib/LAPACKE/src/lapacke_slartgp.c b/lapack-netlib/LAPACKE/src/lapacke_slartgp.c index 2b48b8599..88cda1372 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slartgp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slartgp.c @@ -36,12 +36,14 @@ lapack_int LAPACKE_slartgp( float f, float g, float* cs, float* sn, float* r ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &f, 1 ) ) { - return -1; - } - if( LAPACKE_s_nancheck( 1, &g, 1 ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &f, 1 ) ) { + return -1; + } + if( LAPACKE_s_nancheck( 1, &g, 1 ) ) { + return -2; + } } #endif return LAPACKE_slartgp_work( f, g, cs, sn, r ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slartgs.c b/lapack-netlib/LAPACKE/src/lapacke_slartgs.c index 3da6df1f5..566f474b2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slartgs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slartgs.c @@ -37,15 +37,17 @@ lapack_int LAPACKE_slartgs( float x, float y, float sigma, float* cs, float* sn ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &sigma, 1 ) ) { - return -3; - } - if( LAPACKE_s_nancheck( 1, &x, 1 ) ) { - return -1; - } - if( LAPACKE_s_nancheck( 1, &y, 1 ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &sigma, 1 ) ) { + return -3; + } + if( LAPACKE_s_nancheck( 1, &x, 1 ) ) { + return -1; + } + if( LAPACKE_s_nancheck( 1, &y, 1 ) ) { + return -2; + } } #endif return LAPACKE_slartgs_work( x, y, sigma, cs, sn ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slascl.c b/lapack-netlib/LAPACKE/src/lapacke_slascl.c index b5368e4b8..25bd9624e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slascl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slascl.c @@ -43,68 +43,70 @@ lapack_int LAPACKE_slascl( int matrix_layout, char type, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - switch (type) { - case 'G': - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + switch (type) { + case 'G': + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -9; + } + break; + case 'L': + // TYPE = 'L' - lower triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_sgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_sgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { + return -9; + } + break; + case 'U': + // TYPE = 'U' - upper triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_sgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_sgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { + return -9; + } + break; + case 'H': + // TYPE = 'H' - part of upper Hessenberg matrix in general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_sgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_sgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { + return -9; + } + case 'B': + // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) + if( LAPACKE_ssb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { + return -9; + } + break; + case 'Q': + // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) + if( LAPACKE_ssb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { + return -9; + } + break; + case 'Z': + // TYPE = 'Z' - band matrix laid out for ?GBTRF + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { + return -9; + } + break; } - break; - case 'L': - // TYPE = 'L' - lower triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_sgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_sgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { - return -9; - } - break; - case 'U': - // TYPE = 'U' - upper triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_sgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_sgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { - return -9; - } - break; - case 'H': - // TYPE = 'H' - part of upper Hessenberg matrix in general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_sgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_sgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { - return -9; - } - case 'B': - // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) - if( LAPACKE_ssb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { - return -9; - } - break; - case 'Q': - // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) - if( LAPACKE_ssb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { - return -9; - } - break; - case 'Z': - // TYPE = 'Z' - band matrix laid out for ?GBTRF - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_sgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { - return -9; - } - break; } #endif return LAPACKE_slascl_work( matrix_layout, type, kl, ku, cfrom, cto, m, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slaset.c b/lapack-netlib/LAPACKE/src/lapacke_slaset.c index 45044ad5f..793cb5171 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slaset.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slaset.c @@ -49,12 +49,14 @@ lapack_int LAPACKE_slaset( int matrix_layout, char uplo, lapack_int m, *****************************************************************************/ #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( 1, &beta, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( 1, &beta, 1 ) ) { + return -6; + } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_slasrt.c b/lapack-netlib/LAPACKE/src/lapacke_slasrt.c index d426fb2ad..5a950d361 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slasrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slasrt.c @@ -36,9 +36,11 @@ lapack_int LAPACKE_slasrt( char id, lapack_int n, float* d ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -3; + } } #endif return LAPACKE_slasrt_work( id, n, d ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slassq.c b/lapack-netlib/LAPACKE/src/lapacke_slassq.c new file mode 100644 index 000000000..668289e18 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_slassq.c @@ -0,0 +1,53 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function slassq +* Author: Julien Langou +* Generated February, 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_slassq( lapack_int n, float* x, lapack_int incx, float* scale, float* sumsq ) +{ +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input vector `x` and in/out scalars `scale` and `sumsq` for NaNs */ + if( LAPACKE_s_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -2; + } + if( LAPACKE_s_nancheck( 1, scale, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, sumsq, 1 ) ) { + return -5; + } + } +#endif + return LAPACKE_slassq_work( n, x, incx, scale, sumsq ); +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_slassq_work.c b/lapack-netlib/LAPACKE/src/lapacke_slassq_work.c new file mode 100644 index 000000000..00688c9e4 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_slassq_work.c @@ -0,0 +1,41 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function slassq +* Author: Julien Langou +* Generated February, 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_slassq_work( lapack_int n, float* x, lapack_int incx, float* scale, float* sumsq ) +{ + lapack_int info = 0; + LAPACK_slassq( &n, x, &incx, scale, sumsq ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_slaswp.c b/lapack-netlib/LAPACKE/src/lapacke_slaswp.c index 4fd4a5e14..a46e7ba9b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slaswp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slaswp.c @@ -42,19 +42,21 @@ lapack_int LAPACKE_slaswp( int matrix_layout, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ -/***************************************************************************** -* Disable the check as is below, the check below was checking for NaN -* from lda to n since there is no (obvious) way to knowing m. This is not -* a good idea. We could get a lower bound of m by scanning from ipiv. Or -* we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable -* the buggy Nan check. -* See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 -*****************************************************************************/ -/* if( LAPACKE_sge_nancheck( matrix_layout, lda, n, a, lda ) ) { -* return -3; -* } -*/ + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + /***************************************************************************** + * Disable the check as is below, the check below was checking for NaN + * from lda to n since there is no (obvious) way to knowing m. This is not + * a good idea. We could get a lower bound of m by scanning from ipiv. Or + * we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable + * the buggy Nan check. + * See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 + *****************************************************************************/ + /* if( LAPACKE_sge_nancheck( matrix_layout, lda, n, a, lda ) ) { + * return -3; + * } + */ + } #endif return LAPACKE_slaswp_work( matrix_layout, n, a, lda, k1, k2, ipiv, incx ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_slaswp_work.c b/lapack-netlib/LAPACKE/src/lapacke_slaswp_work.c index 1faadbb96..fa920df5e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slaswp_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slaswp_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function slaswp * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_slatms.c b/lapack-netlib/LAPACKE/src/lapacke_slatms.c index d5ac5d338..ac01ebc1d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slatms.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slatms.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_slatms( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -14; - } - if( LAPACKE_s_nancheck( 1, &cond, 1 ) ) { - return -9; - } - if( LAPACKE_s_nancheck( MIN(n,m), d, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &dmax, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -14; + } + if( LAPACKE_s_nancheck( 1, &cond, 1 ) ) { + return -9; + } + if( LAPACKE_s_nancheck( MIN(n,m), d, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &dmax, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_slauum.c b/lapack-netlib/LAPACKE/src/lapacke_slauum.c index f50d14b69..d31a9b974 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slauum.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slauum.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_slauum( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_slauum_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sopgtr.c b/lapack-netlib/LAPACKE/src/lapacke_sopgtr.c index 68103e4c9..cf3ae2093 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sopgtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sopgtr.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_sopgtr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, tau, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, tau, 1 ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sopmtr.c b/lapack-netlib/LAPACKE/src/lapacke_sopmtr.c index 6ed4665b7..333789837 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sopmtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sopmtr.c @@ -47,16 +47,18 @@ lapack_int LAPACKE_sopmtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_ssp_nancheck( r, ap ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -9; - } - if( LAPACKE_s_nancheck( m-1, tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_ssp_nancheck( r, ap ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -9; + } + if( LAPACKE_s_nancheck( m-1, tau, 1 ) ) { + return -8; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorbdb.c b/lapack-netlib/LAPACKE/src/lapacke_sorbdb.c index 1a0ede9ae..d1fdab004 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorbdb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorbdb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sorbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -45,28 +45,31 @@ lapack_int LAPACKE_sorbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; float* work = NULL; float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_sorbdb", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -9; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -11; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -13; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -9; + } + if( LAPACKE_sge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -11; + } + if( LAPACKE_sge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -13; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorbdb_work.c b/lapack-netlib/LAPACKE/src/lapacke_sorbdb_work.c index 544082017..603905706 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorbdb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorbdb_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function sorbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -43,113 +43,35 @@ lapack_int LAPACKE_sorbdb_work( int matrix_layout, char trans, char signs, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_sorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_sorbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - float* x11_t = NULL; - float* x12_t = NULL; - float* x21_t = NULL; - float* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_sorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (float*)LAPACKE_malloc( sizeof(float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (float*)LAPACKE_malloc( sizeof(float) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (float*)LAPACKE_malloc( sizeof(float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (float*)LAPACKE_malloc( sizeof(float) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_sorbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorcsd.c b/lapack-netlib/LAPACKE/src/lapacke_sorcsd.c index b9864aeff..06fa61ae1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorcsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorcsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sorcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,28 +48,31 @@ lapack_int LAPACKE_sorcsd( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork = NULL; float* work = NULL; float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_sorcsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -11; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -13; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -15; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -17; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -11; + } + if( LAPACKE_sge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -13; + } + if( LAPACKE_sge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -15; + } + if( LAPACKE_sge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -17; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorcsd2by1.c b/lapack-netlib/LAPACKE/src/lapacke_sorcsd2by1.c index d0223859b..1b79ba109 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorcsd2by1.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorcsd2by1.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sorcsd2by1 * Author: Intel Corporation -* Generated December 2016 +* Generated November 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -50,17 +50,18 @@ lapack_int LAPACKE_sorcsd2by1( int matrix_layout, char jobu1, char jobu2, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = p ; - nrows_x21 = m-p ; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_x11 = p; + nrows_x21 = m-p; + if( LAPACKE_sge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + return -8; + } + + if( LAPACKE_sge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + return -9; + } } - - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -9; - } - #endif /* Allocate memory for working array(s) */ iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * MAX(1,m-MIN(MIN(p,m-p),MIN(q,m-q))) ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorcsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_sorcsd_work.c index feff8fab3..b83bb5378 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorcsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorcsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function sorcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,213 +46,36 @@ lapack_int LAPACKE_sorcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - float* x11_t = NULL; - float* x12_t = NULL; - float* x21_t = NULL; - float* x22_t = NULL; - float* u1_t = NULL; - float* u2_t = NULL; - float* v1t_t = NULL; - float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, iwork, - &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (float*)LAPACKE_malloc( sizeof(float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (float*)LAPACKE_malloc( sizeof(float) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (float*)LAPACKE_malloc( sizeof(float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (float*)LAPACKE_malloc( sizeof(float) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (float*)LAPACKE_malloc( sizeof(float) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (float*) - LAPACKE_malloc( sizeof(float) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, iwork, - &info ); + LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorgbr.c b/lapack-netlib/LAPACKE/src/lapacke_sorgbr.c index 1726d01c4..eb90d5e9e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorgbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorgbr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sorgbr( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( MIN(m,k), tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_s_nancheck( MIN(m,k), tau, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorghr.c b/lapack-netlib/LAPACKE/src/lapacke_sorghr.c index 74f6fb4b0..b34a14c55 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorghr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorghr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sorghr( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorglq.c b/lapack-netlib/LAPACKE/src/lapacke_sorglq.c index f48a1e625..eaa1ca800 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorglq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorglq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sorglq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorgql.c b/lapack-netlib/LAPACKE/src/lapacke_sorgql.c index 86d0874a1..7791f107f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorgql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorgql.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sorgql( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorgqr.c b/lapack-netlib/LAPACKE/src/lapacke_sorgqr.c index 3e7930b3e..99468f317 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorgqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorgqr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sorgqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorgrq.c b/lapack-netlib/LAPACKE/src/lapacke_sorgrq.c index f3335da95..7a81cd9c5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorgrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorgrq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_sorgrq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorgtr.c b/lapack-netlib/LAPACKE/src/lapacke_sorgtr.c index aadb42914..90dc435c9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sorgtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sorgtr.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_sorgtr( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, tau, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, tau, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormbr.c b/lapack-netlib/LAPACKE/src/lapacke_sormbr.c index 4df425efe..2efbbce44 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormbr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sormbr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,18 +48,20 @@ lapack_int LAPACKE_sormbr( int matrix_layout, char vect, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nq = LAPACKE_lsame( side, 'l' ) ? m : n; - ar = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); - ac = LAPACKE_lsame( vect, 'q' ) ? MIN(nq,k) : nq; - if( LAPACKE_sge_nancheck( matrix_layout, ar, ac, a, lda ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_s_nancheck( MIN(nq,k), tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nq = LAPACKE_lsame( side, 'l' ) ? m : n; + ar = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); + ac = LAPACKE_lsame( vect, 'q' ) ? MIN(nq,k) : nq; + if( LAPACKE_sge_nancheck( matrix_layout, ar, ac, a, lda ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_s_nancheck( MIN(nq,k), tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormhr.c b/lapack-netlib/LAPACKE/src/lapacke_sormhr.c index f88fdbcd9..a5cca2c45 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormhr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormhr.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_sormhr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_sge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_s_nancheck( m-1, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_sge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_s_nancheck( m-1, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormlq.c b/lapack-netlib/LAPACKE/src/lapacke_sormlq.c index c05fdb1e5..4f8947af7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormlq.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sormlq * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,16 +47,18 @@ lapack_int LAPACKE_sormlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - lapack_int r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_sge_nancheck( matrix_layout, k, r, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + lapack_int r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_sge_nancheck( matrix_layout, k, r, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormlq_work.c b/lapack-netlib/LAPACKE/src/lapacke_sormlq_work.c index f2f378587..089733298 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormlq_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormlq_work.c @@ -51,8 +51,8 @@ lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans, lapack_int r = LAPACKE_lsame( side, 'l' ) ? m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); - float *a_t = NULL; - float *c_t = NULL; + float *a_t = NULL; + float *c_t = NULL; /* Check leading dimension(s) */ if( lda < r ) { info = -8; diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormql.c b/lapack-netlib/LAPACKE/src/lapacke_sormql.c index aa8cbe63a..e074cf819 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormql.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_sormql( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_sge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_sge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormqr.c b/lapack-netlib/LAPACKE/src/lapacke_sormqr.c index 7b8678f34..92714ea21 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormqr.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_sormqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_sge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_sge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormrq.c b/lapack-netlib/LAPACKE/src/lapacke_sormrq.c index 83b50d09a..c884d8112 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormrq.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_sormrq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormrz.c b/lapack-netlib/LAPACKE/src/lapacke_sormrz.c index f44d7a4db..499bc0b1d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormrz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormrz.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_sormrz( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_s_nancheck( k, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_s_nancheck( k, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sormtr.c b/lapack-netlib/LAPACKE/src/lapacke_sormtr.c index c20b1b7ec..5a9d44138 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sormtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sormtr.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_sormtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_sge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_s_nancheck( m-1, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_sge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_s_nancheck( m-1, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbcon.c b/lapack-netlib/LAPACKE/src/lapacke_spbcon.c index bd0af09cc..51610e67b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_spbcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbequ.c b/lapack-netlib/LAPACKE/src/lapacke_spbequ.c index 9ed657a93..d07d1aece 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_spbequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_spbequ_work( matrix_layout, uplo, n, kd, ab, ldab, s, scond, diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_spbrfs.c index da537798d..be7f3ba98 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbrfs.c @@ -47,18 +47,20 @@ lapack_int LAPACKE_spbrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbstf.c b/lapack-netlib/LAPACKE/src/lapacke_spbstf.c index 416b41094..0d5a9b21e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbstf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbstf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spbstf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -5; + } } #endif return LAPACKE_spbstf_work( matrix_layout, uplo, n, kb, bb, ldbb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbsv.c b/lapack-netlib/LAPACKE/src/lapacke_spbsv.c index f6d550a01..ef810c632 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbsv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_spbsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_spbsv_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_spbsvx.c index b19d93f16..1ffacd4f8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbsvx.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_spbsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -9; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_spbtrf.c index 424f213ae..5612caee0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbtrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spbtrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_spbtrf_work( matrix_layout, uplo, n, kd, ab, ldab ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_spbtrs.c index e353166ef..8601cfdbf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spbtrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_spbtrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_spbtrs_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_spftrf.c b/lapack-netlib/LAPACKE/src/lapacke_spftrf.c index 784625683..ac85d6ca4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spftrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spftrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spftrf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_spftrf_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spftri.c b/lapack-netlib/LAPACKE/src/lapacke_spftri.c index a91cf9ffc..7a5d1ef69 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spftri( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_spftri_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spftrs.c b/lapack-netlib/LAPACKE/src/lapacke_spftrs.c index 7b9d77373..badacf649 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spftrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spftrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_spftrs( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spf_nancheck( n, a ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spf_nancheck( n, a ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_spftrs_work( matrix_layout, transr, uplo, n, nrhs, a, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_spocon.c b/lapack-netlib/LAPACKE/src/lapacke_spocon.c index 8a6b32aee..fb7155558 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spocon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spocon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_spocon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_spoequ.c b/lapack-netlib/LAPACKE/src/lapacke_spoequ.c index 2a706df35..320e208d3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spoequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spoequ.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spoequ( int matrix_layout, lapack_int n, const float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_spoequ_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spoequb.c b/lapack-netlib/LAPACKE/src/lapacke_spoequb.c index f00ff8eb8..aecfceebf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spoequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spoequb.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_spoequb( int matrix_layout, lapack_int n, const float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_spoequb_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sporfs.c b/lapack-netlib/LAPACKE/src/lapacke_sporfs.c index 0f90c3674..01a4e951f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sporfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sporfs.c @@ -47,18 +47,20 @@ lapack_int LAPACKE_sporfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sporfsx.c b/lapack-netlib/LAPACKE/src/lapacke_sporfsx.c index 191db44ce..918333b58 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sporfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sporfsx.c @@ -50,28 +50,30 @@ lapack_int LAPACKE_sporfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -21; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -10; + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -21; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -10; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -13; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -13; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sposv.c b/lapack-netlib/LAPACKE/src/lapacke_sposv.c index fd4ba9663..a06c92f3e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sposv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sposv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_sposv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_sposv_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sposvx.c b/lapack-netlib/LAPACKE/src/lapacke_sposvx.c index 1250b2aa4..a8fdab51b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sposvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sposvx.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_sposvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sposvxx.c b/lapack-netlib/LAPACKE/src/lapacke_sposvxx.c index 2549b9551..0e3160c15 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sposvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sposvxx.c @@ -50,26 +50,28 @@ lapack_int LAPACKE_sposvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_spotrf.c b/lapack-netlib/LAPACKE/src/lapacke_spotrf.c index 5fa0fe7aa..2da01cc80 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spotrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spotrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spotrf( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_spotrf_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spotrf2.c b/lapack-netlib/LAPACKE/src/lapacke_spotrf2.c index d1fc9d051..1a262a34d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spotrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spotrf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spotrf2( int matrix_layout, char uplo, lapack_int n, float* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_spotrf2_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spotri.c b/lapack-netlib/LAPACKE/src/lapacke_spotri.c index 1805b7646..8eddb7d12 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spotri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spotri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spotri( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_spotri_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spotrs.c b/lapack-netlib/LAPACKE/src/lapacke_spotrs.c index fd5a0a154..7cabc16c1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spotrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spotrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_spotrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_spotrs_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sppcon.c b/lapack-netlib/LAPACKE/src/lapacke_sppcon.c index 0c0e8d520..cf4dc26d1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sppcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sppcon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_sppcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -5; - } - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -5; + } + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sppequ.c b/lapack-netlib/LAPACKE/src/lapacke_sppequ.c index a695c3014..606072a24 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sppequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sppequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_sppequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_sppequ_work( matrix_layout, uplo, n, ap, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spprfs.c b/lapack-netlib/LAPACKE/src/lapacke_spprfs.c index 69c737961..792c0e47d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spprfs.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_spprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sppsv.c b/lapack-netlib/LAPACKE/src/lapacke_sppsv.c index f40bc2d4e..cdfe486a8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sppsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sppsv.c @@ -41,12 +41,14 @@ lapack_int LAPACKE_sppsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_sppsv_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sppsvx.c b/lapack-netlib/LAPACKE/src/lapacke_sppsvx.c index c5ea81320..1e626e2b3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sppsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sppsvx.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_sppsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_spp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_spp_nancheck( n, afp ) ) { + return -7; + } } - } - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -9; + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_spptrf.c b/lapack-netlib/LAPACKE/src/lapacke_spptrf.c index 375baae8c..f3297c720 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_spptrf_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spptri.c b/lapack-netlib/LAPACKE/src/lapacke_spptri.c index 1b5949b95..e1a6b4113 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_spptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_spptri_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spptrs.c b/lapack-netlib/LAPACKE/src/lapacke_spptrs.c index 4c7270130..9322f3cd6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_spptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_spptrs_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spstrf.c b/lapack-netlib/LAPACKE/src/lapacke_spstrf.c index 11a1b35ee..0c32e8c04 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spstrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spstrf.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_spstrf( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &tol, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &tol, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sptcon.c b/lapack-netlib/LAPACKE/src/lapacke_sptcon.c index 8894593bc..0eb14b3a6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sptcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sptcon.c @@ -39,15 +39,17 @@ lapack_int LAPACKE_sptcon( lapack_int n, const float* d, const float* e, lapack_int info = 0; float* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_spteqr.c b/lapack-netlib/LAPACKE/src/lapacke_spteqr.c index 6c2417a8c..c0ff15efe 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spteqr.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_spteqr( int matrix_layout, char compz, lapack_int n, float* d return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sptrfs.c b/lapack-netlib/LAPACKE/src/lapacke_sptrfs.c index efcd12c74..44d376301 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sptrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sptrfs.c @@ -45,24 +45,26 @@ lapack_int LAPACKE_sptrfs( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, df, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, ef, 1 ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n, df, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, ef, 1 ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sptsv.c b/lapack-netlib/LAPACKE/src/lapacke_sptsv.c index dfdfe6fd7..8955ede24 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sptsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sptsv.c @@ -41,15 +41,17 @@ lapack_int LAPACKE_sptsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif return LAPACKE_sptsv_work( matrix_layout, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sptsvx.c b/lapack-netlib/LAPACKE/src/lapacke_sptsvx.c index f5fb4ed2b..467cc9b10 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sptsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sptsvx.c @@ -46,24 +46,26 @@ lapack_int LAPACKE_sptsvx( int matrix_layout, char fact, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n, df, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n-1, ef, 1 ) ) { - return -8; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n, df, 1 ) ) { + return -7; + } + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n-1, ef, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_spttrf.c b/lapack-netlib/LAPACKE/src/lapacke_spttrf.c index 3083eff47..38c60362a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spttrf.c @@ -36,12 +36,14 @@ lapack_int LAPACKE_spttrf( lapack_int n, float* d, float* e ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif return LAPACKE_spttrf_work( n, d, e ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_spttrs.c b/lapack-netlib/LAPACKE/src/lapacke_spttrs.c index d2c873732..25f4bca7c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_spttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_spttrs.c @@ -42,15 +42,17 @@ lapack_int LAPACKE_spttrs( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif return LAPACKE_spttrs_work( matrix_layout, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbev.c b/lapack-netlib/LAPACKE/src/lapacke_ssbev.c index 713167631..be2021bf3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbev.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_ssbev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssbev_2stage.c index 3ef3975ea..266e1bd41 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbev_2stage.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_ssbev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbevd.c b/lapack-netlib/LAPACKE/src/lapacke_ssbevd.c index 5b5d3a0d6..3acdeb95d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbevd.c @@ -49,9 +49,11 @@ lapack_int LAPACKE_ssbevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssbevd_2stage.c index a9f95c7f1..2eda9cde9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbevd_2stage.c @@ -49,9 +49,11 @@ lapack_int LAPACKE_ssbevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbevx.c b/lapack-netlib/LAPACKE/src/lapacke_ssbevx.c index d25a5fdcb..b891d580a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbevx.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_ssbevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssbevx_2stage.c index 27102f111..6f1e1c404 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbevx_2stage.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_ssbevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbgst.c b/lapack-netlib/LAPACKE/src/lapacke_ssbgst.c index d289cc08d..4066114af 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbgst.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_ssbgst( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbgv.c b/lapack-netlib/LAPACKE/src/lapacke_ssbgv.c index 5d73923ab..6c8ca1b76 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbgv.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_ssbgv( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbgvd.c b/lapack-netlib/LAPACKE/src/lapacke_ssbgvd.c index e884ba02c..a6c036846 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbgvd.c @@ -50,12 +50,14 @@ lapack_int LAPACKE_ssbgvd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbgvx.c b/lapack-netlib/LAPACKE/src/lapacke_ssbgvx.c index ec42d8cc1..3d0a0a489 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbgvx.c @@ -49,24 +49,26 @@ lapack_int LAPACKE_ssbgvx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -8; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -18; - } - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -10; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -15; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -18; + } + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -10; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -14; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -15; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssbtrd.c b/lapack-netlib/LAPACKE/src/lapacke_ssbtrd.c index 03a3ac9e5..453ccf559 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssbtrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssbtrd.c @@ -44,13 +44,15 @@ lapack_int LAPACKE_ssbtrd( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_lsame( vect, 'u' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_lsame( vect, 'u' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssfrk.c b/lapack-netlib/LAPACKE/src/lapacke_ssfrk.c index 8709e87e4..88aa956cb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssfrk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssfrk.c @@ -43,20 +43,22 @@ lapack_int LAPACKE_ssfrk( int matrix_layout, char transr, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ka = LAPACKE_lsame( trans, 'n' ) ? k : n; - na = LAPACKE_lsame( trans, 'n' ) ? n : k; - if( LAPACKE_sge_nancheck( matrix_layout, na, ka, a, lda ) ) { - return -8; - } - if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &beta, 1 ) ) { - return -10; - } - if( LAPACKE_spf_nancheck( n, c ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ka = LAPACKE_lsame( trans, 'n' ) ? k : n; + na = LAPACKE_lsame( trans, 'n' ) ? n : k; + if( LAPACKE_sge_nancheck( matrix_layout, na, ka, a, lda ) ) { + return -8; + } + if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &beta, 1 ) ) { + return -10; + } + if( LAPACKE_spf_nancheck( n, c ) ) { + return -11; + } } #endif return LAPACKE_ssfrk_work( matrix_layout, transr, uplo, trans, n, k, alpha, diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspcon.c b/lapack-netlib/LAPACKE/src/lapacke_sspcon.c index 8cffd2e3f..d99ff688a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_sspcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -6; - } - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -6; + } + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspev.c b/lapack-netlib/LAPACKE/src/lapacke_sspev.c index c19748972..df867c2ab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspev.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_sspev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspevd.c b/lapack-netlib/LAPACKE/src/lapacke_sspevd.c index d6681a6bd..bd06a8ba6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspevd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_sspevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspevx.c b/lapack-netlib/LAPACKE/src/lapacke_sspevx.c index 63540a565..d9ac9c3aa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspevx.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_sspevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspgst.c b/lapack-netlib/LAPACKE/src/lapacke_sspgst.c index 956aa57f5..45e4c2715 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspgst.c @@ -41,12 +41,14 @@ lapack_int LAPACKE_sspgst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_ssp_nancheck( n, bp ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_ssp_nancheck( n, bp ) ) { + return -6; + } } #endif return LAPACKE_sspgst_work( matrix_layout, itype, uplo, n, ap, bp ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspgv.c b/lapack-netlib/LAPACKE/src/lapacke_sspgv.c index 815056a6b..6b60a8f34 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspgv.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_sspgv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_ssp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_ssp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspgvd.c b/lapack-netlib/LAPACKE/src/lapacke_sspgvd.c index 197fb4a52..749abb0b1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspgvd.c @@ -49,12 +49,14 @@ lapack_int LAPACKE_sspgvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_ssp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_ssp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspgvx.c b/lapack-netlib/LAPACKE/src/lapacke_sspgvx.c index 96843c530..4246e895d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspgvx.c @@ -47,24 +47,26 @@ lapack_int LAPACKE_sspgvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -13; - } - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -7; - } - if( LAPACKE_ssp_nancheck( n, bp ) ) { - return -8; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -13; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -10; + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -7; + } + if( LAPACKE_ssp_nancheck( n, bp ) ) { + return -8; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -9; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssprfs.c b/lapack-netlib/LAPACKE/src/lapacke_ssprfs.c index f1cfb995d..4c652dab2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssprfs.c @@ -47,18 +47,20 @@ lapack_int LAPACKE_ssprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspsv.c b/lapack-netlib/LAPACKE/src/lapacke_sspsv.c index 8ad8958cb..e8272e3f4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspsv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_sspsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_sspsv_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sspsvx.c b/lapack-netlib/LAPACKE/src/lapacke_sspsvx.c index 25bb7ee28..fa759e45d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sspsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sspsvx.c @@ -47,17 +47,19 @@ lapack_int LAPACKE_sspsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_ssp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_ssp_nancheck( n, afp ) ) { + return -7; + } + } + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssptrd.c b/lapack-netlib/LAPACKE/src/lapacke_ssptrd.c index 6d41f6b6a..ea35b76e0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssptrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssptrd.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ssptrd( int matrix_layout, char uplo, lapack_int n, float* ap return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_ssptrd_work( matrix_layout, uplo, n, ap, d, e, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssptrf.c b/lapack-netlib/LAPACKE/src/lapacke_ssptrf.c index a3fe4639d..e50114d3c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ssptrf( int matrix_layout, char uplo, lapack_int n, float* ap return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_ssptrf_work( matrix_layout, uplo, n, ap, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssptri.c b/lapack-netlib/LAPACKE/src/lapacke_ssptri.c index 9e60ba441..de9815fdf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssptri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_ssptri( int matrix_layout, char uplo, lapack_int n, float* ap return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssptrs.c b/lapack-netlib/LAPACKE/src/lapacke_ssptrs.c index f7a4df7dd..53d855fa5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_ssptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_ssptrs_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstebz.c b/lapack-netlib/LAPACKE/src/lapacke_sstebz.c index 9225729dd..763a80395 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstebz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstebz.c @@ -43,24 +43,26 @@ lapack_int LAPACKE_sstebz( char range, char order, lapack_int n, float vl, lapack_int* iwork = NULL; float* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -8; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -9; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -10; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -8; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -5; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -9; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -10; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -4; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -5; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstedc.c b/lapack-netlib/LAPACKE/src/lapacke_sstedc.c index b5b6b49ce..157874668 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstedc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstedc.c @@ -48,16 +48,18 @@ lapack_int LAPACKE_sstedc( int matrix_layout, char compz, lapack_int n, float* d return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstegr.c b/lapack-netlib/LAPACKE/src/lapacke_sstegr.c index a3c5d4d08..c6a73b2b4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstegr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstegr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sstegr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -51,24 +51,26 @@ lapack_int LAPACKE_sstegr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstein.c b/lapack-netlib/LAPACKE/src/lapacke_sstein.c index ccad56f41..31a021374 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstein.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sstein * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,15 +46,17 @@ lapack_int LAPACKE_sstein( int matrix_layout, lapack_int n, const float* d, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, w, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n, w, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstemr.c b/lapack-netlib/LAPACKE/src/lapacke_sstemr.c index dd2040a3d..4229819ab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstemr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstemr.c @@ -51,18 +51,20 @@ lapack_int LAPACKE_sstemr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstemr_work.c b/lapack-netlib/LAPACKE/src/lapacke_sstemr_work.c index 834014e91..d0c8ce42e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstemr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstemr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function sstemr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -55,7 +55,7 @@ lapack_int LAPACKE_sstemr_work( int matrix_layout, char jobz, char range, lapack_int ldz_t = MAX(1,n); float* z_t = NULL; /* Check leading dimension(s) */ - if( ldz < n ) { + if( ldz < 1 || ( LAPACKE_lsame( jobz, 'v' ) && ldz < n ) ) { info = -14; LAPACKE_xerbla( "LAPACKE_sstemr_work", info ); return info; diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssteqr.c b/lapack-netlib/LAPACKE/src/lapacke_ssteqr.c index fc7ea29b8..714ff2075 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssteqr.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_ssteqr( int matrix_layout, char compz, lapack_int n, float* d return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssterf.c b/lapack-netlib/LAPACKE/src/lapacke_ssterf.c index d42384dcc..61754c800 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssterf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssterf.c @@ -36,12 +36,14 @@ lapack_int LAPACKE_ssterf( lapack_int n, float* d, float* e ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif return LAPACKE_ssterf_work( n, d, e ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstev.c b/lapack-netlib/LAPACKE/src/lapacke_sstev.c index 97da7310f..0227c09e4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstev.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sstev * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -43,12 +43,14 @@ lapack_int LAPACKE_sstev( int matrix_layout, char jobz, lapack_int n, float* d, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstevd.c b/lapack-netlib/LAPACKE/src/lapacke_sstevd.c index b2bf5bb3f..9f9e2e79e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstevd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sstevd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,12 +48,14 @@ lapack_int LAPACKE_sstevd( int matrix_layout, char jobz, lapack_int n, float* d, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstevr.c b/lapack-netlib/LAPACKE/src/lapacke_sstevr.c index e9b0ca2a5..f45c49087 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstevr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstevr.c @@ -51,24 +51,26 @@ lapack_int LAPACKE_sstevr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_sstevx.c b/lapack-netlib/LAPACKE/src/lapacke_sstevx.c index 1dd833d25..cb7fbec3a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sstevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sstevx.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function sstevx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,24 +47,26 @@ lapack_int LAPACKE_sstevx( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_s_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_s_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssycon.c b/lapack-netlib/LAPACKE/src/lapacke_ssycon.c index b6781aeb3..2a2e7a923 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssycon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssycon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_ssycon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssycon_3.c b/lapack-netlib/LAPACKE/src/lapacke_ssycon_3.c index ece1482eb..b8775db66 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssycon_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssycon_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function ssycon_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,20 +40,23 @@ lapack_int LAPACKE_ssycon_3( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; lapack_int* iwork = NULL; float* work = NULL; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_ssycon_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } + if( LAPACKE_s_nancheck( 1, &anorm, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyconv.c b/lapack-netlib/LAPACKE/src/lapacke_ssyconv.c index c22b1e447..5fd0a78c5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyconv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyconv.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ssyconv( int matrix_layout, char uplo, char way, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Call middle-level interface */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyequb.c b/lapack-netlib/LAPACKE/src/lapacke_ssyequb.c index 2412feb0d..b4dd5b0ab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_ssyequb( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyev.c b/lapack-netlib/LAPACKE/src/lapacke_ssyev.c index 9d240e8bc..81b65db02 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyev.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ssyev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssyev_2stage.c index d031cea53..d57a7c2eb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyev_2stage.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ssyev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevd.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevd.c index b668e5f21..1995e7950 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevd.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_ssyevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevd_2stage.c index 12db2274e..6d6785acc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevd_2stage.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_ssyevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevr.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevr.c index a60b2d948..d7e050143 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevr.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_ssyevr( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevr_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevr_2stage.c index 40a93b26a..cbc3014e9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevr_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevr_2stage.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_ssyevr_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevr_work.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevr_work.c index 3e874a93d..26e8cd48d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function ssyevr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -52,8 +52,9 @@ lapack_int LAPACKE_ssyevr_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevx.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevx.c index 9924669a3..43be722b4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevx.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_ssyevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevx_2stage.c index 7527f348d..42559e35f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevx_2stage.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_ssyevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyevx_work.c b/lapack-netlib/LAPACKE/src/lapacke_ssyevx_work.c index 9ffd925b4..d4d0b8001 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyevx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyevx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function ssyevx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -51,8 +51,9 @@ lapack_int LAPACKE_ssyevx_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssygst.c b/lapack-netlib/LAPACKE/src/lapacke_ssygst.c index 2111c457d..7b97f472b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssygst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssygst.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_ssygst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_ssygst_work( matrix_layout, itype, uplo, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssygv.c b/lapack-netlib/LAPACKE/src/lapacke_ssygv.c index 13ca1ec62..8ec40d954 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssygv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssygv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_ssygv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssygv_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssygv_2stage.c index 5ca1bb76a..a2eba6653 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssygv_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssygv_2stage.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_ssygv_2stage( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssygvd.c b/lapack-netlib/LAPACKE/src/lapacke_ssygvd.c index 3bc9d8cb6..2a1c62aef 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssygvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssygvd.c @@ -49,12 +49,14 @@ lapack_int LAPACKE_ssygvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssygvx.c b/lapack-netlib/LAPACKE/src/lapacke_ssygvx.c index 598ece743..1fe4e2c6c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssygvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssygvx.c @@ -50,24 +50,26 @@ lapack_int LAPACKE_ssygvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -7; - } - if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_s_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssygvx_work.c b/lapack-netlib/LAPACKE/src/lapacke_ssygvx_work.c index 9bc87810b..6306bd2cd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssygvx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssygvx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function ssygvx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyrfs.c b/lapack-netlib/LAPACKE/src/lapacke_ssyrfs.c index 6d856dc1b..f65daa951 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyrfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_ssyrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_ssyrfsx.c index 2d8eb0309..cccc99639 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyrfsx.c @@ -51,28 +51,30 @@ lapack_int LAPACKE_ssyrfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -22; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -11; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysv.c b/lapack-netlib/LAPACKE/src/lapacke_ssysv.c index 65221979b..579d98d62 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssysv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_ssysv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa.c b/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa.c index 0f10c8294..2777e1838 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_ssysv_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage.c new file mode 100644 index 000000000..ce50a89f4 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage.c @@ -0,0 +1,89 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function ssysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, float* b, lapack_int ldb ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + float* work = NULL; + float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_ssysv_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_ssysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (float*) + LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_ssysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssysv_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage_work.c new file mode 100644 index 000000000..d21dbb048 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysv_aa_2stage_work.c @@ -0,0 +1,124 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function ssysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, float* b, lapack_int ldb, + float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_ssysv_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + float* a_t = NULL; + float* tb_t = NULL; + float* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_ssysv_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_ssysv_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_ssysv_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_ssysv_aa_2stage( &uplo, &n, &nrhs, a, &lda_t, + tb, <b, ipiv, ipiv2, b, &ldb_t, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (float*)LAPACKE_malloc( sizeof(float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_ssy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_ssysv_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssysv_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_ssysv_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysv_rk.c b/lapack-netlib/LAPACKE/src/lapacke_ssysv_rk.c index b35202844..afb9869fc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssysv_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysv_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function ssysv_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,15 +46,14 @@ lapack_int LAPACKE_ssysv_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, e, 1) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysv_rook.c b/lapack-netlib/LAPACKE/src/lapacke_ssysv_rook.c index 952bc8a34..2c6fee6b9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssysv_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysv_rook.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_ssysv_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysvx.c b/lapack-netlib/LAPACKE/src/lapacke_ssysvx.c index 973ac27d3..97410885c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssysvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysvx.c @@ -50,17 +50,19 @@ lapack_int LAPACKE_ssysvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssysvxx.c b/lapack-netlib/LAPACKE/src/lapacke_ssysvxx.c index 0770a1854..860773db3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssysvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssysvxx.c @@ -51,26 +51,28 @@ lapack_int LAPACKE_ssysvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( nparams>0 ) { - if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { - return -24; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_s_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( nparams>0 ) { + if( LAPACKE_s_nancheck( nparams, params, 1 ) ) { + return -24; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_s_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssyswapr.c b/lapack-netlib/LAPACKE/src/lapacke_ssyswapr.c index 43a5a3e0c..733db5a2d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssyswapr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssyswapr.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ssyswapr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_ssyswapr_work( matrix_layout, uplo, n, a, lda, i1, i2 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrd.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrd.c index 7ea659e5a..4dee60728 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrd.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ssytrd( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrf.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrf.c index 1ec5cdeed..cf25c57c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ssytrf( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa.c index 31056df4b..6689e7e85 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ssytrf_aa( int matrix_layout, char uplo, lapack_int n, float* return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage.c new file mode 100644 index 000000000..6285f2d9d --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage.c @@ -0,0 +1,86 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function ssytrf_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + float* a, lapack_int lda, + float* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2 ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + float* work = NULL; + float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_ssytrf_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_ssytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (float*) + LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_ssytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssytrf_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage_work.c new file mode 100644 index 000000000..5ffe8154b --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_2stage_work.c @@ -0,0 +1,108 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function ssytrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + float* a, lapack_int lda, + float* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_ssytrf_aa_2stage( &uplo, &n, a, &lda, tb, + <b, ipiv, ipiv2, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + float* a_t = NULL; + float* tb_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_ssytrf_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_ssytrf_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_ssytrf_aa_2stage( &uplo, &n, a, &lda_t, + tb, <b, ipiv, ipiv2, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (float*)LAPACKE_malloc( sizeof(float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_ssy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_ssytrf_aa_2stage( &uplo, &n, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssytrf_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_ssytrf_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rk.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rk.c index 806631845..993750fa4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function ssytrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -45,12 +45,11 @@ lapack_int LAPACKE_ssytrf_rk( int matrix_layout, char uplo, lapack_int n, float* return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rook.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rook.c index 8fda18a6a..468dc43b6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_rook.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ssytrf_rook( int matrix_layout, char uplo, lapack_int n, floa return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytri.c b/lapack-netlib/LAPACKE/src/lapacke_ssytri.c index fe93407df..eb68f72d7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_ssytri( int matrix_layout, char uplo, lapack_int n, float* a, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytri2.c b/lapack-netlib/LAPACKE/src/lapacke_ssytri2.c index eb348b112..4159115c0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytri2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytri2.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ssytri2( int matrix_layout, char uplo, lapack_int n, float* a return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytri2x.c b/lapack-netlib/LAPACKE/src/lapacke_ssytri2x.c index e8695e20c..34c989a4a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytri2x.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytri2x.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_ssytri2x( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytri_3.c b/lapack-netlib/LAPACKE/src/lapacke_ssytri_3.c index b97ca4983..f2a183527 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytri_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytri_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function ssytri_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,17 +40,20 @@ lapack_int LAPACKE_ssytri_3( int matrix_layout, char uplo, lapack_int n, float* lapack_int lwork = -1; float* work = NULL; float work_query; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_ssytri_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_s_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_s_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrs.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrs.c index e374d3335..9919d6a4e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_ssytrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_ssytrs_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrs2.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrs2.c index b143bf904..a95a71469 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrs2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrs2.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_ssytrs2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_3.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_3.c index 1bcefd4dc..44e713b72 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_3.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_ssytrs_3( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_s_nancheck( n, e ,1 ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_s_nancheck( n, e ,1 ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_ssytrs_3_work( matrix_layout, uplo, n, nrhs, a, lda, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa.c index 6e384095b..9eb20d2ea 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function ssytrs_aa * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,12 +46,14 @@ lapack_int LAPACKE_ssytrs_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ @@ -60,7 +62,7 @@ lapack_int LAPACKE_ssytrs_aa( int matrix_layout, char uplo, lapack_int n, if( info != 0 ) { goto exit_level_0; } - lwork = (lapack_int)work_query; + lwork = (lapack_int)work_query; /* Allocate memory for work arrays */ work = (float*) LAPACKE_malloc( sizeof(float) * lwork ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage.c new file mode 100644 index 000000000..c67ad2d50 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage.c @@ -0,0 +1,66 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function ssytrs_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + float* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_ssytrs_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Call middle-level interface */ + info = LAPACKE_ssytrs_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage_work.c new file mode 100644 index 000000000..95def2929 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_aa_2stage_work.c @@ -0,0 +1,114 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function ssytrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, float* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_ssytrs_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + float* a_t = NULL; + float* tb_t = NULL; + float* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_ssytrs_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_ssytrs_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_ssytrs_aa_2stage_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (float*)LAPACKE_malloc( sizeof(float) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_ssy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_ssytrs_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssytrs_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_ssytrs_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_rook.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_rook.c index 8d428c286..fa8238222 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrs_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrs_rook.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_ssytrs_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_ssytrs_rook_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_stbcon.c b/lapack-netlib/LAPACKE/src/lapacke_stbcon.c index ff3bd2738..9230171f9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stbcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_stbcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_stbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_stbrfs.c index 783f160b9..6f4de2429 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stbrfs.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_stbrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_stbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_stbtrs.c index 50893e62c..a99c37ad3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_stbtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_stbtrs_work( matrix_layout, uplo, trans, diag, n, kd, nrhs, diff --git a/lapack-netlib/LAPACKE/src/lapacke_stfsm.c b/lapack-netlib/LAPACKE/src/lapacke_stfsm.c index 55bebfe21..bf04a277b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stfsm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stfsm.c @@ -43,18 +43,20 @@ lapack_int LAPACKE_stfsm( int matrix_layout, char transr, char side, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( IS_S_NONZERO(alpha) ) { - if( LAPACKE_stf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( IS_S_NONZERO(alpha) ) { + if( LAPACKE_stf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -10; + } } - } - if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { - return -9; - } - if( IS_S_NONZERO(alpha) ) { - if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -11; + if( LAPACKE_s_nancheck( 1, &alpha, 1 ) ) { + return -9; + } + if( IS_S_NONZERO(alpha) ) { + if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_stftri.c b/lapack-netlib/LAPACKE/src/lapacke_stftri.c index d2eb97798..bd6256978 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_stftri( int matrix_layout, char transr, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -6; + } } #endif return LAPACKE_stftri_work( matrix_layout, transr, uplo, diag, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_stfttp.c b/lapack-netlib/LAPACKE/src/lapacke_stfttp.c index 7d14d086f..b5cf832a0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stfttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stfttp.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_stfttp( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_stfttp_work( matrix_layout, transr, uplo, n, arf, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_stfttr.c b/lapack-netlib/LAPACKE/src/lapacke_stfttr.c index 4d114d40e..5099e7e96 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stfttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stfttr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_stfttr( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_stfttr_work( matrix_layout, transr, uplo, n, arf, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_stgevc.c b/lapack-netlib/LAPACKE/src/lapacke_stgevc.c index 7984906c1..e95a37e86 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stgevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stgevc.c @@ -47,21 +47,23 @@ lapack_int LAPACKE_stgevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, p, ldp ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, s, lds ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, p, ldp ) ) { + return -8; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_sge_nancheck( matrix_layout, n, n, s, lds ) ) { + return -6; + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_stgexc.c b/lapack-netlib/LAPACKE/src/lapacke_stgexc.c index 923804818..7d9e21e62 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stgexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stgexc.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_stgexc( int matrix_layout, lapack_logical wantq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; - } - if( wantq ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; } - } - if( wantz ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -11; + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } + if( wantq ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -9; + } + } + if( wantz ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_stgsen.c b/lapack-netlib/LAPACKE/src/lapacke_stgsen.c index f58be28af..5464fd22b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stgsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stgsen.c @@ -53,21 +53,23 @@ lapack_int LAPACKE_stgsen( int matrix_layout, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( wantq ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( wantz ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -16; + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( wantq ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -14; + } + } + if( wantz ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_stgsja.c b/lapack-netlib/LAPACKE/src/lapacke_stgsja.c index c92225123..2b239c758 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stgsja.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stgsja.c @@ -48,32 +48,34 @@ lapack_int LAPACKE_stgsja( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; } - } - if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { - return -14; - } - if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, m, m, u, ldu ) ) { - return -18; + if( LAPACKE_sge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; } - } - if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, p, p, v, ldv ) ) { - return -20; + if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -22; + } + } + if( LAPACKE_s_nancheck( 1, &tola, 1 ) ) { + return -14; + } + if( LAPACKE_s_nancheck( 1, &tolb, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, m, m, u, ldu ) ) { + return -18; + } + } + if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, p, p, v, ldv ) ) { + return -20; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_stgsna.c b/lapack-netlib/LAPACKE/src/lapacke_stgsna.c index 3282695f3..074424732 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stgsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stgsna.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_stgsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_stgsyl.c b/lapack-netlib/LAPACKE/src/lapacke_stgsyl.c index b8d930b1d..3c6b859cc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stgsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stgsyl.c @@ -50,24 +50,26 @@ lapack_int LAPACKE_stgsyl( int matrix_layout, char trans, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, m, d, ldd ) ) { - return -12; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, e, lde ) ) { - return -14; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, f, ldf ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, m, d, ldd ) ) { + return -12; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, e, lde ) ) { + return -14; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -16; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_stpcon.c b/lapack-netlib/LAPACKE/src/lapacke_stpcon.c index 27d6c4f7f..661269de5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stpcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stpcon.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_stpcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_stpmqrt.c b/lapack-netlib/LAPACKE/src/lapacke_stpmqrt.c index f7d65aca8..b863b79a9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stpmqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stpmqrt.c @@ -49,29 +49,31 @@ lapack_int LAPACKE_stpmqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_a = LAPACKE_lsame( side, 'L' ) ? n : - ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); - nrows_a = LAPACKE_lsame( side, 'L' ) ? k : - ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_sge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { - return -13; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -15; - } - if( LAPACKE_sge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_a = LAPACKE_lsame( side, 'L' ) ? n : + ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); + nrows_a = LAPACKE_lsame( side, 'L' ) ? k : + ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_sge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { + return -13; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -15; + } + if( LAPACKE_sge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ lwork = LAPACKE_lsame( side, 'L' ) ? MAX(1,nb) * MAX(1,n) : - ( LAPACKE_lsame( side, 'R' ) ? MAX(1,m) * MAX(1,nb) : 0 ); + ( LAPACKE_lsame( side, 'R' ) ? MAX(1,m) * MAX(1,nb) : 0 ); work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; diff --git a/lapack-netlib/LAPACKE/src/lapacke_stpqrt.c b/lapack-netlib/LAPACKE/src/lapacke_stpqrt.c index 1d638dc1e..1438ea29d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stpqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stpqrt.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_stpqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_stpqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_stpqrt2.c index 1af9fe7ea..923258621 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stpqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stpqrt2.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_stpqrt2( int matrix_layout, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_stpqrt2_work( matrix_layout, m, n, l, a, lda, b, ldb, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_stprfb.c b/lapack-netlib/LAPACKE/src/lapacke_stprfb.c index 13d0cc7e7..846d4ccb3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stprfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stprfb.c @@ -49,30 +49,32 @@ lapack_int LAPACKE_stprfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( storev, 'C' ) ) { - ncols_v = k; - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - } else if( LAPACKE_lsame( storev, 'R' ) ) { - ncols_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - nrows_v = k; - } else { - ncols_v = 0; - nrows_v = 0; - } - if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -14; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -16; - } - if( LAPACKE_sge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -12; - } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( storev, 'C' ) ) { + ncols_v = k; + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + } else if( LAPACKE_lsame( storev, 'R' ) ) { + ncols_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + nrows_v = k; + } else { + ncols_v = 0; + nrows_v = 0; + } + if( LAPACKE_sge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -14; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -16; + } + if( LAPACKE_sge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -12; + } + if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -10; + } } #endif if (side=='l' || side=='L') { diff --git a/lapack-netlib/LAPACKE/src/lapacke_stprfs.c b/lapack-netlib/LAPACKE/src/lapacke_stprfs.c index 6b4fd7ead..299e05120 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stprfs.c @@ -46,15 +46,17 @@ lapack_int LAPACKE_stprfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_stptri.c b/lapack-netlib/LAPACKE/src/lapacke_stptri.c index cdc12217c..edd25bd74 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_stptri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -5; + } } #endif return LAPACKE_stptri_work( matrix_layout, uplo, diag, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_stptrs.c b/lapack-netlib/LAPACKE/src/lapacke_stptrs.c index 643097ce1..fc24e5f16 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_stptrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_stp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_stptrs_work( matrix_layout, uplo, trans, diag, n, nrhs, ap, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_stpttf.c b/lapack-netlib/LAPACKE/src/lapacke_stpttf.c index 318f534b1..5c7746e61 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stpttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stpttf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_stpttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -5; + } } #endif return LAPACKE_stpttf_work( matrix_layout, transr, uplo, n, ap, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_stpttr.c b/lapack-netlib/LAPACKE/src/lapacke_stpttr.c index adccfe22d..9e1340c16 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stpttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stpttr.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_stpttr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_spp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_spp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_stpttr_work( matrix_layout, uplo, n, ap, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_strcon.c b/lapack-netlib/LAPACKE/src/lapacke_strcon.c index ad7260fe6..39efd559c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_strcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_strevc.c b/lapack-netlib/LAPACKE/src/lapacke_strevc.c index 0f0049480..2c983f6c3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strevc.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_strevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_strexc.c b/lapack-netlib/LAPACKE/src/lapacke_strexc.c index 9606b8d07..b7976b66e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strexc.c @@ -44,14 +44,16 @@ lapack_int LAPACKE_strexc( int matrix_layout, char compq, lapack_int n, float* t return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -6; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -4; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -4; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_strrfs.c b/lapack-netlib/LAPACKE/src/lapacke_strrfs.c index 62e2b1db8..9a1156c6f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strrfs.c @@ -47,15 +47,17 @@ lapack_int LAPACKE_strrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_strsen.c b/lapack-netlib/LAPACKE/src/lapacke_strsen.c index 3cba0b15f..efba91af8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strsen.c @@ -50,14 +50,16 @@ lapack_int LAPACKE_strsen( int matrix_layout, char job, char compq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -8; + } + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_strsna.c b/lapack-netlib/LAPACKE/src/lapacke_strsna.c index f2c53a005..9d130efca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strsna.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_strsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_sge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_strsyl.c b/lapack-netlib/LAPACKE/src/lapacke_strsyl.c index d8833f346..df618cd73 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strsyl.c @@ -44,15 +44,17 @@ lapack_int LAPACKE_strsyl( int matrix_layout, char trana, char tranb, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } } #endif return LAPACKE_strsyl_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, diff --git a/lapack-netlib/LAPACKE/src/lapacke_strtri.c b/lapack-netlib/LAPACKE/src/lapacke_strtri.c index 11503dfb0..2888624aa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strtri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strtri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_strtri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_strtri_work( matrix_layout, uplo, diag, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_strtrs.c b/lapack-netlib/LAPACKE/src/lapacke_strtrs.c index 3000369ec..b4c0aaeae 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strtrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_strtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_strtrs_work( matrix_layout, uplo, trans, diag, n, nrhs, a, diff --git a/lapack-netlib/LAPACKE/src/lapacke_strttf.c b/lapack-netlib/LAPACKE/src/lapacke_strttf.c index 9286709c9..fee7ab9ae 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strttf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_strttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_strttf_work( matrix_layout, transr, uplo, n, a, lda, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_strttp.c b/lapack-netlib/LAPACKE/src/lapacke_strttp.c index ae0eb817f..6c4b84aa3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_strttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_strttp.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_strttp( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_strttp_work( matrix_layout, uplo, n, a, lda, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_stzrzf.c b/lapack-netlib/LAPACKE/src/lapacke_stzrzf.c index 8ca4e6657..0243179e6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_stzrzf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_stzrzf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_stzrzf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zbbcsd.c b/lapack-netlib/LAPACKE/src/lapacke_zbbcsd.c index b88b9a85d..e0d0e224a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zbbcsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zbbcsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,41 +48,44 @@ lapack_int LAPACKE_zbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork = -1; double* rwork = NULL; double rwork_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zbbcsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - if( LAPACKE_d_nancheck( q-1, phi, 1 ) ) { - return -11; - } - if( LAPACKE_d_nancheck( q, theta, 1 ) ) { - return -10; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { - return -12; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( q-1, phi, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { - return -14; + if( LAPACKE_d_nancheck( q, theta, 1 ) ) { + return -10; } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { - return -16; + if( LAPACKE_lsame( jobu1, 'y' ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { + return -12; + } } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { - return -18; + if( LAPACKE_lsame( jobu2, 'y' ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { + return -14; + } + } + if( LAPACKE_lsame( jobv1t, 'y' ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { + return -16; + } + } + if( LAPACKE_lsame( jobv2t, 'y' ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { + return -18; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zbbcsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_zbbcsd_work.c index 34882ccdd..96b8b97e3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zbbcsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zbbcsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zbbcsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,156 +47,36 @@ lapack_int LAPACKE_zbbcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, rwork, &lrwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_complex_double* u1_t = NULL; - lapack_complex_double* u2_t = NULL; - lapack_complex_double* v1t_t = NULL; - lapack_complex_double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 ) { - LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, rwork, &lrwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, rwork, &lrwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zbdsqr.c b/lapack-netlib/LAPACKE/src/lapacke_zbdsqr.c index 705ff6324..c12ef3eae 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zbdsqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zbdsqr.c @@ -47,26 +47,28 @@ lapack_int LAPACKE_zbdsqr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( ncc != 0 ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( ncc != 0 ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, ncc, c, ldc ) ) { + return -13; + } } - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -8; - } - if( nru != 0 ) { - if( LAPACKE_zge_nancheck( matrix_layout, nru, n, u, ldu ) ) { - return -11; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -7; } - } - if( ncvt != 0 ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { - return -9; + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -8; + } + if( nru != 0 ) { + if( LAPACKE_zge_nancheck( matrix_layout, nru, n, u, ldu ) ) { + return -11; + } + } + if( ncvt != 0 ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, ncvt, vt, ldvt ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zcgesv.c b/lapack-netlib/LAPACKE/src/lapacke_zcgesv.c index ff40822cd..f161b185c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zcgesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zcgesv.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_zcgesv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zcposv.c b/lapack-netlib/LAPACKE/src/lapacke_zcposv.c index 26b06ceca..1c54c391f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zcposv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zcposv.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_zcposv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbbrd.c b/lapack-netlib/LAPACKE/src/lapacke_zgbbrd.c index 42b0556e9..63c9a756f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbbrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbbrd.c @@ -49,13 +49,15 @@ lapack_int LAPACKE_zgbbrd( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( ncc != 0 ) { - if( LAPACKE_zge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -8; + } + if( ncc != 0 ) { + if( LAPACKE_zge_nancheck( matrix_layout, m, ncc, c, ldc ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbcon.c b/lapack-netlib/LAPACKE/src/lapacke_zgbcon.c index d8e6d0f90..6ce309d7d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbcon.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zgbcon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbequ.c b/lapack-netlib/LAPACKE/src/lapacke_zgbequ.c index 8bd5fbc8c..33781a10a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbequ.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zgbequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_zgbequ_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbequb.c b/lapack-netlib/LAPACKE/src/lapacke_zgbequb.c index d311718e2..96038277e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zgbequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_zgbequb_work( matrix_layout, m, n, kl, ku, ab, ldab, r, c, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_zgbrfs.c index 2f77be1a0..2bf3fa377 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbrfs.c @@ -50,18 +50,20 @@ lapack_int LAPACKE_zgbrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -9; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -9; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_zgbrfsx.c index f7be6aab7..6d269de43 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbrfsx.c @@ -53,33 +53,35 @@ lapack_int LAPACKE_zgbrfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -15; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, ldafb ) ) { + return -10; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -13; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -15; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -14; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -13; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -17; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -17; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbsv.c b/lapack-netlib/LAPACKE/src/lapacke_zgbsv.c index b3f143cd8..17aa1d37f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbsv.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_zgbsv( int matrix_layout, lapack_int n, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_zgbsv_work( matrix_layout, n, kl, ku, nrhs, ab, ldab, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_zgbsvx.c index 9904a71c4..954245ecd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbsvx.c @@ -51,29 +51,31 @@ lapack_int LAPACKE_zgbsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbsvxx.c b/lapack-netlib/LAPACKE/src/lapacke_zgbsvxx.c index ed8209242..4d5f86b9e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbsvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbsvxx.c @@ -53,34 +53,36 @@ lapack_int LAPACKE_zgbsvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, - ldafb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, ku, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -16; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -15; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, afb, + ldafb ) ) { + return -10; + } } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -27; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -16; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -14; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -15; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -27; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -14; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_zgbtrf.c index e01ac4af8..34e84f25a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbtrf.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zgbtrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, kl+ku, ab, ldab ) ) { + return -6; + } } #endif return LAPACKE_zgbtrf_work( matrix_layout, m, n, kl, ku, ab, ldab, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_zgbtrs.c index c8036b035..7192fd234 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgbtrs.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_zgbtrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zgb_nancheck( matrix_layout, n, n, kl, kl+ku, ab, ldab ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_zgbtrs_work( matrix_layout, trans, n, kl, ku, nrhs, ab, ldab, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgebak.c b/lapack-netlib/LAPACKE/src/lapacke_zgebak.c index eb2f497db..69e5c5085 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgebak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgebak.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zgebak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, scale, 1 ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, scale, 1 ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -9; + } } #endif return LAPACKE_zgebak_work( matrix_layout, job, side, n, ilo, ihi, scale, m, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgebal.c b/lapack-netlib/LAPACKE/src/lapacke_zgebal.c index ff0b8c2dd..25e0c01cc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgebal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgebal.c @@ -42,11 +42,13 @@ lapack_int LAPACKE_zgebal( int matrix_layout, char job, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || - LAPACKE_lsame( job, 's' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'p' ) || + LAPACKE_lsame( job, 's' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgebrd.c b/lapack-netlib/LAPACKE/src/lapacke_zgebrd.c index d7ae0ebf9..cb3409aca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgebrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgebrd.c @@ -47,9 +47,11 @@ lapack_int LAPACKE_zgebrd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgecon.c b/lapack-netlib/LAPACKE/src/lapacke_zgecon.c index c74d1fcee..ee0f9c9fb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgecon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgecon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_zgecon( int matrix_layout, char norm, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeequ.c b/lapack-netlib/LAPACKE/src/lapacke_zgeequ.c index 058feae34..43b5c2f9f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeequ.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zgeequ( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zgeequ_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeequb.c b/lapack-netlib/LAPACKE/src/lapacke_zgeequb.c index 82dd4c226..a97aa2d16 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeequb.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zgeequb( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zgeequb_work( matrix_layout, m, n, a, lda, r, c, rowcnd, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgees.c b/lapack-netlib/LAPACKE/src/lapacke_zgees.c index 888467149..66ba8e35d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgees.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgees.c @@ -50,9 +50,11 @@ lapack_int LAPACKE_zgees( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeesx.c b/lapack-netlib/LAPACKE/src/lapacke_zgeesx.c index 391a21b70..f1792a6ab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeesx.c @@ -51,9 +51,11 @@ lapack_int LAPACKE_zgeesx( int matrix_layout, char jobvs, char sort, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeev.c b/lapack-netlib/LAPACKE/src/lapacke_zgeev.c index 56c00fe93..6d4fe9396 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeev.c @@ -49,9 +49,11 @@ lapack_int LAPACKE_zgeev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeevx.c b/lapack-netlib/LAPACKE/src/lapacke_zgeevx.c index 59953e0e1..e10df52a9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeevx.c @@ -52,9 +52,11 @@ lapack_int LAPACKE_zgeevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgehrd.c b/lapack-netlib/LAPACKE/src/lapacke_zgehrd.c index d713d3413..92263cfde 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgehrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgehrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgehrd( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgejsv.c b/lapack-netlib/LAPACKE/src/lapacke_zgejsv.c index 6eababe56..f3b5110a7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgejsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgejsv.c @@ -41,22 +41,22 @@ lapack_int LAPACKE_zgejsv( int matrix_layout, char joba, char jobu, char jobv, { lapack_int info = 0; lapack_int lwork = ( - // 1.1 - ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && + // 1.1 + ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? 2*n+1 : //1.2 - ( ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && + ( ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? n*n+3*n : //2.1 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && - (!( LAPACKE_lsame( jobu, 'u') || LAPACKE_lsame( jobu, 'f' ) )&& + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + !( LAPACKE_lsame( jobu, 'u') || LAPACKE_lsame( jobu, 'f' ) )&& ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? 3*n : //2.2 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && !( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? 3*n : @@ -79,10 +79,10 @@ lapack_int LAPACKE_zgejsv( int matrix_layout, char joba, char jobu, char jobv, ( ( ( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? 4*n*n: - 1) ) ) ) ) ) ) ) ); + 1) ) ) ) ) ) ) ); lapack_int lrwork = ( - // 1.1 - ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && + // 1.1 + ( LAPACKE_lsame( jobu, 'n' ) && LAPACKE_lsame( jobv, 'n' ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? MAX(7,n+2*m) : //1.2 @@ -90,13 +90,13 @@ lapack_int LAPACKE_zgejsv( int matrix_layout, char joba, char jobu, char jobv, !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) )) ? MAX(7,2*n) : //2.1 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && - (!( LAPACKE_lsame( jobu, 'u') || LAPACKE_lsame( jobu, 'f' ) ) && + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + !( LAPACKE_lsame( jobu, 'u') || LAPACKE_lsame( jobu, 'f' ) ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? MAX( 7, n+ 2*m ) : //2.2 - ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && + ( ( ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && !( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && !( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? MAX(7,2*n) : @@ -119,7 +119,7 @@ lapack_int LAPACKE_zgejsv( int matrix_layout, char joba, char jobu, char jobv, ( ( ( LAPACKE_lsame( jobu, 'u' ) || LAPACKE_lsame( jobu, 'f' ) ) && ( LAPACKE_lsame( jobv, 'v' ) || LAPACKE_lsame( jobv, 'j' ) ) && ( LAPACKE_lsame( jobt, 't' ) || LAPACKE_lsame( joba, 'f' ) || LAPACKE_lsame( joba, 'g' ) ))? MAX(7,2*n) : - 7) ) ) ) ) ) ) ) ); + 7) ) ) ) ) ) ) ); lapack_int* iwork = NULL; double* rwork = NULL; lapack_complex_double* cwork = NULL; @@ -130,11 +130,13 @@ lapack_int LAPACKE_zgejsv( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; - nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; + nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgelq.c b/lapack-netlib/LAPACKE/src/lapacke_zgelq.c index 5f1203506..e0206b23d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgelq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgelq.c @@ -28,14 +28,14 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zgelq * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" -lapack_int LAPACKE_zgelq_work( int matrix_layout, lapack_int m, lapack_int n, - lapack_complex_double* a, lapack_int lda, - lapack_complex_double* t, lapack_int tsize ) +lapack_int LAPACKE_zgelq( int matrix_layout, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int tsize ) { lapack_int info = 0; lapack_int lwork = -1; @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgelq_work( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgelq2.c b/lapack-netlib/LAPACKE/src/lapacke_zgelq2.c index 3515b9219..772db3c7f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgelq2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgelq2.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zgelq2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgelqf.c b/lapack-netlib/LAPACKE/src/lapacke_zgelqf.c index c977165b1..664cb4894 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgelqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgelqf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgelqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgels.c b/lapack-netlib/LAPACKE/src/lapacke_zgels.c index 40307299d..79e275a37 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgels.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgels.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zgels( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgelsd.c b/lapack-netlib/LAPACKE/src/lapacke_zgelsd.c index 05794e7ac..6d111c69f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgelsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgelsd.c @@ -55,15 +55,17 @@ lapack_int LAPACKE_zgelsd( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgelss.c b/lapack-netlib/LAPACKE/src/lapacke_zgelss.c index 41b6a9726..4ef86b39f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgelss.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgelss.c @@ -49,15 +49,17 @@ lapack_int LAPACKE_zgelss( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgelsy.c b/lapack-netlib/LAPACKE/src/lapacke_zgelsy.c index 8850ddb83..bac6ab6f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgelsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgelsy.c @@ -49,15 +49,17 @@ lapack_int LAPACKE_zgelsy( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &rcond, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgemlq.c b/lapack-netlib/LAPACKE/src/lapacke_zgemlq.c index b47ca26a9..946e2b5f3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgemlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgemlq.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zgemlq * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -48,15 +48,17 @@ lapack_int LAPACKE_zgemlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_d_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_z_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgemqr.c b/lapack-netlib/LAPACKE/src/lapacke_zgemqr.c index 07e1a7aa2..f5daf4afe 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgemqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgemqr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_zgemqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_zge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_z_nancheck( tsize, t, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_zge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_z_nancheck( tsize, t, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgemqrt.c b/lapack-netlib/LAPACKE/src/lapacke_zgemqrt.c index f1f32bb5b..222422f18 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgemqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgemqrt.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_zgemqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -12; - } - if( LAPACKE_zge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -12; + } + if( LAPACKE_zge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqlf.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqlf.c index 559393e05..b887f1d3a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqlf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqlf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgeqlf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqp3.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqp3.c index f46770403..bd20b8b08 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqp3.c @@ -47,9 +47,11 @@ lapack_int LAPACKE_zgeqp3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqpf.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqpf.c index 4f7b4c511..2b9cecf7f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqpf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqpf.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_zgeqpf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqr.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqr.c index 2370f4c23..3e567f553 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zgeqr * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgeqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqr2.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqr2.c index 48fb5cb0f..8c33e0cc3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqr2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqr2.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zgeqr2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqrf.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqrf.c index 0c8255a1f..10db33ab6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqrf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgeqrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqrfp.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqrfp.c index 30342f5cb..62e578715 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqrfp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqrfp.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgeqrfp( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqrt.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqrt.c index 248ed17de..5774d4b19 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqrt.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_zgeqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqrt2.c index 82ed75969..b31237c6a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqrt2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zgeqrt2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zgeqrt2_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgeqrt3.c b/lapack-netlib/LAPACKE/src/lapacke_zgeqrt3.c index e347df46a..51f0558bf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgeqrt3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgeqrt3.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zgeqrt3( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zgeqrt3_work( matrix_layout, m, n, a, lda, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgerfs.c b/lapack-netlib/LAPACKE/src/lapacke_zgerfs.c index 98d212852..2ee8f70e6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgerfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgerfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_zgerfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgerfsx.c b/lapack-netlib/LAPACKE/src/lapacke_zgerfsx.c index 90a9c0ed9..a993ab83d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgerfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgerfsx.c @@ -53,33 +53,35 @@ lapack_int LAPACKE_zgerfsx( int matrix_layout, char trans, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; } - } - if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -11; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'c' ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -12; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( equed, 'b' ) || LAPACKE_lsame( equed, 'r' ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -11; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgerqf.c b/lapack-netlib/LAPACKE/src/lapacke_zgerqf.c index d7b23b0e5..f77de8b16 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgerqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgerqf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgerqf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesdd.c b/lapack-netlib/LAPACKE/src/lapacke_zgesdd.c index 18a53e4e6..016ec4f68 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesdd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesdd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zgesdd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -52,16 +52,18 @@ lapack_int LAPACKE_zgesdd( int matrix_layout, char jobz, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Additional scalars initializations for work arrays */ if( LAPACKE_lsame( jobz, 'n' ) ) { lrwork = MAX(1,7*MIN(m,n)); } else { - lrwork = (size_t)MIN(m,n)*MAX(5*MIN(m,n)+7,2*MAX(m,n)+2*MIN(m,n)+1); + lrwork = (size_t)MAX(1,MIN(m,n)*MAX(5*MIN(m,n)+7,2*MAX(m,n)+2*MIN(m,n)+1)); } /* Allocate memory for working array(s) */ iwork = (lapack_int*) diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesv.c b/lapack-netlib/LAPACKE/src/lapacke_zgesv.c index b334aeea2..23015ccc6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zgesv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zgesv_work( matrix_layout, n, nrhs, a, lda, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvd.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvd.c index d1e2a862d..3a96c010d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvd.c @@ -50,9 +50,11 @@ lapack_int LAPACKE_zgesvd( int matrix_layout, char jobu, char jobvt, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvdx.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvdx.c index 4847dbf1c..2efe981f0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdx.c @@ -34,12 +34,12 @@ #include "lapacke_utils.h" lapack_int LAPACKE_zgesvdx( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, lapack_complex_double* a, - lapack_int lda, double vl, double vu, - lapack_int il, lapack_int iu, lapack_int* ns, - double* s, lapack_complex_double* u, lapack_int ldu, - lapack_complex_double* vt, lapack_int ldvt, - lapack_int* superb ) + lapack_int m, lapack_int n, lapack_complex_double* a, + lapack_int lda, double vl, double vu, + lapack_int il, lapack_int iu, lapack_int* ns, + double* s, lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* vt, lapack_int ldvt, + lapack_int* superb ) { lapack_int info = 0; lapack_int lwork = -1; @@ -54,14 +54,16 @@ lapack_int LAPACKE_zgesvdx( int matrix_layout, char jobu, char jobvt, char range return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ info = LAPACKE_zgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, + m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, &work_query, lwork, rwork, iwork ); if( info != 0 ) { goto exit_level_0; @@ -69,7 +71,7 @@ lapack_int LAPACKE_zgesvdx( int matrix_layout, char jobu, char jobvt, char range lwork = LAPACK_Z2INT (work_query); /* Allocate memory for work arrays */ work = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; @@ -86,8 +88,8 @@ lapack_int LAPACKE_zgesvdx( int matrix_layout, char jobu, char jobvt, char range } /* Call middle-level interface */ info = LAPACKE_zgesvdx_work( matrix_layout, jobu, jobvt, range, - m, n, a, lda, vl, vu, il, iu, ns, s, u, - ldu, vt, ldvt, work, lwork, rwork, iwork ); + m, n, a, lda, vl, vu, il, iu, ns, s, u, + ldu, vt, ldvt, work, lwork, rwork, iwork ); /* Backup significant data from working array(s) */ for( i=0; i<12*MIN(m,n)-1; i++ ) { superb[i] = iwork[i+1]; diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvdx_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvdx_work.c index 3070687a7..d9e9cb65c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdx_work.c @@ -34,19 +34,19 @@ #include "lapacke_utils.h" lapack_int LAPACKE_zgesvdx_work( int matrix_layout, char jobu, char jobvt, char range, - lapack_int m, lapack_int n, lapack_complex_double* a, - lapack_int lda, double vl, double vu, - lapack_int il, lapack_int iu, lapack_int* ns, - double* s, lapack_complex_double* u, lapack_int ldu, - lapack_complex_double* vt, lapack_int ldvt, - lapack_complex_double* work, lapack_int lwork, - double* rwork, lapack_int* iwork ) + lapack_int m, lapack_int n, lapack_complex_double* a, + lapack_int lda, double vl, double vu, + lapack_int il, lapack_int iu, lapack_int* ns, + double* s, lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* vt, lapack_int ldvt, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int* iwork ) { lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_zgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda, &vl, &vu, - &il, &iu, ns, s, u, &ldu, vt, &ldvt, + &il, &iu, ns, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; @@ -85,7 +85,7 @@ lapack_int LAPACKE_zgesvdx_work( int matrix_layout, char jobu, char jobvt, char /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { LAPACK_zgesvdx( &jobu, &jobvt, &range, &m, &n, a, &lda_t, &vl, &vu, - &il, &iu, ns, s, u, &ldu_t, vt, + &il, &iu, ns, s, u, &ldu_t, vt, &ldvt_t, work, &lwork, rwork, iwork, &info ); return (info < 0) ? (info - 1) : info; } @@ -116,7 +116,7 @@ lapack_int LAPACKE_zgesvdx_work( int matrix_layout, char jobu, char jobvt, char LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_zgesvdx( &jobu, &jobvt, &range, &m, &n, a_t, &lda_t, &vl, &vu, - &il, &iu, ns, s, u_t, &ldu_t, vt_t, + &il, &iu, ns, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvj.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvj.c index dfa0ca88a..77805dbd5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvj.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvj.c @@ -51,15 +51,17 @@ lapack_int LAPACKE_zgesvj( int matrix_layout, char joba, char jobu, char jobv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : - ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0); - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -7; - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_v = LAPACKE_lsame( jobv, 'v' ) ? MAX(0,n) : + ( LAPACKE_lsame( jobv, 'a' ) ? MAX(0,mv) : 0); + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, n, v, ldv ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvj_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvj_work.c index 37e6c1ee6..1a46cfaef 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvj_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvj_work.c @@ -76,7 +76,7 @@ lapack_int LAPACKE_zgesvj_work( int matrix_layout, char joba, char jobu, } if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) ) { v_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) ); + LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) ); if( v_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvx.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvx.c index 886b217e9..344b5778f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvx.c @@ -51,28 +51,30 @@ lapack_int LAPACKE_zgesvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvxx.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvxx.c index 3bc9e2584..cf0822b15 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvxx.c @@ -53,33 +53,35 @@ lapack_int LAPACKE_zgesvxx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'c' ) ) ) { - if( LAPACKE_d_nancheck( n, c, 1 ) ) { - return -13; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, af, ldaf ) ) { + return -8; + } } - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -25; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || - LAPACKE_lsame( *equed, 'r' ) ) ) { - if( LAPACKE_d_nancheck( n, r, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'c' ) ) ) { + if( LAPACKE_d_nancheck( n, c, 1 ) ) { + return -13; + } + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -25; + } + } + if( LAPACKE_lsame( fact, 'f' ) && ( LAPACKE_lsame( *equed, 'b' ) || + LAPACKE_lsame( *equed, 'r' ) ) ) { + if( LAPACKE_d_nancheck( n, r, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetf2.c b/lapack-netlib/LAPACKE/src/lapacke_zgetf2.c index 8487197b4..9e05cfed9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgetf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetf2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zgetf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zgetf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetrf.c b/lapack-netlib/LAPACKE/src/lapacke_zgetrf.c index 9e028b8d4..f074b9edb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgetrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetrf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zgetrf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zgetrf_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetrf2.c b/lapack-netlib/LAPACKE/src/lapacke_zgetrf2.c index 2e8cd78bc..44793c9f8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgetrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetrf2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zgetrf2( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zgetrf2_work( matrix_layout, m, n, a, lda, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetri.c b/lapack-netlib/LAPACKE/src/lapacke_zgetri.c index 8a63c3ac2..f624deaeb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgetri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetri.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zgetri( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetrs.c b/lapack-netlib/LAPACKE/src/lapacke_zgetrs.c index dfc3af829..3ce08b778 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgetrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zgetrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_zgetrs_work( matrix_layout, trans, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetsls.c b/lapack-netlib/LAPACKE/src/lapacke_zgetsls.c index 6e73657d5..62fe80b1e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgetsls.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetsls.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zgetsls( int matrix_layout, char trans, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, MAX(m,n), nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggbak.c b/lapack-netlib/LAPACKE/src/lapacke_zggbak.c index dc0f81b17..9acddb97c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggbak.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggbak.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_zggbak( int matrix_layout, char job, char side, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, lscale, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n, rscale, 1 ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, m, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, lscale, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( n, rscale, 1 ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, m, v, ldv ) ) { + return -10; + } } #endif return LAPACKE_zggbak_work( matrix_layout, job, side, n, ilo, ihi, lscale, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggbal.c b/lapack-netlib/LAPACKE/src/lapacke_zggbal.c index 7197d6899..8bbfd2893 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggbal.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggbal.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_zggbal( int matrix_layout, char job, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } - } - if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || - LAPACKE_lsame( job, 'b' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -6; + if( LAPACKE_lsame( job, 'p' ) || LAPACKE_lsame( job, 's' ) || + LAPACKE_lsame( job, 'b' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgges.c b/lapack-netlib/LAPACKE/src/lapacke_zgges.c index bcfc56eea..68478fe51 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgges.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgges.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_zgges( int matrix_layout, char jobvsl, char jobvsr, char sort return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgges3.c b/lapack-netlib/LAPACKE/src/lapacke_zgges3.c index fc9813c8e..7d61245e1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgges3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgges3.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_zgges3( int matrix_layout, char jobvsl, char jobvsr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggesx.c b/lapack-netlib/LAPACKE/src/lapacke_zggesx.c index e36e794d6..6b4d27045 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggesx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggesx.c @@ -58,12 +58,14 @@ lapack_int LAPACKE_zggesx( int matrix_layout, char jobvsl, char jobvsr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggev.c b/lapack-netlib/LAPACKE/src/lapacke_zggev.c index 1b8bf00f8..953b3c0e3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggev.c @@ -51,12 +51,14 @@ lapack_int LAPACKE_zggev( int matrix_layout, char jobvl, char jobvr, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggev3.c b/lapack-netlib/LAPACKE/src/lapacke_zggev3.c index bfac8fe91..626fe3da9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggev3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggev3.c @@ -52,12 +52,14 @@ lapack_int LAPACKE_zggev3( int matrix_layout, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggevx.c b/lapack-netlib/LAPACKE/src/lapacke_zggevx.c index b43dcb53d..ca39e9919 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggevx.c @@ -59,12 +59,14 @@ lapack_int LAPACKE_zggevx( int matrix_layout, char balanc, char jobvl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggglm.c b/lapack-netlib/LAPACKE/src/lapacke_zggglm.c index c4bb1313e..0c9912ca9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggglm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggglm.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_zggglm( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -7; - } - if( LAPACKE_z_nancheck( n, d, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -7; + } + if( LAPACKE_z_nancheck( n, d, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgghd3.c b/lapack-netlib/LAPACKE/src/lapacke_zgghd3.c index c43f1a55d..97d1e6116 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgghd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgghd3.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_zgghd3( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgghrd.c b/lapack-netlib/LAPACKE/src/lapacke_zgghrd.c index f931b161e..e6b2ddca3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgghrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgghrd.c @@ -45,21 +45,23 @@ lapack_int LAPACKE_zgghrd( int matrix_layout, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -13; + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -11; + } + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -13; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgglse.c b/lapack-netlib/LAPACKE/src/lapacke_zgglse.c index 2138767d7..40faf672f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgglse.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgglse.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_zgglse( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -7; - } - if( LAPACKE_z_nancheck( m, c, 1 ) ) { - return -9; - } - if( LAPACKE_z_nancheck( p, d, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -7; + } + if( LAPACKE_z_nancheck( m, c, 1 ) ) { + return -9; + } + if( LAPACKE_z_nancheck( p, d, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggqrf.c b/lapack-netlib/LAPACKE/src/lapacke_zggqrf.c index 5b80b4ca5..35c908f0a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggqrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggqrf.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_zggqrf( int matrix_layout, lapack_int n, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, m, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, p, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, m, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, p, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggrqf.c b/lapack-netlib/LAPACKE/src/lapacke_zggrqf.c index fb6acc3c6..328f8b0a6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggrqf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggrqf.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_zggrqf( int matrix_layout, lapack_int m, lapack_int p, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggsvd.c b/lapack-netlib/LAPACKE/src/lapacke_zggsvd.c index 9aa39dfa4..e6425f485 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggsvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggsvd.c @@ -52,12 +52,14 @@ lapack_int LAPACKE_zggsvd( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggsvd3.c b/lapack-netlib/LAPACKE/src/lapacke_zggsvd3.c index e274fe049..62dce8038 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggsvd3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggsvd3.c @@ -54,12 +54,14 @@ lapack_int LAPACKE_zggsvd3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; + } } #endif /* Query optimal size for working array */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggsvp.c b/lapack-netlib/LAPACKE/src/lapacke_zggsvp.c index 01136e81a..537bf6c03 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggsvp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggsvp.c @@ -53,18 +53,20 @@ lapack_int LAPACKE_zggsvp( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zggsvp3.c b/lapack-netlib/LAPACKE/src/lapacke_zggsvp3.c index d853f7da3..d549305cc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zggsvp3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zggsvp3.c @@ -55,18 +55,20 @@ lapack_int LAPACKE_zggsvp3( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -10; - } - if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { - return -12; - } - if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -10; + } + if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { + return -12; + } + if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { + return -13; + } } #endif /* Query optimal size for working array */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgtcon.c b/lapack-netlib/LAPACKE/src/lapacke_zgtcon.c index bde0c83b1..50f334ff5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgtcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgtcon.c @@ -43,21 +43,23 @@ lapack_int LAPACKE_zgtcon( char norm, lapack_int n, lapack_int info = 0; lapack_complex_double* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -8; - } - if( LAPACKE_z_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { - return -3; - } - if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -8; + } + if( LAPACKE_z_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { + return -3; + } + if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgtrfs.c b/lapack-netlib/LAPACKE/src/lapacke_zgtrfs.c index b4fea7bf8..9df5ce920 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgtrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgtrfs.c @@ -54,33 +54,35 @@ lapack_int LAPACKE_zgtrfs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_z_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_z_nancheck( n, df, 1 ) ) { - return -9; - } - if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n-1, dlf, 1 ) ) { - return -8; - } - if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { - return -11; - } - if( LAPACKE_z_nancheck( n-1, duf, 1 ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -15; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_z_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_z_nancheck( n, df, 1 ) ) { + return -9; + } + if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n-1, dlf, 1 ) ) { + return -8; + } + if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { + return -11; + } + if( LAPACKE_z_nancheck( n-1, duf, 1 ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -15; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgtsv.c b/lapack-netlib/LAPACKE/src/lapacke_zgtsv.c index e9c8cf5fe..28c3fb3ef 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgtsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgtsv.c @@ -43,18 +43,20 @@ lapack_int LAPACKE_zgtsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_z_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_z_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { + return -6; + } } #endif return LAPACKE_zgtsv_work( matrix_layout, n, nrhs, dl, d, du, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgtsvx.c b/lapack-netlib/LAPACKE/src/lapacke_zgtsvx.c index c151388b4..c20a85025 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgtsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgtsvx.c @@ -54,37 +54,39 @@ lapack_int LAPACKE_zgtsvx( int matrix_layout, char fact, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -14; - } - if( LAPACKE_z_nancheck( n, d, 1 ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_z_nancheck( n, df, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -14; } - } - if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_z_nancheck( n-1, dlf, 1 ) ) { - return -9; + if( LAPACKE_z_nancheck( n, d, 1 ) ) { + return -7; } - } - if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { - return -8; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_z_nancheck( n, df, 1 ) ) { + return -10; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_z_nancheck( n-1, duf, 1 ) ) { - return -11; + if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_z_nancheck( n-1, dlf, 1 ) ) { + return -9; + } + } + if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { + return -8; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { + return -12; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_z_nancheck( n-1, duf, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgttrf.c b/lapack-netlib/LAPACKE/src/lapacke_zgttrf.c index 2afda6aa2..8445ea672 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgttrf.c @@ -38,15 +38,17 @@ lapack_int LAPACKE_zgttrf( lapack_int n, lapack_complex_double* dl, lapack_complex_double* du2, lapack_int* ipiv ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_z_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { - return -2; - } - if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_z_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { + return -2; + } + if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { + return -4; + } } #endif return LAPACKE_zgttrf_work( n, dl, d, du, du2, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgttrs.c b/lapack-netlib/LAPACKE/src/lapacke_zgttrs.c index d730834f8..bc8599fab 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgttrs.c @@ -46,21 +46,23 @@ lapack_int LAPACKE_zgttrs( int matrix_layout, char trans, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_z_nancheck( n, d, 1 ) ) { - return -6; - } - if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { - return -7; - } - if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_z_nancheck( n, d, 1 ) ) { + return -6; + } + if( LAPACKE_z_nancheck( n-1, dl, 1 ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n-1, du, 1 ) ) { + return -7; + } + if( LAPACKE_z_nancheck( n-2, du2, 1 ) ) { + return -8; + } } #endif return LAPACKE_zgttrs_work( matrix_layout, trans, n, nrhs, dl, d, du, du2, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbev.c b/lapack-netlib/LAPACKE/src/lapacke_zhbev.c index 80784877d..30ac7fc79 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbev.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zhbev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zhbev_2stage.c index 6bad98ddb..dad35b84e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbev_2stage.c @@ -48,9 +48,11 @@ lapack_int LAPACKE_zhbev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbevd.c b/lapack-netlib/LAPACKE/src/lapacke_zhbevd.c index 410735f92..95c6d3a54 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbevd.c @@ -53,9 +53,11 @@ lapack_int LAPACKE_zhbevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zhbevd_2stage.c index cf1263d49..eca867b28 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbevd_2stage.c @@ -53,9 +53,11 @@ lapack_int LAPACKE_zhbevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbevx.c b/lapack-netlib/LAPACKE/src/lapacke_zhbevx.c index 6e75bae95..6d069179e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbevx.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_zhbevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zhbevx_2stage.c index 5adf4992c..126182cb8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbevx_2stage.c @@ -53,21 +53,23 @@ lapack_int LAPACKE_zhbevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbgst.c b/lapack-netlib/LAPACKE/src/lapacke_zhbgst.c index 86a0207a0..54deacf5a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbgst.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zhbgst( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbgv.c b/lapack-netlib/LAPACKE/src/lapacke_zhbgv.c index 2429ab01a..7f1e6796a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbgv.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zhbgv( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbgvd.c b/lapack-netlib/LAPACKE/src/lapacke_zhbgvd.c index bd9c94b29..91bfc0a73 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbgvd.c @@ -54,12 +54,14 @@ lapack_int LAPACKE_zhbgvd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -7; - } - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -7; + } + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbgvx.c b/lapack-netlib/LAPACKE/src/lapacke_zhbgvx.c index f2254fe90..d61439a0c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbgvx.c @@ -52,24 +52,26 @@ lapack_int LAPACKE_zhbgvx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { - return -8; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -18; - } - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -10; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, ka, ab, ldab ) ) { + return -8; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -15; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -18; + } + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -10; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -14; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -15; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhbtrd.c b/lapack-netlib/LAPACKE/src/lapacke_zhbtrd.c index c3abb6a25..0682d0a57 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhbtrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhbtrd.c @@ -45,13 +45,15 @@ lapack_int LAPACKE_zhbtrd( int matrix_layout, char vect, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_lsame( vect, 'u' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_lsame( vect, 'u' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhecon.c b/lapack-netlib/LAPACKE/src/lapacke_zhecon.c index 731796d34..f6b9d8c39 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhecon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhecon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_zhecon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhecon_3.c b/lapack-netlib/LAPACKE/src/lapacke_zhecon_3.c index a08bc8c09..94dd047de 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhecon_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhecon_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zhecon_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,20 +40,23 @@ lapack_int LAPACKE_zhecon_3( int matrix_layout, char uplo, lapack_int n, { lapack_int info = 0; lapack_complex_double* work = NULL; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zhecon_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheequb.c b/lapack-netlib/LAPACKE/src/lapacke_zheequb.c index c0790a014..cbb346e48 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zheequb( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheev.c b/lapack-netlib/LAPACKE/src/lapacke_zheev.c index 98c94400b..2c40428da 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheev.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zheev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheev_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zheev_2stage.c index c7ef18da7..4369de26a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheev_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheev_2stage.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zheev_2stage( int matrix_layout, char jobz, char uplo, lapack return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevd.c b/lapack-netlib/LAPACKE/src/lapacke_zheevd.c index 61e2d2589..4b1afb95c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevd.c @@ -51,9 +51,11 @@ lapack_int LAPACKE_zheevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevd_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zheevd_2stage.c index b6dd0a202..9016da54c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevd_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevd_2stage.c @@ -51,9 +51,11 @@ lapack_int LAPACKE_zheevd_2stage( int matrix_layout, char jobz, char uplo, lapac return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevr.c b/lapack-netlib/LAPACKE/src/lapacke_zheevr.c index d20b0f195..52e7a5bee 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevr.c @@ -55,21 +55,23 @@ lapack_int LAPACKE_zheevr( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevr_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zheevr_2stage.c index 38748e4cb..faf949aef 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevr_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevr_2stage.c @@ -55,21 +55,23 @@ lapack_int LAPACKE_zheevr_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevr_work.c b/lapack-netlib/LAPACKE/src/lapacke_zheevr_work.c index 2ccf9f940..0b8bd5a3e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zheevr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -54,8 +54,9 @@ lapack_int LAPACKE_zheevr_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevx.c b/lapack-netlib/LAPACKE/src/lapacke_zheevx.c index 5fa4f3289..f93e98743 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevx.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_zheevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevx_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zheevx_2stage.c index 1350e1796..78403631c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevx_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevx_2stage.c @@ -51,21 +51,23 @@ lapack_int LAPACKE_zheevx_2stage( int matrix_layout, char jobz, char range, char return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -12; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -9; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -12; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -8; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheevx_work.c b/lapack-netlib/LAPACKE/src/lapacke_zheevx_work.c index b7ac0ba5d..b77200aca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheevx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheevx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zheevx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -53,8 +53,9 @@ lapack_int LAPACKE_zheevx_work( int matrix_layout, char jobz, char range, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int ncols_z = ( LAPACKE_lsame( range, 'a' ) || - LAPACKE_lsame( range, 'v' ) ) ? n : + lapack_int ncols_z = ( !LAPACKE_lsame( jobz, 'v' ) ) ? 1 : + ( LAPACKE_lsame( range, 'a' ) || + LAPACKE_lsame( range, 'v' ) ) ? n : ( LAPACKE_lsame( range, 'i' ) ? (iu-il+1) : 1); lapack_int lda_t = MAX(1,n); lapack_int ldz_t = MAX(1,n); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhegst.c b/lapack-netlib/LAPACKE/src/lapacke_zhegst.c index c9835dff3..aa2d84d84 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhegst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhegst.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zhegst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zhegst_work( matrix_layout, itype, uplo, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhegv.c b/lapack-netlib/LAPACKE/src/lapacke_zhegv.c index 49386ad74..683fcf487 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhegv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhegv.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_zhegv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhegv_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zhegv_2stage.c index 31f8eba86..0f1b415a9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhegv_2stage.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhegv_2stage.c @@ -48,12 +48,14 @@ lapack_int LAPACKE_zhegv_2stage( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhegvd.c b/lapack-netlib/LAPACKE/src/lapacke_zhegvd.c index 89458d754..81c3d29b4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhegvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhegvd.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_zhegvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhegvx.c b/lapack-netlib/LAPACKE/src/lapacke_zhegvx.c index 18e94dd64..492bc4dad 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhegvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhegvx.c @@ -53,24 +53,26 @@ lapack_int LAPACKE_zhegvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -15; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -7; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -12; + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -15; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -11; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhegvx_work.c b/lapack-netlib/LAPACKE/src/lapacke_zhegvx_work.c index 4da4f560a..bee995ff5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhegvx_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhegvx_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zhegvx * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_zherfs.c b/lapack-netlib/LAPACKE/src/lapacke_zherfs.c index 24bd632b2..1507ae58e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zherfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zherfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_zherfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zherfsx.c b/lapack-netlib/LAPACKE/src/lapacke_zherfsx.c index d3371c8d8..c50e0b355 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zherfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zherfsx.c @@ -52,28 +52,30 @@ lapack_int LAPACKE_zherfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -22; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -11; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhesv.c b/lapack-netlib/LAPACKE/src/lapacke_zhesv.c index 807e61a0b..93baf17e1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhesv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhesv.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zhesv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa.c b/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa.c index 29818469e..d8be88322 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zhesv_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage.c new file mode 100644 index 000000000..c09423d93 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage.c @@ -0,0 +1,90 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zhesv_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhesv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_double* work = NULL; + lapack_complex_double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zhesv_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zhesv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zhesv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhesv_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage_work.c new file mode 100644 index 000000000..9906210ef --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zhesv_aa_2stage_work.c @@ -0,0 +1,124 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zhesv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhesv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zhesv_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_double* a_t = NULL; + lapack_complex_double* tb_t = NULL; + lapack_complex_double* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_zhesv_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zhesv_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_zhesv_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zhesv_aa_2stage( &uplo, &n, &nrhs, a, &lda_t, + tb, <b, ipiv, ipiv2, b, &ldb_t, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_zhe_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zhesv_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhesv_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zhesv_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhesv_rk.c b/lapack-netlib/LAPACKE/src/lapacke_zhesv_rk.c index 777a7b9fa..469579d89 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhesv_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhesv_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zhesv_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,15 +47,14 @@ lapack_int LAPACKE_zhesv_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n, e, 1) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhesvx.c b/lapack-netlib/LAPACKE/src/lapacke_zhesvx.c index b3f36c2ce..71d22a317 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhesvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhesvx.c @@ -51,17 +51,19 @@ lapack_int LAPACKE_zhesvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhesvxx.c b/lapack-netlib/LAPACKE/src/lapacke_zhesvxx.c index 29e9b4e1a..8571ca7da 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhesvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhesvxx.c @@ -53,26 +53,28 @@ lapack_int LAPACKE_zhesvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -24; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -24; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zheswapr.c b/lapack-netlib/LAPACKE/src/lapacke_zheswapr.c index 61dbb24ec..070ec0992 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zheswapr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zheswapr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zheswapr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zheswapr_work( matrix_layout, uplo, n, a, lda, i1, i2 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrd.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrd.c index ea0199cfa..35adae4ff 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrd.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zhetrd( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf.c index 0613fceaa..a160840b7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zhetrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa.c index 8649ad0b5..a2a20f2eb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zhetrf_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage.c new file mode 100644 index 000000000..d009d8e9e --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage.c @@ -0,0 +1,86 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zhetrf_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhetrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2 ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_double* work = NULL; + lapack_complex_double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zhetrf_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zhetrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zhetrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhetrf_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage_work.c new file mode 100644 index 000000000..5b8010d9e --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_2stage_work.c @@ -0,0 +1,109 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zhetrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhetrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zhetrf_aa_2stage( &uplo, &n, a, &lda, tb, + <b, ipiv, ipiv2, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_double* a_t = NULL; + lapack_complex_double* tb_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_zhetrf_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zhetrf_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zhetrf_aa_2stage( &uplo, &n, a, &lda_t, + tb, <b, ipiv, ipiv2, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_zhe_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zhetrf_aa_2stage( &uplo, &n, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhetrf_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zhetrf_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk.c index 19dc423dc..187de8a4d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zhetrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,12 +46,11 @@ lapack_int LAPACKE_zhetrf_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk_work.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk_work.c index 45d6964a2..b5c39c9ac 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rk_work.c @@ -28,14 +28,14 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zhetrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_zhetrf_rk_work( int matrix_layout, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, - lapack_complex_double* e, + lapack_complex_double* e, lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ) { diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rook.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rook.c index 1a3ed918c..16e8f4799 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_rook.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zhetrf_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetri.c b/lapack-netlib/LAPACKE/src/lapacke_zhetri.c index 725b48636..892da6e21 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetri.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zhetri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetri2.c b/lapack-netlib/LAPACKE/src/lapacke_zhetri2.c index 6df4c4f09..54f7b9b9a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetri2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetri2.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zhetri2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetri2x.c b/lapack-netlib/LAPACKE/src/lapacke_zhetri2x.c index b991412b8..a07bc8d52 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetri2x.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetri2x.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zhetri2x( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetri_3.c b/lapack-netlib/LAPACKE/src/lapacke_zhetri_3.c index 33790c2f7..32692e0ca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetri_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetri_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zhetri_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -41,17 +41,20 @@ lapack_int LAPACKE_zhetri_3( int matrix_layout, char uplo, lapack_int n, lapack_int lwork = -1; lapack_complex_double* work = NULL; lapack_complex_double work_query; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zhetri_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrs.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrs.c index 410f3919a..530e3fd2e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zhetrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_zhetrs_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrs2.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrs2.c index 0e72494b6..be8c2418d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrs2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrs2.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_zhetrs2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_3.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_3.c index 016bc7929..fb74e9bf9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zhetrs_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -44,17 +44,19 @@ lapack_int LAPACKE_zhetrs_3( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n, e ,1 ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n, e ,1 ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_zhetrs_3_work( matrix_layout, uplo, n, nrhs, a, lda, - e, ipiv, b, ldb ); + e, ipiv, b, ldb ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa.c index b15786f1f..ddcd65941 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zhetrs_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage.c new file mode 100644 index 000000000..b083d6f38 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage.c @@ -0,0 +1,66 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zhetrs_aa_2stage +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhetrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tb, + lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zhetrs_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Call middle-level interface */ + info = LAPACKE_zhetrs_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage_work.c new file mode 100644 index 000000000..611123367 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_aa_2stage_work.c @@ -0,0 +1,114 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zhetrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhetrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zhetrs_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_double* a_t = NULL; + lapack_complex_double* tb_t = NULL; + lapack_complex_double* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_zhetrs_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zhetrs_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_zhetrs_aa_2stage_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_zhe_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zhetrs_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhetrs_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zhetrs_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_rook.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_rook.c index 806732beb..befeafda3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrs_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrs_rook.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zhetrs_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_zhetrs_rook_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhfrk.c b/lapack-netlib/LAPACKE/src/lapacke_zhfrk.c index 6a42a47e4..3824692c2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhfrk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhfrk.c @@ -44,20 +44,22 @@ lapack_int LAPACKE_zhfrk( int matrix_layout, char transr, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ka = LAPACKE_lsame( trans, 'n' ) ? k : n; - na = LAPACKE_lsame( trans, 'n' ) ? n : k; - if( LAPACKE_zge_nancheck( matrix_layout, na, ka, a, lda ) ) { - return -8; - } - if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &beta, 1 ) ) { - return -10; - } - if( LAPACKE_zpf_nancheck( n, c ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ka = LAPACKE_lsame( trans, 'n' ) ? k : n; + na = LAPACKE_lsame( trans, 'n' ) ? n : k; + if( LAPACKE_zge_nancheck( matrix_layout, na, ka, a, lda ) ) { + return -8; + } + if( LAPACKE_d_nancheck( 1, &alpha, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &beta, 1 ) ) { + return -10; + } + if( LAPACKE_zpf_nancheck( n, c ) ) { + return -11; + } } #endif return LAPACKE_zhfrk_work( matrix_layout, transr, uplo, trans, n, k, alpha, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhgeqz.c b/lapack-netlib/LAPACKE/src/lapacke_zhgeqz.c index f91988c7c..fc912428b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhgeqz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhgeqz.c @@ -52,21 +52,23 @@ lapack_int LAPACKE_zhgeqz( int matrix_layout, char job, char compq, char compz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -8; - } - if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -14; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -8; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -10; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -16; + if( LAPACKE_lsame( compq, 'i' ) || LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -14; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -10; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -16; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpcon.c b/lapack-netlib/LAPACKE/src/lapacke_zhpcon.c index 2ca04da0a..b458f63a1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpcon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_zhpcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -6; - } - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -6; + } + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpev.c b/lapack-netlib/LAPACKE/src/lapacke_zhpev.c index d442381b6..72f5850f2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpev.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpev.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_zhpev( int matrix_layout, char jobz, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpevd.c b/lapack-netlib/LAPACKE/src/lapacke_zhpevd.c index ed8026679..948bb9c10 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpevd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpevd.c @@ -52,9 +52,11 @@ lapack_int LAPACKE_zhpevd( int matrix_layout, char jobz, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -5; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpevx.c b/lapack-netlib/LAPACKE/src/lapacke_zhpevx.c index 09839ef8e..86293e91e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpevx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpevx.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_zhpevx( int matrix_layout, char jobz, char range, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpgst.c b/lapack-netlib/LAPACKE/src/lapacke_zhpgst.c index f589855e9..6099fda94 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpgst.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpgst.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_zhpgst( int matrix_layout, lapack_int itype, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zhp_nancheck( n, bp ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zhp_nancheck( n, bp ) ) { + return -6; + } } #endif return LAPACKE_zhpgst_work( matrix_layout, itype, uplo, n, ap, bp ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpgv.c b/lapack-netlib/LAPACKE/src/lapacke_zhpgv.c index b5688122a..765bbd4d9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpgv.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zhpgv( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_zhp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_zhp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpgvd.c b/lapack-netlib/LAPACKE/src/lapacke_zhpgvd.c index 5be65b321..be18d3313 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpgvd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpgvd.c @@ -53,12 +53,14 @@ lapack_int LAPACKE_zhpgvd( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_zhp_nancheck( n, bp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_zhp_nancheck( n, bp ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpgvx.c b/lapack-netlib/LAPACKE/src/lapacke_zhpgvx.c index c6c378f3a..3f572238f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpgvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpgvx.c @@ -50,24 +50,26 @@ lapack_int LAPACKE_zhpgvx( int matrix_layout, lapack_int itype, char jobz, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -13; - } - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -7; - } - if( LAPACKE_zhp_nancheck( n, bp ) ) { - return -8; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -13; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -10; + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -7; + } + if( LAPACKE_zhp_nancheck( n, bp ) ) { + return -8; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -9; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhprfs.c b/lapack-netlib/LAPACKE/src/lapacke_zhprfs.c index 515814827..4a8c71593 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhprfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_zhprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpsv.c b/lapack-netlib/LAPACKE/src/lapacke_zhpsv.c index 3157ba4b6..e0be842f3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zhpsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zhpsv_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhpsvx.c b/lapack-netlib/LAPACKE/src/lapacke_zhpsvx.c index f3f41d500..b3035291a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhpsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhpsvx.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_zhpsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zhp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zhp_nancheck( n, afp ) ) { + return -7; + } + } + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhptrd.c b/lapack-netlib/LAPACKE/src/lapacke_zhptrd.c index 5e8d1d360..8af15b65b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhptrd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhptrd.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zhptrd( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_zhptrd_work( matrix_layout, uplo, n, ap, d, e, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhptrf.c b/lapack-netlib/LAPACKE/src/lapacke_zhptrf.c index 1b41ed547..f22a708bf 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zhptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_zhptrf_work( matrix_layout, uplo, n, ap, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhptri.c b/lapack-netlib/LAPACKE/src/lapacke_zhptri.c index 76a0a8a2f..c20c3a577 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhptri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zhptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhptrs.c b/lapack-netlib/LAPACKE/src/lapacke_zhptrs.c index 790e91b71..5a62629d4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhptrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zhptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zhptrs_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhsein.c b/lapack-netlib/LAPACKE/src/lapacke_zhsein.c index b013f7aa5..481485cde 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhsein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhsein.c @@ -49,22 +49,24 @@ lapack_int LAPACKE_zhsein( int matrix_layout, char job, char eigsrc, char initv, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'l' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'r' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } + } + if( LAPACKE_z_nancheck( n, w, 1 ) ) { + return -9; } - } - if( LAPACKE_z_nancheck( n, w, 1 ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhseqr.c b/lapack-netlib/LAPACKE/src/lapacke_zhseqr.c index 33ebf25d1..6d1f71a99 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhseqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhseqr.c @@ -48,13 +48,15 @@ lapack_int LAPACKE_zhseqr( int matrix_layout, char job, char compz, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, h, ldh ) ) { - return -7; - } - if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, h, ldh ) ) { + return -7; + } + if( LAPACKE_lsame( compz, 'i' ) || LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlacgv.c b/lapack-netlib/LAPACKE/src/lapacke_zlacgv.c index b2316d6f0..3b1130ba5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlacgv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlacgv.c @@ -37,9 +37,11 @@ lapack_int LAPACKE_zlacgv( lapack_int n, lapack_complex_double* x, lapack_int incx ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_z_nancheck( 1+(n-1)*ABS(incx), x, incx ) ) { - return -2; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_z_nancheck( 1+(n-1)*ABS(incx), x, incx ) ) { + return -2; + } } #endif return LAPACKE_zlacgv_work( n, x, incx ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlacn2.c b/lapack-netlib/LAPACKE/src/lapacke_zlacn2.c index e64c0716a..76c6fd9e2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlacn2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlacn2.c @@ -38,12 +38,14 @@ lapack_int LAPACKE_zlacn2( lapack_int n, lapack_complex_double* v, double* est, lapack_int* kase, lapack_int* isave ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, est, 1 ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n, x, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, est, 1 ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n, x, 1 ) ) { + return -3; + } } #endif return LAPACKE_zlacn2_work( n, v, x, est, kase, isave ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlacp2.c b/lapack-netlib/LAPACKE/src/lapacke_zlacp2.c index d80fbaf5b..da5a178f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlacp2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlacp2.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zlacp2( int matrix_layout, char uplo, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_zlacp2_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlacp2_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlacp2_work.c index 5196d7a3c..87dbc369e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlacp2_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlacp2_work.c @@ -31,7 +31,6 @@ * Generated January, 2013 *****************************************************************************/ -#include "lapacke.h" #include "lapacke_utils.h" lapack_int LAPACKE_zlacp2_work( int matrix_layout, char uplo, lapack_int m, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlacpy.c b/lapack-netlib/LAPACKE/src/lapacke_zlacpy.c index 8e59e6b02..2b9b582f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlacpy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlacpy.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zlacpy( int matrix_layout, char uplo, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_zlacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlacrm.c b/lapack-netlib/LAPACKE/src/lapacke_zlacrm.c new file mode 100644 index 000000000..a6a295d95 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zlacrm.c @@ -0,0 +1,76 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zlacrm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zlacrm(int matrix_layout, lapack_int m, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, const double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc) +{ + lapack_int info = 0; + double* rwork = NULL; + + if (matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR) { + LAPACKE_xerbla("LAPACKE_zlacrm", -1); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if ( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -6; + } + } +#endif + /* Allocate memory for work array(s) */ + rwork = (double*) + LAPACKE_malloc(sizeof(double) * MAX(1, 2 * m * n)); + if (rwork == NULL) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zlacrm_work(matrix_layout, m, n, a, lda, b, ldb, + c, ldc, rwork); + /* Release memory and exit */ + LAPACKE_free(rwork); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zlacrm", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlacrm_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlacrm_work.c new file mode 100644 index 000000000..a2fd362cc --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zlacrm_work.c @@ -0,0 +1,110 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zlacrm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zlacrm_work(int matrix_layout, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + double* rwork) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function */ + LAPACK_zlacrm(&m, &n, a, &lda, b, &ldb, c, &ldc, rwork); + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,n); + lapack_int ldc_t = MAX(1,m); + lapack_complex_double* a_t = NULL; + double* b_t = NULL; + lapack_complex_double* c_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -5; + LAPACKE_xerbla( "LAPACKE_zlacrm_work", info ); + return info; + } + if( ldb < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_zlacrm_work", info ); + return info; + } + if( ldc < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_zlacrm_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*) + LAPACKE_malloc(sizeof(lapack_complex_double) * lda_t * MAX(1,n)); + b_t = (double*) + LAPACKE_malloc(sizeof(double) * ldb_t * MAX(1,n)); + c_t = (lapack_complex_double*) + LAPACKE_malloc((sizeof(lapack_complex_double) * ldc_t * MAX(1,n))); + if (a_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if (b_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + if (c_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_zge_trans(matrix_layout, m, n, a, lda, a_t, lda_t); + LAPACKE_dge_trans(matrix_layout, n, n, b, ldb, b_t, ldb_t); + /* Call LAPACK function */ + LAPACK_zlacrm(&m, &n, a_t, &lda_t, b_t, &ldb_t, c_t, &ldc_t, rwork); + /* Transpose output matrices */ + LAPACKE_zge_trans(LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc); + /* Release memory and exit */ + LAPACKE_free(c_t); +exit_level_2: + LAPACKE_free(b_t); +exit_level_1: + LAPACKE_free(a_t); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zlacrm_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla("LAPACKE_zlacrm_work", -1); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlag2c.c b/lapack-netlib/LAPACKE/src/lapacke_zlag2c.c index 1b3ea9167..d1b330efb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlag2c.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlag2c.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zlag2c( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zlag2c_work( matrix_layout, m, n, a, lda, sa, ldsa ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlagge.c b/lapack-netlib/LAPACKE/src/lapacke_zlagge.c index 68704c8d2..5710ddd14 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlagge.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlagge.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_zlagge( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( MIN(m,n), d, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( MIN(m,n), d, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlaghe.c b/lapack-netlib/LAPACKE/src/lapacke_zlaghe.c index 05967746f..923b0fa7d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlaghe.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlaghe.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zlaghe( int matrix_layout, lapack_int n, lapack_int k, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlagsy.c b/lapack-netlib/LAPACKE/src/lapacke_zlagsy.c index a47533e5b..d2214970e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlagsy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlagsy.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zlagsy( int matrix_layout, lapack_int n, lapack_int k, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlange.c b/lapack-netlib/LAPACKE/src/lapacke_zlange.c index 6ea731942..a87b87225 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlange.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlange.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zlange * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,16 +38,18 @@ double LAPACKE_zlange( int matrix_layout, char norm, lapack_int m, lapack_int lda ) { lapack_int info = 0; - double res = 0.; + double res = 0.; double* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlange", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlange_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlange_work.c index 60d1812e7..83cbbc327 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlange_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlange_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zlange * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,38 +38,42 @@ double LAPACKE_zlange_work( int matrix_layout, char norm, lapack_int m, lapack_int lda, double* work ) { lapack_int info = 0; - double res = 0.; + double res = 0.; + char norm_lapack; if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ + /* Call LAPACK function */ res = LAPACK_zlange( &norm, &m, &n, a, &lda, work ); - if( info < 0 ) { - info = info - 1; - } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,m); - lapack_complex_double* a_t = NULL; + double* work_lapack = NULL; /* Check leading dimension(s) */ if( lda < n ) { info = -6; LAPACKE_xerbla( "LAPACKE_zlange_work", info ); return info; } - /* Allocate memory for temporary array(s) */ - a_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; + if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) { + norm_lapack = 'i'; + } else if( LAPACKE_lsame( norm, 'i' ) ) { + norm_lapack = '1'; + } else { + norm_lapack = norm; + } + /* Allocate memory for work array(s) */ + if( LAPACKE_lsame( norm_lapack, 'i' ) ) { + work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) ); + if( work_lapack == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } } - /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); - /* Call LAPACK function and adjust info */ - res = LAPACK_zlange( &norm, &m, &n, a_t, &lda_t, work ); - info = 0; /* LAPACK call is ok! */ + /* Call LAPACK function */ + res = LAPACK_zlange( &norm_lapack, &n, &m, a, &lda, work_lapack ); /* Release memory and exit */ - LAPACKE_free( a_t ); + if( work_lapack ) { + LAPACKE_free( work_lapack ); + } exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_zlange_work", info ); } } else { diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlanhe.c b/lapack-netlib/LAPACKE/src/lapacke_zlanhe.c index 233231aed..9b99b29c8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlanhe.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlanhe.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zlanhe * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -37,16 +37,18 @@ double LAPACKE_zlanhe( int matrix_layout, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda ) { lapack_int info = 0; - double res = 0.; + double res = 0.; double* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlanhe", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlanhe_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlanhe_work.c index b3e4de5d3..22127dfee 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlanhe_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlanhe_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zlanhe * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,7 +38,7 @@ double LAPACKE_zlanhe_work( int matrix_layout, char norm, char uplo, lapack_int lda, double* work ) { lapack_int info = 0; - double res = 0.; + double res = 0.; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ res = LAPACK_zlanhe( &norm, &uplo, &n, a, &lda, work ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlansy.c b/lapack-netlib/LAPACKE/src/lapacke_zlansy.c index 491a62902..d1e9e0d8e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlansy.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlansy.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zlansy * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -37,16 +37,18 @@ double LAPACKE_zlansy( int matrix_layout, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda ) { lapack_int info = 0; - double res = 0.; + double res = 0.; double* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlansy", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlansy_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlansy_work.c index 6088b4424..a88390f76 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlansy_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlansy_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zlansy * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -38,7 +38,7 @@ double LAPACKE_zlansy_work( int matrix_layout, char norm, char uplo, lapack_int lda, double* work ) { lapack_int info = 0; - double res = 0.; + double res = 0.; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ res = LAPACK_zlansy( &norm, &uplo, &n, a, &lda, work ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlantr.c b/lapack-netlib/LAPACKE/src/lapacke_zlantr.c index 29f7cf27d..e2e0bec24 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlantr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlantr.c @@ -38,16 +38,18 @@ double LAPACKE_zlantr( int matrix_layout, char norm, char uplo, char diag, const lapack_complex_double* a, lapack_int lda ) { lapack_int info = 0; - double res = 0.; + double res = 0.; double* work = NULL; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlantr", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlapmr.c b/lapack-netlib/LAPACKE/src/lapacke_zlapmr.c index 2ce3ab886..b81976ca6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlapmr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlapmr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zlapmr( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_zlapmr_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlapmt.c b/lapack-netlib/LAPACKE/src/lapacke_zlapmt.c index 8186560e7..c83583f2f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlapmt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlapmt.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zlapmt( int matrix_layout, lapack_logical forwrd, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -5; + } } #endif return LAPACKE_zlapmt_work( matrix_layout, forwrd, m, n, x, ldx, k ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarcm.c b/lapack-netlib/LAPACKE/src/lapacke_zlarcm.c new file mode 100644 index 000000000..642192262 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarcm.c @@ -0,0 +1,76 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zlarcm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zlarcm(int matrix_layout, lapack_int m, + lapack_int n, const double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc) +{ + lapack_int info = 0; + double* rwork = NULL; + + if (matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR) { + LAPACKE_xerbla("LAPACKE_zlarcm", -1); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if ( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -4; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -6; + } + } +#endif + /* Allocate memory for work array(s) */ + rwork = (double*) + LAPACKE_malloc(sizeof(double) * MAX(1, 2 * m * n)); + if (rwork == NULL) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zlarcm_work(matrix_layout, m, n, a, lda, b, ldb, + c, ldc, rwork); + /* Release memory and exit */ + LAPACKE_free(rwork); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zlarcm", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarcm_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlarcm_work.c new file mode 100644 index 000000000..183d958db --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarcm_work.c @@ -0,0 +1,110 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zlarcm +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zlarcm_work(int matrix_layout, lapack_int m, lapack_int n, + const double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + double* rwork) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function */ + LAPACK_zlarcm(&m, &n, a, &lda, b, &ldb, c, &ldc, rwork); + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldc_t = MAX(1,m); + double* a_t = NULL; + lapack_complex_double* b_t = NULL; + lapack_complex_double* c_t = NULL; + /* Check leading dimension(s) */ + if( lda < m ) { + info = -5; + LAPACKE_xerbla( "LAPACKE_zlarcm_work", info ); + return info; + } + if( ldb < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_zlarcm_work", info ); + return info; + } + if( ldc < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_zlarcm_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*) + LAPACKE_malloc(sizeof(double) * lda_t * MAX(1,m)); + b_t = (lapack_complex_double*) + LAPACKE_malloc(sizeof(lapack_complex_double) * ldb_t * MAX(1,n)); + c_t = (lapack_complex_double*) + LAPACKE_malloc((sizeof(lapack_complex_double) * ldc_t * MAX(1,n))); + if (a_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if (b_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + if (c_t == NULL) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_dge_trans(matrix_layout, m, m, a, lda, a_t, lda_t); + LAPACKE_zge_trans(matrix_layout, m, n, b, ldb, b_t, ldb_t); + /* Call LAPACK function */ + LAPACK_zlarcm(&m, &n, a_t, &lda_t, b_t, &ldb_t, c_t, &ldc_t, rwork); + /* Transpose output matrices */ + LAPACKE_zge_trans(LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc); + /* Release memory and exit */ + LAPACKE_free(c_t); +exit_level_2: + LAPACKE_free(b_t); +exit_level_1: + LAPACKE_free(a_t); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zlarcm_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla("LAPACKE_zlarcm_work", -1); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c b/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c index e4f993f14..6ea4960f3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zlarfb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -41,7 +41,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct lapack_int ldc ) { lapack_int info = 0; - lapack_int ldwork = ( side=='l')?n:(( side=='r')?m:1); + lapack_int ldwork; lapack_complex_double* work = NULL; lapack_int ncols_v, nrows_v; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -49,57 +49,66 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; - } - if( LAPACKE_zge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'r' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + nrows_v = ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'l' ) ) ? m : + ( ( LAPACKE_lsame( storev, 'c' ) && + LAPACKE_lsame( side, 'r' ) ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } - if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*ldv], ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], - ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); - return -8; + if( LAPACKE_zge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { + if( k > nrows_v ) { + LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); + return -8; + } + if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, + &v[(nrows_v-k)*ldv], ldv ) ) + return -9; + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) + return -9; + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, &v[k], + ldv ) ) + return -9; + } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { + if( k > ncols_v ) { + LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); + return -8; + } + if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], + ldv ) ) + return -9; + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) + return -9; } - if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, &v[ncols_v-k], - ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; } #endif + if( LAPACKE_lsame( side, 'l' ) ) { + ldwork = n; + } else if( LAPACKE_lsame( side, 'r' ) ) { + ldwork = m; + } else { + ldwork = 1; + } /* Allocate memory for working array(s) */ work = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * ldwork * MAX(1,k) ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarfg.c b/lapack-netlib/LAPACKE/src/lapacke_zlarfg.c index ea3a37ff9..14e587fcc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlarfg.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarfg.c @@ -38,12 +38,14 @@ lapack_int LAPACKE_zlarfg( lapack_int n, lapack_complex_double* alpha, lapack_complex_double* tau ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_z_nancheck( 1, alpha, 1 ) ) { - return -2; - } - if( LAPACKE_z_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_z_nancheck( 1, alpha, 1 ) ) { + return -2; + } + if( LAPACKE_z_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -3; + } } #endif return LAPACKE_zlarfg_work( n, alpha, x, incx, tau ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarft.c b/lapack-netlib/LAPACKE/src/lapacke_zlarft.c index 2cde895e8..be218df3b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlarft.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarft.c @@ -45,16 +45,18 @@ lapack_int LAPACKE_zlarft( int matrix_layout, char direct, char storev, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( LAPACKE_lsame( storev, 'r' ) ? n : 1); - nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1); - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : + ( LAPACKE_lsame( storev, 'r' ) ? n : 1); + nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : + ( LAPACKE_lsame( storev, 'r' ) ? k : 1); + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -6; + } } #endif return LAPACKE_zlarft_work( matrix_layout, direct, storev, n, k, v, ldv, tau, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarfx.c b/lapack-netlib/LAPACKE/src/lapacke_zlarfx.c index b1b1ec581..1dd1f5204 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlarfx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarfx.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_zlarfx( int matrix_layout, char side, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -7; - } - if( LAPACKE_z_nancheck( 1, &tau, 1 ) ) { - return -6; - } - if( LAPACKE_z_nancheck( m, v, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -7; + } + if( LAPACKE_z_nancheck( 1, &tau, 1 ) ) { + return -6; + } + if( LAPACKE_z_nancheck( m, v, 1 ) ) { + return -5; + } } #endif return LAPACKE_zlarfx_work( matrix_layout, side, m, n, v, tau, c, ldc, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlascl.c b/lapack-netlib/LAPACKE/src/lapacke_zlascl.c index de4b9c219..7e37d559c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlascl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlascl.c @@ -43,68 +43,70 @@ lapack_int LAPACKE_zlascl( int matrix_layout, char type, lapack_int kl, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - switch (type) { - case 'G': - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + switch (type) { + case 'G': + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -9; + } + break; + case 'L': + // TYPE = 'L' - lower triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_zgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_zgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { + return -9; + } + break; + case 'U': + // TYPE = 'U' - upper triangle of general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_zgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_zgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { + return -9; + } + break; + case 'H': + // TYPE = 'H' - part of upper Hessenberg matrix in general matrix + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_zgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_zgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { + return -9; + } + case 'B': + // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) + if( LAPACKE_zhb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { + return -9; + } + break; + case 'Q': + // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) + if( LAPACKE_zhb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { + return -9; + } + break; + case 'Z': + // TYPE = 'Z' - band matrix laid out for ?GBTRF + if( matrix_layout == LAPACK_COL_MAJOR && + LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { + return -9; + } + if( matrix_layout == LAPACK_ROW_MAJOR && + LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { + return -9; + } + break; } - break; - case 'L': - // TYPE = 'L' - lower triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_zgb_nancheck( matrix_layout, m, n, m-1, 0, a, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_zgb_nancheck( LAPACK_COL_MAJOR, n, m, 0, m-1, a-m+1, lda+1 ) ) { - return -9; - } - break; - case 'U': - // TYPE = 'U' - upper triangle of general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_zgb_nancheck( matrix_layout, m, n, 0, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_zgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 0, a, lda+1 ) ) { - return -9; - } - break; - case 'H': - // TYPE = 'H' - part of upper Hessenberg matrix in general matrix - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_zgb_nancheck( matrix_layout, m, n, 1, n-1, a-n+1, lda+1 ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_zgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) { - return -9; - } - case 'B': - // TYPE = 'B' - lower part of symmetric band matrix (assume m==n) - if( LAPACKE_zhb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) { - return -9; - } - break; - case 'Q': - // TYPE = 'Q' - upper part of symmetric band matrix (assume m==n) - if( LAPACKE_zhb_nancheck( matrix_layout, 'U', n, ku, a, lda ) ) { - return -9; - } - break; - case 'Z': - // TYPE = 'Z' - band matrix laid out for ?GBTRF - if( matrix_layout == LAPACK_COL_MAJOR && - LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, a+kl, lda ) ) { - return -9; - } - if( matrix_layout == LAPACK_ROW_MAJOR && - LAPACKE_zgb_nancheck( matrix_layout, m, n, kl, ku, a+lda*kl, lda ) ) { - return -9; - } - break; } #endif return LAPACKE_zlascl_work( matrix_layout, type, kl, ku, cfrom, cto, m, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlaset.c b/lapack-netlib/LAPACKE/src/lapacke_zlaset.c index 2dc017929..d43a424ea 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlaset.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlaset.c @@ -51,12 +51,14 @@ lapack_int LAPACKE_zlaset( int matrix_layout, char uplo, lapack_int m, #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_z_nancheck( 1, &alpha, 1 ) ) { - return -5; - } - if( LAPACKE_z_nancheck( 1, &beta, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_z_nancheck( 1, &alpha, 1 ) ) { + return -5; + } + if( LAPACKE_z_nancheck( 1, &beta, 1 ) ) { + return -6; + } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlassq.c b/lapack-netlib/LAPACKE/src/lapacke_zlassq.c new file mode 100644 index 000000000..a218c9b62 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zlassq.c @@ -0,0 +1,54 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zlassq +* Author: Julien langou +* Generated February 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zlassq( lapack_int n, lapack_complex_double* x, + lapack_int incx, double* scale, double* sumsq ) +{ +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input vector `x` and in/out scalars `scale` and `sumsq` for NaNs */ + if( LAPACKE_z_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { + return -2; + } + if( LAPACKE_d_nancheck( 1, scale, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, sumsq, 1 ) ) { + return -5; + } + } +#endif + return LAPACKE_zlassq_work( n, x, incx, scale, sumsq ); +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlassq_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlassq_work.c new file mode 100644 index 000000000..a4920b06a --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zlassq_work.c @@ -0,0 +1,42 @@ +/***************************************************************************** + Copyright (c) 2017, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zlassq +* Author: Julien Langou +* Generated February, 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zlassq_work( lapack_int n, lapack_complex_double* x, + lapack_int incx, double* scale, double* sumsq ) +{ + lapack_int info = 0; + LAPACK_zlassq( &n, x, &incx, scale, sumsq ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlaswp.c b/lapack-netlib/LAPACKE/src/lapacke_zlaswp.c index 41e002ae0..c18a3d7df 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlaswp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlaswp.c @@ -43,19 +43,21 @@ lapack_int LAPACKE_zlaswp( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ -/***************************************************************************** -* Disable the check as is below, the check below was checking for NaN -* from lda to n since there is no (obvious) way to knowing m. This is not -* a good idea. We could get a lower bound of m by scanning from ipiv. Or -* we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable -* the buggy Nan check. -* See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 -*****************************************************************************/ -/* if( LAPACKE_zge_nancheck( matrix_layout, lda, n, a, lda ) ) { -* return -3; -* } -*/ + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + /***************************************************************************** + * Disable the check as is below, the check below was checking for NaN + * from lda to n since there is no (obvious) way to knowing m. This is not + * a good idea. We could get a lower bound of m by scanning from ipiv. Or + * we could pass on the NaN check to LAPACKE_dlaswp_work. For now disable + * the buggy Nan check. + * See forum: http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=4827 + *****************************************************************************/ + /* if( LAPACKE_zge_nancheck( matrix_layout, lda, n, a, lda ) ) { + * return -3; + * } + */ + } #endif return LAPACKE_zlaswp_work( matrix_layout, n, a, lda, k1, k2, ipiv, incx ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlaswp_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlaswp_work.c index 0ab0aae4e..eafb9a59a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlaswp_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlaswp_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zlaswp * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlatms.c b/lapack-netlib/LAPACKE/src/lapacke_zlatms.c index ca0787969..4f131d8ca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlatms.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlatms.c @@ -46,18 +46,20 @@ lapack_int LAPACKE_zlatms( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -14; - } - if( LAPACKE_d_nancheck( 1, &cond, 1 ) ) { - return -9; - } - if( LAPACKE_d_nancheck( MIN(n,m), d, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &dmax, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -14; + } + if( LAPACKE_d_nancheck( 1, &cond, 1 ) ) { + return -9; + } + if( LAPACKE_d_nancheck( MIN(n,m), d, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &dmax, 1 ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlauum.c b/lapack-netlib/LAPACKE/src/lapacke_zlauum.c index 3f098d5f4..fbda5b0bc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlauum.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlauum.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zlauum( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zlauum_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbcon.c b/lapack-netlib/LAPACKE/src/lapacke_zpbcon.c index 1d7dc4c35..f1091a264 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_zpbcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbequ.c b/lapack-netlib/LAPACKE/src/lapacke_zpbequ.c index c66989bfe..caedf208b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbequ.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zpbequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_zpbequ_work( matrix_layout, uplo, n, kd, ab, ldab, s, scond, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_zpbrfs.c index ec6dcbd8c..dd6d62435 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbrfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_zpbrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbstf.c b/lapack-netlib/LAPACKE/src/lapacke_zpbstf.c index f0c49e231..0f717bfdd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbstf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbstf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zpbstf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kb, bb, ldbb ) ) { + return -5; + } } #endif return LAPACKE_zpbstf_work( matrix_layout, uplo, n, kb, bb, ldbb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbsv.c b/lapack-netlib/LAPACKE/src/lapacke_zpbsv.c index d41f3953d..8ec762aa4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zpbsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_zpbsv_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbsvx.c b/lapack-netlib/LAPACKE/src/lapacke_zpbsvx.c index de5fe3cee..ef0fe4669 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbsvx.c @@ -50,21 +50,23 @@ lapack_int LAPACKE_zpbsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -7; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -7; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, afb, ldafb ) ) { + return -9; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbtrf.c b/lapack-netlib/LAPACKE/src/lapacke_zpbtrf.c index 9c651436c..f0cd1fac4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbtrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbtrf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zpbtrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -5; + } } #endif return LAPACKE_zpbtrf_work( matrix_layout, uplo, n, kd, ab, ldab ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_zpbtrs.c index a84ae4482..1302831f2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zpbtrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpb_nancheck( matrix_layout, uplo, n, kd, ab, ldab ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_zpbtrs_work( matrix_layout, uplo, n, kd, nrhs, ab, ldab, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpftrf.c b/lapack-netlib/LAPACKE/src/lapacke_zpftrf.c index 041a1d55b..cc18a12ed 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpftrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpftrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zpftrf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_zpftrf_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpftri.c b/lapack-netlib/LAPACKE/src/lapacke_zpftri.c index 0141b8352..6ca58ed2b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zpftri( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpf_nancheck( n, a ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpf_nancheck( n, a ) ) { + return -5; + } } #endif return LAPACKE_zpftri_work( matrix_layout, transr, uplo, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpftrs.c b/lapack-netlib/LAPACKE/src/lapacke_zpftrs.c index 8dfd3c341..db355583e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpftrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpftrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zpftrs( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpf_nancheck( n, a ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpf_nancheck( n, a ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zpftrs_work( matrix_layout, transr, uplo, n, nrhs, a, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpocon.c b/lapack-netlib/LAPACKE/src/lapacke_zpocon.c index a552f262c..3d943825b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpocon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpocon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_zpocon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpoequ.c b/lapack-netlib/LAPACKE/src/lapacke_zpoequ.c index e55c1ad0d..7bb4fbb44 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpoequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpoequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zpoequ( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_zpoequ_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpoequb.c b/lapack-netlib/LAPACKE/src/lapacke_zpoequb.c index 1d9eb7af7..3c188c6d4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpoequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpoequb.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zpoequb( int matrix_layout, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -3; + } } #endif return LAPACKE_zpoequb_work( matrix_layout, n, a, lda, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zporfs.c b/lapack-netlib/LAPACKE/src/lapacke_zporfs.c index a50d61a93..a3e958a0c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zporfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zporfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_zporfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zporfsx.c b/lapack-netlib/LAPACKE/src/lapacke_zporfsx.c index d640f3e4d..e8ea525fa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zporfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zporfsx.c @@ -52,28 +52,30 @@ lapack_int LAPACKE_zporfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -21; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -10; + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -21; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -10; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -13; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -13; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zposv.c b/lapack-netlib/LAPACKE/src/lapacke_zposv.c index a45143db5..a2bf87418 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zposv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zposv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zposv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zposv_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zposvx.c b/lapack-netlib/LAPACKE/src/lapacke_zposvx.c index 9eabd72bc..c5f1b3866 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zposvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zposvx.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_zposvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zposvxx.c b/lapack-netlib/LAPACKE/src/lapacke_zposvxx.c index 50fbc9eb6..c507aab6f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zposvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zposvxx.c @@ -52,26 +52,28 @@ lapack_int LAPACKE_zposvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -23; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -23; + } + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpotrf.c b/lapack-netlib/LAPACKE/src/lapacke_zpotrf.c index 7be5eb5e4..07ef2572f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpotrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpotrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zpotrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zpotrf_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpotrf2.c b/lapack-netlib/LAPACKE/src/lapacke_zpotrf2.c index 60ff3318d..9628425a1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpotrf2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpotrf2.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zpotrf2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zpotrf2_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpotri.c b/lapack-netlib/LAPACKE/src/lapacke_zpotri.c index 1b1938b57..68252cd2d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpotri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpotri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zpotri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zpotri_work( matrix_layout, uplo, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpotrs.c b/lapack-netlib/LAPACKE/src/lapacke_zpotrs.c index 77ff17ba4..bd8ca4b77 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpotrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpotrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zpotrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zpotrs_work( matrix_layout, uplo, n, nrhs, a, lda, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zppcon.c b/lapack-netlib/LAPACKE/src/lapacke_zppcon.c index a49d0c84e..d2611beb0 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zppcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zppcon.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_zppcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -5; - } - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -5; + } + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zppequ.c b/lapack-netlib/LAPACKE/src/lapacke_zppequ.c index b66e2fde6..136b00b51 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zppequ.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zppequ.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zppequ( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_zppequ_work( matrix_layout, uplo, n, ap, s, scond, amax ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpprfs.c b/lapack-netlib/LAPACKE/src/lapacke_zpprfs.c index 845736378..b898a1dfc 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpprfs.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_zpprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zppsv.c b/lapack-netlib/LAPACKE/src/lapacke_zppsv.c index 8cb88fca9..9cb839e0f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zppsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zppsv.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_zppsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_zppsv_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zppsvx.c b/lapack-netlib/LAPACKE/src/lapacke_zppsvx.c index 483c17128..4adcc02df 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zppsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zppsvx.c @@ -48,21 +48,23 @@ lapack_int LAPACKE_zppsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zpp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zpp_nancheck( n, afp ) ) { + return -7; + } } - } - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -9; + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_lsame( fact, 'f' ) && LAPACKE_lsame( *equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -9; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpptrf.c b/lapack-netlib/LAPACKE/src/lapacke_zpptrf.c index 5698f308a..749b82256 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zpptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_zpptrf_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpptri.c b/lapack-netlib/LAPACKE/src/lapacke_zpptri.c index 69658ca7f..e5f9be90d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zpptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_zpptri_work( matrix_layout, uplo, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpptrs.c b/lapack-netlib/LAPACKE/src/lapacke_zpptrs.c index 27989b294..25dd5f196 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpptrs.c @@ -42,12 +42,14 @@ lapack_int LAPACKE_zpptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_zpptrs_work( matrix_layout, uplo, n, nrhs, ap, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpstrf.c b/lapack-netlib/LAPACKE/src/lapacke_zpstrf.c index 52d209815..ffe4bf625 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpstrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpstrf.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_zpstrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &tol, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpo_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &tol, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zptcon.c b/lapack-netlib/LAPACKE/src/lapacke_zptcon.c index db9feed47..ecc705342 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zptcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zptcon.c @@ -40,15 +40,17 @@ lapack_int LAPACKE_zptcon( lapack_int n, const double* d, lapack_int info = 0; double* work = NULL; #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpteqr.c b/lapack-netlib/LAPACKE/src/lapacke_zpteqr.c index d6009f349..522cb6a08 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpteqr.c @@ -46,16 +46,18 @@ lapack_int LAPACKE_zpteqr( int matrix_layout, char compz, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zptrfs.c b/lapack-netlib/LAPACKE/src/lapacke_zptrfs.c index e39c4f8dc..4232f79ee 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zptrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zptrfs.c @@ -49,24 +49,26 @@ lapack_int LAPACKE_zptrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, df, 1 ) ) { - return -7; - } - if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_z_nancheck( n-1, ef, 1 ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n, df, 1 ) ) { + return -7; + } + if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_z_nancheck( n-1, ef, 1 ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zptsv.c b/lapack-netlib/LAPACKE/src/lapacke_zptsv.c index 9c54750ac..d6b0f8b07 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zptsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zptsv.c @@ -42,15 +42,17 @@ lapack_int LAPACKE_zptsv( int matrix_layout, lapack_int n, lapack_int nrhs, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -6; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -6; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { + return -5; + } } #endif return LAPACKE_zptsv_work( matrix_layout, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zptsvx.c b/lapack-netlib/LAPACKE/src/lapacke_zptsvx.c index 1e9010391..6987f850b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zptsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zptsvx.c @@ -49,24 +49,26 @@ lapack_int LAPACKE_zptsvx( int matrix_layout, char fact, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n, df, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_z_nancheck( n-1, ef, 1 ) ) { - return -8; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n, df, 1 ) ) { + return -7; + } + } + if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_z_nancheck( n-1, ef, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpttrf.c b/lapack-netlib/LAPACKE/src/lapacke_zpttrf.c index d2ffc5036..b0d8a4aa4 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpttrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpttrf.c @@ -36,12 +36,14 @@ lapack_int LAPACKE_zpttrf( lapack_int n, double* d, lapack_complex_double* e ) { #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -2; - } - if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { - return -3; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -2; + } + if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { + return -3; + } } #endif return LAPACKE_zpttrf_work( n, d, e ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zpttrs.c b/lapack-netlib/LAPACKE/src/lapacke_zpttrs.c index 8a18e46fb..416269db3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zpttrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zpttrs.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_zpttrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n-1, e, 1 ) ) { + return -6; + } } #endif return LAPACKE_zpttrs_work( matrix_layout, uplo, n, nrhs, d, e, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zspcon.c b/lapack-netlib/LAPACKE/src/lapacke_zspcon.c index 410e35070..dae745b48 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zspcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zspcon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_zspcon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -6; - } - if( LAPACKE_zsp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -6; + } + if( LAPACKE_zsp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsprfs.c b/lapack-netlib/LAPACKE/src/lapacke_zsprfs.c index 170a1111a..c6b2a4e2e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsprfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_zsprfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsp_nancheck( n, afp ) ) { - return -6; - } - if( LAPACKE_zsp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsp_nancheck( n, afp ) ) { + return -6; + } + if( LAPACKE_zsp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zspsv.c b/lapack-netlib/LAPACKE/src/lapacke_zspsv.c index 6ba98de9c..35288dbf9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zspsv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zspsv.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zspsv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zspsv_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zspsvx.c b/lapack-netlib/LAPACKE/src/lapacke_zspsvx.c index 0d3b85bf1..f57193301 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zspsvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zspsvx.c @@ -48,17 +48,19 @@ lapack_int LAPACKE_zspsvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zsp_nancheck( n, afp ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zsp_nancheck( n, afp ) ) { + return -7; + } + } + if( LAPACKE_zsp_nancheck( n, ap ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; } - } - if( LAPACKE_zsp_nancheck( n, ap ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsptrf.c b/lapack-netlib/LAPACKE/src/lapacke_zsptrf.c index 49af07e8e..08a4e317f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsptrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsptrf.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_zsptrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_zsptrf_work( matrix_layout, uplo, n, ap, ipiv ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsptri.c b/lapack-netlib/LAPACKE/src/lapacke_zsptri.c index c68ebacf2..4e0481e4a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsptri.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zsptri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsp_nancheck( n, ap ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsptrs.c b/lapack-netlib/LAPACKE/src/lapacke_zsptrs.c index db3a4210d..7a0f8d054 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsptrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zsptrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsp_nancheck( n, ap ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsp_nancheck( n, ap ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -7; + } } #endif return LAPACKE_zsptrs_work( matrix_layout, uplo, n, nrhs, ap, ipiv, b, ldb ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zstedc.c b/lapack-netlib/LAPACKE/src/lapacke_zstedc.c index 4e194f19e..1bd7274c1 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zstedc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zstedc.c @@ -52,16 +52,18 @@ lapack_int LAPACKE_zstedc( int matrix_layout, char compz, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zstegr.c b/lapack-netlib/LAPACKE/src/lapacke_zstegr.c index 189e6be58..2a65dcc4d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zstegr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zstegr.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zstegr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -52,24 +52,26 @@ lapack_int LAPACKE_zstegr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { - return -11; - } - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( 1, &abstol, 1 ) ) { + return -11; } - } - if( LAPACKE_lsame( range, 'v' ) ) { - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + } + if( LAPACKE_lsame( range, 'v' ) ) { + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zstein.c b/lapack-netlib/LAPACKE/src/lapacke_zstein.c index 1d1b2ef5c..d54f3bb67 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zstein.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zstein.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zstein * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,15 +47,17 @@ lapack_int LAPACKE_zstein( int matrix_layout, lapack_int n, const double* d, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -3; - } - if( LAPACKE_d_nancheck( n, e, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n, w, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -3; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n, w, 1 ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zstemr.c b/lapack-netlib/LAPACKE/src/lapacke_zstemr.c index 82379bb8d..c1144488e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zstemr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zstemr.c @@ -52,18 +52,20 @@ lapack_int LAPACKE_zstemr( int matrix_layout, char jobz, char range, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -5; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { - return -7; - } - if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -5; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( 1, &vl, 1 ) ) { + return -7; + } + if( LAPACKE_d_nancheck( 1, &vu, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zstemr_work.c b/lapack-netlib/LAPACKE/src/lapacke_zstemr_work.c index 5b7231d0e..02d459a48 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zstemr_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zstemr_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zstemr * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -56,7 +56,7 @@ lapack_int LAPACKE_zstemr_work( int matrix_layout, char jobz, char range, lapack_int ldz_t = MAX(1,n); lapack_complex_double* z_t = NULL; /* Check leading dimension(s) */ - if( ldz < n ) { + if( ldz < 1 || ( LAPACKE_lsame( jobz, 'v' ) && ldz < n ) ) { info = -14; LAPACKE_xerbla( "LAPACKE_zstemr_work", info ); return info; diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsteqr.c b/lapack-netlib/LAPACKE/src/lapacke_zsteqr.c index c25b99fe0..3b27f53b9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsteqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsteqr.c @@ -46,16 +46,18 @@ lapack_int LAPACKE_zsteqr( int matrix_layout, char compz, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_d_nancheck( n, d, 1 ) ) { - return -4; - } - if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { - return -5; - } - if( LAPACKE_lsame( compz, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_d_nancheck( n, d, 1 ) ) { + return -4; + } + if( LAPACKE_d_nancheck( n-1, e, 1 ) ) { + return -5; + } + if( LAPACKE_lsame( compz, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -6; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsycon.c b/lapack-netlib/LAPACKE/src/lapacke_zsycon.c index 31717e4d0..b4f726bdb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsycon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsycon.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_zsycon( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsycon_3.c b/lapack-netlib/LAPACKE/src/lapacke_zsycon_3.c index 03900b66c..1a3b3ddf7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsycon_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsycon_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zsycon_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -40,20 +40,23 @@ lapack_int LAPACKE_zsycon_3( int matrix_layout, char uplo, lapack_int n, { lapack_int info = 0; lapack_complex_double* work = NULL; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zsycon_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n, e, 1 ) ) { - return -6; - } - if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } + if( LAPACKE_d_nancheck( 1, &anorm, 1 ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsyconv.c b/lapack-netlib/LAPACKE/src/lapacke_zsyconv.c index e0b9166a6..2826efa53 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsyconv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsyconv.c @@ -43,9 +43,11 @@ lapack_int LAPACKE_zsyconv( int matrix_layout, char uplo, char way, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif /* Call middle-level interface */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsyequb.c b/lapack-netlib/LAPACKE/src/lapacke_zsyequb.c index 7cbfa3c3d..669c3dd9e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsyequb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsyequb.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zsyequb( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsyr.c b/lapack-netlib/LAPACKE/src/lapacke_zsyr.c index b6dfa4452..097f0799b 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsyr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsyr.c @@ -43,15 +43,17 @@ lapack_int LAPACKE_zsyr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -7; - } - if( LAPACKE_z_nancheck( 1, &alpha, 1 ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n, x, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -7; + } + if( LAPACKE_z_nancheck( 1, &alpha, 1 ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n, x, 1 ) ) { + return -5; + } } #endif return LAPACKE_zsyr_work( matrix_layout, uplo, n, alpha, x, incx, a, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsyrfs.c b/lapack-netlib/LAPACKE/src/lapacke_zsyrfs.c index dfaa42051..a0246ba03 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsyrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsyrfs.c @@ -49,18 +49,20 @@ lapack_int LAPACKE_zsyrfs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsyrfsx.c b/lapack-netlib/LAPACKE/src/lapacke_zsyrfsx.c index 6d2fad6ed..a7401acca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsyrfsx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsyrfsx.c @@ -52,28 +52,30 @@ lapack_int LAPACKE_zsyrfsx( int matrix_layout, char uplo, char equed, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -12; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( equed, 'y' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -11; + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -12; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -22; + } + } + if( LAPACKE_lsame( equed, 'y' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -11; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -14; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -14; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysv.c b/lapack-netlib/LAPACKE/src/lapacke_zsysv.c index 5ba1a6f30..1cb7d4673 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsysv.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysv.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zsysv( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa.c b/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa.c index d858a0b39..c2bf5c340 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zsysv_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage.c new file mode 100644 index 000000000..c30a381cd --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage.c @@ -0,0 +1,89 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zsysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zsysv_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_double* work = NULL; + lapack_complex_double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zsysv_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zsysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zsysv_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zsysv_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage_work.c new file mode 100644 index 000000000..8b46257d0 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysv_aa_2stage_work.c @@ -0,0 +1,124 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zsysv_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zsysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zsysv_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_double* a_t = NULL; + lapack_complex_double* tb_t = NULL; + lapack_complex_double* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_zsysv_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zsysv_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_zsysv_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zsysv_aa_2stage( &uplo, &n, &nrhs, a, &lda_t, + tb, <b, ipiv, ipiv2, b, &ldb_t, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_zsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zsysv_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zsysv_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zsysv_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysv_rk.c b/lapack-netlib/LAPACKE/src/lapacke_zsysv_rk.c index a33851193..4dc56151e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsysv_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysv_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zsysv_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,15 +47,14 @@ lapack_int LAPACKE_zsysv_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n, e, 1) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysv_rook.c b/lapack-netlib/LAPACKE/src/lapacke_zsysv_rook.c index b97ef2ebe..1e81b5689 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsysv_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysv_rook.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zsysv_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysvx.c b/lapack-netlib/LAPACKE/src/lapacke_zsysvx.c index cd689b15c..d35835559 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsysvx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysvx.c @@ -51,17 +51,19 @@ lapack_int LAPACKE_zsysvx( int matrix_layout, char fact, char uplo, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -11; } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsysvxx.c b/lapack-netlib/LAPACKE/src/lapacke_zsysvxx.c index e903c5e1d..d7539404a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsysvxx.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsysvxx.c @@ -53,26 +53,28 @@ lapack_int LAPACKE_zsysvxx( int matrix_layout, char fact, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -6; - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -13; - } - if( nparams>0 ) { - if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { - return -24; + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, af, ldaf ) ) { + return -8; + } } - } - if( LAPACKE_lsame( fact, 'f' ) ) { - if( LAPACKE_d_nancheck( n, s, 1 ) ) { - return -12; + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -13; + } + if( nparams>0 ) { + if( LAPACKE_d_nancheck( nparams, params, 1 ) ) { + return -24; + } + } + if( LAPACKE_lsame( fact, 'f' ) ) { + if( LAPACKE_d_nancheck( n, s, 1 ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsyswapr.c b/lapack-netlib/LAPACKE/src/lapacke_zsyswapr.c index 9a08cf724..17d580e42 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsyswapr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsyswapr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_zsyswapr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_zsyswapr_work( matrix_layout, uplo, n, a, lda, i1, i2 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrf.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrf.c index f91ff5385..56868b99e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zsytrf( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa.c index 97b1f1df8..74a81f483 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zsytrf_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage.c new file mode 100644 index 000000000..e629c8571 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage.c @@ -0,0 +1,86 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zsytrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zsytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2 ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_double* work = NULL; + lapack_complex_double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zsytrf_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zsytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zsytrf_aa_2stage_work( matrix_layout, uplo, n, + a, lda, tb, ltb, ipiv, ipiv2, + work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zsytrf_aa_2stage", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage_work.c new file mode 100644 index 000000000..f91c42257 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_2stage_work.c @@ -0,0 +1,109 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zsytrf_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zsytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, + lapack_int* ipiv, lapack_int* ipiv2, + lapack_complex_double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zsytrf_aa_2stage( &uplo, &n, a, &lda, tb, + <b, ipiv, ipiv2, work, &lwork, + &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_double* a_t = NULL; + lapack_complex_double* tb_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_zsytrf_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zsytrf_aa_2stage_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zsytrf_aa_2stage( &uplo, &n, a, &lda_t, + tb, <b, ipiv, ipiv2, work, + &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_zsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zsytrf_aa_2stage( &uplo, &n, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, work, + &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zsytrf_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zsytrf_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rk.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rk.c index c4ead32de..f1722f434 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rk.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rk.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zsytrf_rk * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -46,12 +46,11 @@ lapack_int LAPACKE_zsytrf_rk( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rook.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rook.c index 91129c749..58411d0c9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_rook.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zsytrf_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytri.c b/lapack-netlib/LAPACKE/src/lapacke_zsytri.c index 72c151713..a1c6db170 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytri.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zsytri( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytri2.c b/lapack-netlib/LAPACKE/src/lapacke_zsytri2.c index 50b9e83b8..434fbe191 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytri2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytri2.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_zsytri2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytri2x.c b/lapack-netlib/LAPACKE/src/lapacke_zsytri2x.c index 38dddf6fb..770e8a481 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytri2x.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytri2x.c @@ -44,9 +44,11 @@ lapack_int LAPACKE_zsytri2x( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytri_3.c b/lapack-netlib/LAPACKE/src/lapacke_zsytri_3.c index 0902c57fa..bd5677554 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytri_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytri_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zsytri_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -41,17 +41,20 @@ lapack_int LAPACKE_zsytri_3( int matrix_layout, char uplo, lapack_int n, lapack_int lwork = -1; lapack_complex_double* work = NULL; lapack_complex_double work_query; + lapack_int e_start = LAPACKE_lsame( uplo, 'U' ) ? 1 : 0; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zsytri_3", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n, e, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, e + e_start, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrs.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrs.c index 8819b3fd8..9b8e064ca 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zsytrs( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_zsytrs_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrs2.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrs2.c index 741f9fd45..3c85f9796 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrs2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrs2.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_zsytrs2( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_3.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_3.c index 47b3c36da..fdfe059a9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_3.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_3.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zsytrs_3 * Author: Intel Corporation -* Generated December 2016 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -44,17 +44,19 @@ lapack_int LAPACKE_zsytrs_3( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n, e ,1 ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n, e ,1 ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_zsytrs_3_work( matrix_layout, uplo, n, nrhs, a, lda, - e, ipiv, b, ldb ); + e, ipiv, b, ldb ); } diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa.c index d306b0f0b..a01567765 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa.c @@ -47,12 +47,14 @@ lapack_int LAPACKE_zsytrs_aa( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage.c new file mode 100644 index 000000000..8713aba49 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage.c @@ -0,0 +1,65 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zsytrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zsytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zsytrs_aa_2stage", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, 4*n, 1, tb, ltb ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -11; + } + } +#endif + /* Call middle-level interface */ + info = LAPACKE_zsytrs_aa_2stage_work( matrix_layout, uplo, n, nrhs, + a, lda, tb, ltb, ipiv, ipiv2, b, + ldb ); + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage_work.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage_work.c new file mode 100644 index 000000000..76c513980 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_aa_2stage_work.c @@ -0,0 +1,114 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zsytrs_aa +* Author: Intel Corporation +* Generated November 2017 +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zsytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tb, lapack_int ltb, lapack_int* ipiv, + lapack_int* ipiv2, lapack_complex_double* b, lapack_int ldb ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zsytrs_aa_2stage( &uplo, &n, &nrhs, a, &lda, tb, + <b, ipiv, ipiv2, b, &ldb, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,n); + lapack_int ldb_t = MAX(1,n); + lapack_complex_double* a_t = NULL; + lapack_complex_double* tb_t = NULL; + lapack_complex_double* b_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -6; + LAPACKE_xerbla( "LAPACKE_zsytrs_aa_2stage_work", info ); + return info; + } + if( ltb < 4*n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zsytrs_aa_2stage_work", info ); + return info; + } + if( ldb < nrhs ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_zsytrs_aa_2stage_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + tb_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ltb ); + if( tb_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,nrhs) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_zsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, n, nrhs, b, ldb, b_t, ldb_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zsytrs_aa_2stage( &uplo, &n, &nrhs, a_t, &lda_t, + tb_t, <b, ipiv, ipiv2, b_t, &ldb_t, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_t, ldb_t, b, ldb ); + /* Release memory and exit */ + LAPACKE_free( b_t ); +exit_level_2: + LAPACKE_free( tb_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zsytrs_aa_2stage_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zsytrs_aa_2stage_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_rook.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_rook.c index d62bebc1c..052c17120 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrs_rook.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrs_rook.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_zsytrs_rook( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zsy_nancheck( matrix_layout, uplo, n, a, lda ) ) { + return -5; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_zsytrs_rook_work( matrix_layout, uplo, n, nrhs, a, lda, ipiv, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztbcon.c b/lapack-netlib/LAPACKE/src/lapacke_ztbcon.c index 62e54724c..0bbe1e970 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztbcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztbcon.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_ztbcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -7; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztbrfs.c b/lapack-netlib/LAPACKE/src/lapacke_ztbrfs.c index fb505ed4c..aa2a93660 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztbrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztbrfs.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_ztbrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -12; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -12; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztbtrs.c b/lapack-netlib/LAPACKE/src/lapacke_ztbtrs.c index 05bfbb61f..4c4e1885e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztbtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztbtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_ztbtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztb_nancheck( matrix_layout, uplo, diag, n, kd, ab, ldab ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -10; + } } #endif return LAPACKE_ztbtrs_work( matrix_layout, uplo, trans, diag, n, kd, nrhs, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztfsm.c b/lapack-netlib/LAPACKE/src/lapacke_ztfsm.c index fb1db86ba..79ca29a6d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztfsm.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztfsm.c @@ -44,18 +44,20 @@ lapack_int LAPACKE_ztfsm( int matrix_layout, char transr, char side, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( IS_Z_NONZERO(alpha) ) { - if( LAPACKE_ztf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( IS_Z_NONZERO(alpha) ) { + if( LAPACKE_ztf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -10; + } } - } - if( LAPACKE_z_nancheck( 1, &alpha, 1 ) ) { - return -9; - } - if( IS_Z_NONZERO(alpha) ) { - if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -11; + if( LAPACKE_z_nancheck( 1, &alpha, 1 ) ) { + return -9; + } + if( IS_Z_NONZERO(alpha) ) { + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztftri.c b/lapack-netlib/LAPACKE/src/lapacke_ztftri.c index 016b395ea..484279632 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztftri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztftri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ztftri( int matrix_layout, char transr, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztf_nancheck( matrix_layout, transr, uplo, diag, n, a ) ) { + return -6; + } } #endif return LAPACKE_ztftri_work( matrix_layout, transr, uplo, diag, n, a ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztfttp.c b/lapack-netlib/LAPACKE/src/lapacke_ztfttp.c index f1e1add12..8b9853854 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztfttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztfttp.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ztfttp( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_ztfttp_work( matrix_layout, transr, uplo, n, arf, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztfttr.c b/lapack-netlib/LAPACKE/src/lapacke_ztfttr.c index f53037dfe..f367a5960 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztfttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztfttr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ztfttr( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpf_nancheck( n, arf ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpf_nancheck( n, arf ) ) { + return -5; + } } #endif return LAPACKE_ztfttr_work( matrix_layout, transr, uplo, n, arf, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztgevc.c b/lapack-netlib/LAPACKE/src/lapacke_ztgevc.c index cfcf5752f..a195c8cc6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztgevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztgevc.c @@ -49,21 +49,23 @@ lapack_int LAPACKE_ztgevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, p, ldp ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, s, lds ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, p, ldp ) ) { + return -8; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_zge_nancheck( matrix_layout, n, n, s, lds ) ) { + return -6; + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztgexc.c b/lapack-netlib/LAPACKE/src/lapacke_ztgexc.c index 4c434280a..b01c10014 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztgexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztgexc.c @@ -46,21 +46,23 @@ lapack_int LAPACKE_ztgexc( int matrix_layout, lapack_logical wantq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -7; - } - if( wantq ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; } - } - if( wantz ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -11; + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -7; + } + if( wantq ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -9; + } + } + if( wantz ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -11; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztgsen.c b/lapack-netlib/LAPACKE/src/lapacke_ztgsen.c index 946ea7ca0..60f48ba8f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztgsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztgsen.c @@ -56,21 +56,23 @@ lapack_int LAPACKE_ztgsen( int matrix_layout, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( wantq ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -13; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -7; } - } - if( wantz ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { - return -15; + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( wantq ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -13; + } + } + if( wantz ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, z, ldz ) ) { + return -15; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztgsja.c b/lapack-netlib/LAPACKE/src/lapacke_ztgsja.c index aae4e24ff..485a62a26 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztgsja.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztgsja.c @@ -51,32 +51,34 @@ lapack_int LAPACKE_ztgsja( int matrix_layout, char jobu, char jobv, char jobq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { - return -12; - } - if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -22; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -10; } - } - if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { - return -14; - } - if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { - return -15; - } - if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, m, m, u, ldu ) ) { - return -18; + if( LAPACKE_zge_nancheck( matrix_layout, p, n, b, ldb ) ) { + return -12; } - } - if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, p, p, v, ldv ) ) { - return -20; + if( LAPACKE_lsame( jobq, 'i' ) || LAPACKE_lsame( jobq, 'q' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -22; + } + } + if( LAPACKE_d_nancheck( 1, &tola, 1 ) ) { + return -14; + } + if( LAPACKE_d_nancheck( 1, &tolb, 1 ) ) { + return -15; + } + if( LAPACKE_lsame( jobu, 'i' ) || LAPACKE_lsame( jobu, 'u' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, m, m, u, ldu ) ) { + return -18; + } + } + if( LAPACKE_lsame( jobv, 'i' ) || LAPACKE_lsame( jobv, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, p, p, v, ldv ) ) { + return -20; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztgsna.c b/lapack-netlib/LAPACKE/src/lapacke_ztgsna.c index 315003133..0e2599e03 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztgsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztgsna.c @@ -52,21 +52,23 @@ lapack_int LAPACKE_ztgsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -12; + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -10; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -12; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztgsyl.c b/lapack-netlib/LAPACKE/src/lapacke_ztgsyl.c index 29ca79895..a0b303b46 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztgsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztgsyl.c @@ -53,24 +53,26 @@ lapack_int LAPACKE_ztgsyl( int matrix_layout, char trans, lapack_int ijob, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, m, d, ldd ) ) { - return -12; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, e, lde ) ) { - return -14; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, f, ldf ) ) { - return -16; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, m, d, ldd ) ) { + return -12; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, e, lde ) ) { + return -14; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -16; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztpcon.c b/lapack-netlib/LAPACKE/src/lapacke_ztpcon.c index 69bf2805f..dd556af63 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztpcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztpcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ztpcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztpmqrt.c b/lapack-netlib/LAPACKE/src/lapacke_ztpmqrt.c index e3bd95a65..bae34b1ef 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztpmqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztpmqrt.c @@ -51,24 +51,26 @@ lapack_int LAPACKE_ztpmqrt( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - ncols_a = LAPACKE_lsame( side, 'L' ) ? n : - ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); - nrows_a = LAPACKE_lsame( side, 'L' ) ? k : - ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - if( LAPACKE_zge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { - return -13; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -15; - } - if( LAPACKE_zge_nancheck( matrix_layout, nb, k, t, ldt ) ) { - return -11; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + ncols_a = LAPACKE_lsame( side, 'L' ) ? n : + ( LAPACKE_lsame( side, 'R' ) ? k : 0 ); + nrows_a = LAPACKE_lsame( side, 'L' ) ? k : + ( LAPACKE_lsame( side, 'R' ) ? m : 0 ); + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + if( LAPACKE_zge_nancheck( matrix_layout, nrows_a, ncols_a, a, lda ) ) { + return -13; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -15; + } + if( LAPACKE_zge_nancheck( matrix_layout, nb, k, t, ldt ) ) { + return -11; + } + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, k, v, ldv ) ) { + return -9; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztpqrt.c b/lapack-netlib/LAPACKE/src/lapacke_ztpqrt.c index 7ddf5c6ba..f9eb95db5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztpqrt.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztpqrt.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_ztpqrt( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -6; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -8; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztpqrt2.c b/lapack-netlib/LAPACKE/src/lapacke_ztpqrt2.c index 4cefa0124..a6707ed70 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztpqrt2.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztpqrt2.c @@ -44,12 +44,14 @@ lapack_int LAPACKE_ztpqrt2( int matrix_layout, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -6; + } } #endif return LAPACKE_ztpqrt2_work( matrix_layout, m, n, l, a, lda, b, ldb, t, ldt ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztprfb.c b/lapack-netlib/LAPACKE/src/lapacke_ztprfb.c index b6894e8fa..fce801762 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztprfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztprfb.c @@ -51,30 +51,32 @@ lapack_int LAPACKE_ztprfb( int matrix_layout, char side, char trans, char direct return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( storev, 'C' ) ) { - ncols_v = k; - nrows_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - } else if( LAPACKE_lsame( storev, 'R' ) ) { - ncols_v = LAPACKE_lsame( side, 'L' ) ? m : - ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); - nrows_v = k; - } else { - ncols_v = 0; - nrows_v = 0; - } - if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -14; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { - return -16; - } - if( LAPACKE_zge_nancheck( matrix_layout, k, k, t, ldt ) ) { - return -12; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( storev, 'C' ) ) { + ncols_v = k; + nrows_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + } else if( LAPACKE_lsame( storev, 'R' ) ) { + ncols_v = LAPACKE_lsame( side, 'L' ) ? m : + ( LAPACKE_lsame( side, 'R' ) ? n : 0 ); + nrows_v = k; + } else { + ncols_v = 0; + nrows_v = 0; + } + if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -14; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -16; + } + if( LAPACKE_zge_nancheck( matrix_layout, k, k, t, ldt ) ) { + return -12; + } + if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { + return -10; + } } #endif if (side=='l' || side=='L') { diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztprfs.c b/lapack-netlib/LAPACKE/src/lapacke_ztprfs.c index 0cfb31061..51b8cbf19 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztprfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztprfs.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_ztprfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -10; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztptri.c b/lapack-netlib/LAPACKE/src/lapacke_ztptri.c index e28fd4661..15c8b423e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztptri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztptri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ztptri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -5; + } } #endif return LAPACKE_ztptri_work( matrix_layout, uplo, diag, n, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztptrs.c b/lapack-netlib/LAPACKE/src/lapacke_ztptrs.c index 5026c72b6..6f7bb4056 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztptrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztptrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_ztptrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztp_nancheck( matrix_layout, uplo, diag, n, ap ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -8; + } } #endif return LAPACKE_ztptrs_work( matrix_layout, uplo, trans, diag, n, nrhs, ap, b, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztpttf.c b/lapack-netlib/LAPACKE/src/lapacke_ztpttf.c index 28d83040f..44ad3c791 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztpttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztpttf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ztpttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -5; + } } #endif return LAPACKE_ztpttf_work( matrix_layout, transr, uplo, n, ap, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztpttr.c b/lapack-netlib/LAPACKE/src/lapacke_ztpttr.c index bf3a15253..30274aaa9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztpttr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztpttr.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ztpttr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -4; + } } #endif return LAPACKE_ztpttr_work( matrix_layout, uplo, n, ap, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrcon.c b/lapack-netlib/LAPACKE/src/lapacke_ztrcon.c index 726b7d5dc..ef921a461 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrcon.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrcon.c @@ -45,9 +45,11 @@ lapack_int LAPACKE_ztrcon( int matrix_layout, char norm, char uplo, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -6; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrevc.c b/lapack-netlib/LAPACKE/src/lapacke_ztrevc.c index 71fca151e..ff7c105b8 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrevc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrevc.c @@ -48,18 +48,20 @@ lapack_int LAPACKE_ztrevc( int matrix_layout, char side, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'l' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( side, 'b' ) || LAPACKE_lsame( side, 'r' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrexc.c b/lapack-netlib/LAPACKE/src/lapacke_ztrexc.c index e3e165843..28a1a72af 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrexc.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrexc.c @@ -43,14 +43,16 @@ lapack_int LAPACKE_ztrexc( int matrix_layout, char compq, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -6; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -4; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -4; } #endif return LAPACKE_ztrexc_work( matrix_layout, compq, n, t, ldt, q, ldq, ifst, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrrfs.c b/lapack-netlib/LAPACKE/src/lapacke_ztrrfs.c index bba372458..1e89e3221 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrrfs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrrfs.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_ztrrfs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, x, ldx ) ) { + return -11; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrsen.c b/lapack-netlib/LAPACKE/src/lapacke_ztrsen.c index 6d044144c..f65d914aa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrsen.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrsen.c @@ -49,14 +49,16 @@ lapack_int LAPACKE_ztrsen( int matrix_layout, char job, char compq, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_lsame( compq, 'v' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_lsame( compq, 'v' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, n, q, ldq ) ) { + return -8; + } + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrsna.c b/lapack-netlib/LAPACKE/src/lapacke_ztrsna.c index ae52ce0fb..0fd9ae189 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrsna.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrsna.c @@ -50,18 +50,20 @@ lapack_int LAPACKE_ztrsna( int matrix_layout, char job, char howmny, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { - return -6; - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, t, ldt ) ) { + return -6; } - } - if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { - return -10; + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vl, ldvl ) ) { + return -8; + } + } + if( LAPACKE_lsame( job, 'b' ) || LAPACKE_lsame( job, 'e' ) ) { + if( LAPACKE_zge_nancheck( matrix_layout, n, mm, vr, ldvr ) ) { + return -10; + } } } #endif diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrsyl.c b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl.c index eefbe046e..98f9cda56 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrsyl.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl.c @@ -45,15 +45,17 @@ lapack_int LAPACKE_ztrsyl( int matrix_layout, char trana, char tranb, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, m, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { - return -9; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } } #endif return LAPACKE_ztrsyl_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrtri.c b/lapack-netlib/LAPACKE/src/lapacke_ztrtri.c index 6b088fdbc..2f1c639f5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrtri.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrtri.c @@ -41,9 +41,11 @@ lapack_int LAPACKE_ztrtri( int matrix_layout, char uplo, char diag, lapack_int n return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_ztrtri_work( matrix_layout, uplo, diag, n, a, lda ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrtrs.c b/lapack-netlib/LAPACKE/src/lapacke_ztrtrs.c index 851e0c15f..3482fcdd9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrtrs.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrtrs.c @@ -43,12 +43,14 @@ lapack_int LAPACKE_ztrtrs( int matrix_layout, char uplo, char trans, char diag, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, n, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, nrhs, b, ldb ) ) { + return -9; + } } #endif return LAPACKE_ztrtrs_work( matrix_layout, uplo, trans, diag, n, nrhs, a, diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrttf.c b/lapack-netlib/LAPACKE/src/lapacke_ztrttf.c index 6912d2257..8a5dfc271 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrttf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrttf.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ztrttf( int matrix_layout, char transr, char uplo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } } #endif return LAPACKE_ztrttf_work( matrix_layout, transr, uplo, n, a, lda, arf ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrttp.c b/lapack-netlib/LAPACKE/src/lapacke_ztrttp.c index 9cc4dbd24..5dcf633bb 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztrttp.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrttp.c @@ -42,9 +42,11 @@ lapack_int LAPACKE_ztrttp( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } } #endif return LAPACKE_ztrttp_work( matrix_layout, uplo, n, a, lda, ap ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztzrzf.c b/lapack-netlib/LAPACKE/src/lapacke_ztzrzf.c index 88770eb84..e4e7d44e7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ztzrzf.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ztzrzf.c @@ -46,9 +46,11 @@ lapack_int LAPACKE_ztzrzf( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -4; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -4; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunbdb.c b/lapack-netlib/LAPACKE/src/lapacke_zunbdb.c index 1aad69f1d..91ab2c4de 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunbdb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunbdb.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zunbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -49,28 +49,31 @@ lapack_int LAPACKE_zunbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; lapack_complex_double* work = NULL; lapack_complex_double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zunbdb", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -9; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -11; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -13; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -9; + } + if( LAPACKE_zge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -11; + } + if( LAPACKE_zge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -13; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunbdb_work.c b/lapack-netlib/LAPACKE/src/lapacke_zunbdb_work.c index 689434a58..7a4cb97e2 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunbdb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunbdb_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zunbdb * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -47,121 +47,35 @@ lapack_int LAPACKE_zunbdb_work( int matrix_layout, char trans, char signs, lapack_complex_double* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_zunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_zunbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_double* x11_t = NULL; - lapack_complex_double* x12_t = NULL; - lapack_complex_double* x21_t = NULL; - lapack_complex_double* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_zunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_zunbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zuncsd.c b/lapack-netlib/LAPACKE/src/lapacke_zuncsd.c index 384b3e095..196089812 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zuncsd.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zuncsd.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native high-level C interface to LAPACK function zuncsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -54,28 +54,31 @@ lapack_int LAPACKE_zuncsd( int matrix_layout, char jobu1, char jobu2, lapack_complex_double* work = NULL; double rwork_query; lapack_complex_double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zuncsd", -1 ); return -1; } -#ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -11; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { - return -13; + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -15; - } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { - return -17; +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { + return -11; + } + if( LAPACKE_zge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { + return -13; + } + if( LAPACKE_zge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { + return -15; + } + if( LAPACKE_zge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { + return -17; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zuncsd2by1.c b/lapack-netlib/LAPACKE/src/lapacke_zuncsd2by1.c index f5dbd55be..61ac9c089 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zuncsd2by1.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zuncsd2by1.c @@ -55,17 +55,18 @@ lapack_int LAPACKE_zuncsd2by1( int matrix_layout, char jobu1, char jobu2, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nrows_x11 = p ; - nrows_x21 = m-p ; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nrows_x11 = p; + nrows_x21 = m-p; + if( LAPACKE_zge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + return -8; + } + + if( LAPACKE_zge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + return -9; + } } - - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { - return -9; - } - #endif /* Allocate memory for working array(s) */ iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * MAX(1,m-MIN(MIN(p,m-p),MIN(q,m-q))) ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zuncsd_work.c b/lapack-netlib/LAPACKE/src/lapacke_zuncsd_work.c index 7c3b39504..4d474a715 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zuncsd_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zuncsd_work.c @@ -28,7 +28,7 @@ ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zuncsd * Author: Intel Corporation -* Generated November 2015 +* Generated June 2017 *****************************************************************************/ #include "lapacke_utils.h" @@ -50,226 +50,36 @@ lapack_int LAPACKE_zuncsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, rwork, &lrwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_double* x11_t = NULL; - lapack_complex_double* x12_t = NULL; - lapack_complex_double* x21_t = NULL; - lapack_complex_double* x22_t = NULL; - lapack_complex_double* u1_t = NULL; - lapack_complex_double* u2_t = NULL; - lapack_complex_double* v1t_t = NULL; - lapack_complex_double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 || lwork == -1 ) { - LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); + LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, rwork, &lrwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zungbr.c b/lapack-netlib/LAPACKE/src/lapacke_zungbr.c index 3c0f244ae..9a91fc44a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zungbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zungbr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zungbr( int matrix_layout, char vect, lapack_int m, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -6; - } - if( LAPACKE_z_nancheck( MIN(m,k), tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_z_nancheck( MIN(m,k), tau, 1 ) ) { + return -8; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunghr.c b/lapack-netlib/LAPACKE/src/lapacke_zunghr.c index 88f6150f3..9d681ded5 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunghr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunghr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zunghr( int matrix_layout, lapack_int n, lapack_int ilo, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( n-1, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -5; + } + if( LAPACKE_z_nancheck( n-1, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunglq.c b/lapack-netlib/LAPACKE/src/lapacke_zunglq.c index 56dc42e97..21af17dfd 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunglq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunglq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zunglq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zungql.c b/lapack-netlib/LAPACKE/src/lapacke_zungql.c index 5f99b74c8..a4204b6ef 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zungql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zungql.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zungql( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zungqr.c b/lapack-netlib/LAPACKE/src/lapacke_zungqr.c index 8354e42d8..b15d0c923 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zungqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zungqr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zungqr( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zungrq.c b/lapack-netlib/LAPACKE/src/lapacke_zungrq.c index 3a3083dca..6c6378bfa 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zungrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zungrq.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zungrq( int matrix_layout, lapack_int m, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { - return -5; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -7; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -5; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -7; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zungtr.c b/lapack-netlib/LAPACKE/src/lapacke_zungtr.c index 46c947b4a..51785347e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zungtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zungtr.c @@ -46,12 +46,14 @@ lapack_int LAPACKE_zungtr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n-1, tau, 1 ) ) { - return -6; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, n, n, a, lda ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, tau, 1 ) ) { + return -6; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmbr.c b/lapack-netlib/LAPACKE/src/lapacke_zunmbr.c index 87a5b1818..2449d8511 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmbr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmbr.c @@ -49,17 +49,19 @@ lapack_int LAPACKE_zunmbr( int matrix_layout, char vect, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - nq = LAPACKE_lsame( side, 'l' ) ? m : n; - r = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); - if( LAPACKE_zge_nancheck( matrix_layout, r, MIN(nq,k), a, lda ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_z_nancheck( MIN(nq,k), tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + nq = LAPACKE_lsame( side, 'l' ) ? m : n; + r = LAPACKE_lsame( vect, 'q' ) ? nq : MIN(nq,k); + if( LAPACKE_zge_nancheck( matrix_layout, r, MIN(nq,k), a, lda ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_z_nancheck( MIN(nq,k), tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmhr.c b/lapack-netlib/LAPACKE/src/lapacke_zunmhr.c index 82dbfd8d4..357d71184 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmhr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmhr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_zunmhr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_zge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_z_nancheck( m-1, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_zge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_z_nancheck( m-1, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmlq.c b/lapack-netlib/LAPACKE/src/lapacke_zunmlq.c index 73a9f42f7..c306bbc84 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmlq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmlq.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_zunmlq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmql.c b/lapack-netlib/LAPACKE/src/lapacke_zunmql.c index 41fb94151..3c06a1a1e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmql.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmql.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_zunmql( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_zge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_zge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmqr.c b/lapack-netlib/LAPACKE/src/lapacke_zunmqr.c index 29510d6aa..7a43ebc50 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmqr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmqr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_zunmqr( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_zge_nancheck( matrix_layout, r, k, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_zge_nancheck( matrix_layout, r, k, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmrq.c b/lapack-netlib/LAPACKE/src/lapacke_zunmrq.c index 54a9e14ef..d3ffb39b7 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmrq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmrq.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_zunmrq( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmrz.c b/lapack-netlib/LAPACKE/src/lapacke_zunmrz.c index ea6eae4c8..a8f1ce079 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmrz.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmrz.c @@ -48,15 +48,17 @@ lapack_int LAPACKE_zunmrz( int matrix_layout, char side, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { - return -8; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -11; - } - if( LAPACKE_z_nancheck( k, tau, 1 ) ) { - return -10; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, k, m, a, lda ) ) { + return -8; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + if( LAPACKE_z_nancheck( k, tau, 1 ) ) { + return -10; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zunmtr.c b/lapack-netlib/LAPACKE/src/lapacke_zunmtr.c index 0410dd7db..f8936cd5a 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zunmtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zunmtr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_zunmtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_zge_nancheck( matrix_layout, r, r, a, lda ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -10; - } - if( LAPACKE_z_nancheck( m-1, tau, 1 ) ) { - return -9; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_zge_nancheck( matrix_layout, r, r, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -10; + } + if( LAPACKE_z_nancheck( m-1, tau, 1 ) ) { + return -9; + } } #endif /* Query optimal working array(s) size */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zupgtr.c b/lapack-netlib/LAPACKE/src/lapacke_zupgtr.c index 2fc619019..8b4b7ec93 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zupgtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zupgtr.c @@ -45,12 +45,14 @@ lapack_int LAPACKE_zupgtr( int matrix_layout, char uplo, lapack_int n, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - if( LAPACKE_zpp_nancheck( n, ap ) ) { - return -4; - } - if( LAPACKE_z_nancheck( n-1, tau, 1 ) ) { - return -5; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zpp_nancheck( n, ap ) ) { + return -4; + } + if( LAPACKE_z_nancheck( n-1, tau, 1 ) ) { + return -5; + } } #endif /* Allocate memory for working array(s) */ diff --git a/lapack-netlib/LAPACKE/src/lapacke_zupmtr.c b/lapack-netlib/LAPACKE/src/lapacke_zupmtr.c index 4a567b95a..d735c5561 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zupmtr.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zupmtr.c @@ -49,16 +49,18 @@ lapack_int LAPACKE_zupmtr( int matrix_layout, char side, char uplo, char trans, return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK - /* Optionally check input matrices for NaNs */ - r = LAPACKE_lsame( side, 'l' ) ? m : n; - if( LAPACKE_zpp_nancheck( r, ap ) ) { - return -7; - } - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -9; - } - if( LAPACKE_z_nancheck( m-1, tau, 1 ) ) { - return -8; + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + r = LAPACKE_lsame( side, 'l' ) ? m : n; + if( LAPACKE_zpp_nancheck( r, ap ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -9; + } + if( LAPACKE_z_nancheck( m-1, tau, 1 ) ) { + return -8; + } } #endif /* Additional scalars initializations for work arrays */ diff --git a/lapack-netlib/LAPACKE/utils/CMakeLists.txt b/lapack-netlib/LAPACKE/utils/CMakeLists.txt index c8b8511e7..dd36ee33e 100644 --- a/lapack-netlib/LAPACKE/utils/CMakeLists.txt +++ b/lapack-netlib/LAPACKE/utils/CMakeLists.txt @@ -1,4 +1,4 @@ -set(UTILS_OBJ +set(UTILS lapacke_c_nancheck.c lapacke_ctr_trans.c lapacke_make_complex_float.c lapacke_zgb_nancheck.c lapacke_cgb_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_zgb_trans.c lapacke_cgb_trans.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zge_nancheck.c diff --git a/lapack-netlib/LAPACKE/utils/Makefile b/lapack-netlib/LAPACKE/utils/Makefile index 57b8f0dd1..1f639c6ea 100644 --- a/lapack-netlib/LAPACKE/utils/Makefile +++ b/lapack-netlib/LAPACKE/utils/Makefile @@ -186,11 +186,12 @@ OBJ = lapacke_cgb_nancheck.o \ all: lib lib: $(OBJ) - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(OBJ) + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $^ $(RANLIB) ../../$(LAPACKELIB) +clean: cleanobj +cleanobj: + rm -f *.o + .c.o: $(CC) $(CFLAGS) -I../include -c -o $@ $< - -clean: - rm -f *.o diff --git a/lapack-netlib/LAPACKE/utils/lapacke_ctp_trans.c b/lapack-netlib/LAPACKE/utils/lapacke_ctp_trans.c index 2c51ddd61..6678e1a5b 100644 --- a/lapack-netlib/LAPACKE/utils/lapacke_ctp_trans.c +++ b/lapack-netlib/LAPACKE/utils/lapacke_ctp_trans.c @@ -69,7 +69,7 @@ void LAPACKE_ctp_trans( int matrix_layout, char uplo, char diag, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ - if( ( colmaj || upper ) && !( colmaj && upper ) ) { + if( !( colmaj || upper ) || ( colmaj && upper ) ) { for( j = st; j < n; j++ ) { for( i = 0; i < j+1-st; i++ ) { out[ j-i + (i*(2*n-i+1))/2 ] = in[ ((j+1)*j)/2 + i ]; diff --git a/lapack-netlib/LAPACKE/utils/lapacke_dtp_trans.c b/lapack-netlib/LAPACKE/utils/lapacke_dtp_trans.c index 483af39e6..7d7455de7 100644 --- a/lapack-netlib/LAPACKE/utils/lapacke_dtp_trans.c +++ b/lapack-netlib/LAPACKE/utils/lapacke_dtp_trans.c @@ -69,7 +69,7 @@ void LAPACKE_dtp_trans( int matrix_layout, char uplo, char diag, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ - if( ( colmaj || upper ) && !( colmaj && upper ) ) { + if( !( colmaj || upper ) || ( colmaj && upper ) ) { for( j = st; j < n; j++ ) { for( i = 0; i < j+1-st; i++ ) { out[ j-i + (i*(2*n-i+1))/2 ] = in[ ((j+1)*j)/2 + i ]; diff --git a/lapack-netlib/LAPACKE/utils/lapacke_stp_trans.c b/lapack-netlib/LAPACKE/utils/lapacke_stp_trans.c index 237084220..7e676004f 100644 --- a/lapack-netlib/LAPACKE/utils/lapacke_stp_trans.c +++ b/lapack-netlib/LAPACKE/utils/lapacke_stp_trans.c @@ -69,7 +69,7 @@ void LAPACKE_stp_trans( int matrix_layout, char uplo, char diag, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ - if( ( colmaj || upper ) && !( colmaj && upper ) ) { + if( !( colmaj || upper ) || ( colmaj && upper ) ) { for( j = st; j < n; j++ ) { for( i = 0; i < j+1-st; i++ ) { out[ j-i + (i*(2*n-i+1))/2 ] = in[ ((j+1)*j)/2 + i ]; diff --git a/lapack-netlib/LAPACKE/utils/lapacke_ztp_trans.c b/lapack-netlib/LAPACKE/utils/lapacke_ztp_trans.c index b3654e5f1..e7369baae 100644 --- a/lapack-netlib/LAPACKE/utils/lapacke_ztp_trans.c +++ b/lapack-netlib/LAPACKE/utils/lapacke_ztp_trans.c @@ -69,7 +69,7 @@ void LAPACKE_ztp_trans( int matrix_layout, char uplo, char diag, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ - if( ( colmaj || upper ) && !( colmaj && upper ) ) { + if( !( colmaj || upper ) || ( colmaj && upper ) ) { for( j = st; j < n; j++ ) { for( i = 0; i < j+1-st; i++ ) { out[ j-i + (i*(2*n-i+1))/2 ] = in[ ((j+1)*j)/2 + i ]; diff --git a/lapack-netlib/LICENSE b/lapack-netlib/LICENSE index eefcbdaee..94cdb0f85 100644 --- a/lapack-netlib/LICENSE +++ b/lapack-netlib/LICENSE @@ -1,9 +1,9 @@ -Copyright (c) 1992-2016 The University of Tennessee and The University +Copyright (c) 1992-2017 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. -Copyright (c) 2000-2016 The University of California Berkeley. All +Copyright (c) 2000-2017 The University of California Berkeley. All rights reserved. -Copyright (c) 2006-2016 The University of Colorado Denver. All rights +Copyright (c) 2006-2017 The University of Colorado Denver. All rights reserved. $COPYRIGHT$ diff --git a/lapack-netlib/Makefile b/lapack-netlib/Makefile index edde5ff9c..1d7e82c34 100644 --- a/lapack-netlib/Makefile +++ b/lapack-netlib/Makefile @@ -6,125 +6,144 @@ include make.inc -all: lapack_install lib blas_testing lapack_testing +all: lapack_install lib blas_testing lapack_testing lib: lapacklib tmglib #lib: blaslib variants lapacklib tmglib -clean: cleanlib cleantesting cleanblas_testing cleancblas_testing - -lapack_install: - ( cd INSTALL; $(MAKE) ) -# ./testlsame; ./testslamch; ./testdlamch; \ -# ./testsecond; ./testdsecnd; ./testieee; ./testversion ) - blaslib: - ( cd BLAS/SRC; $(MAKE) ) + $(MAKE) -C BLAS cblaslib: - ( cd CBLAS; $(MAKE) ) + $(MAKE) -C CBLAS -lapacklib: lapack_install - ( cd SRC; $(MAKE) ) +lapacklib: + $(MAKE) -C SRC -lapackelib: lapacklib - ( cd LAPACKE; $(MAKE) ) - -cblas_example: cblaslib blaslib - ( cd CBLAS/examples; $(MAKE) ) +lapackelib: + $(MAKE) -C LAPACKE -lapacke_example: lapackelib - ( cd LAPACKE/example; $(MAKE) ) +tmglib: + $(MAKE) -C TESTING/MATGEN variants: - ( cd SRC/VARIANTS ; $(MAKE)) + $(MAKE) -C SRC/VARIANTS -tmglib: - ( cd TESTING/MATGEN; $(MAKE) ) +lapack_install: + $(MAKE) -C INSTALL run + +blas_testing: blaslib + $(MAKE) -C BLAS blas_testing -lapack_testing: lib - ( cd TESTING ; $(MAKE) ) +cblas_testing: cblaslib blaslib + $(MAKE) -C CBLAS cblas_testing + +lapack_testing: tmglib lapacklib blaslib + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING ./lapack_testing.py -variants_testing: lib variants - ( cd TESTING ; rm -f xlintst* ; $(MAKE) VARLIB='SRC/VARIANTS/LIB/cholrl.a' ; \ - mv stest.out stest_cholrl.out ; mv dtest.out dtest_cholrl.out ; mv ctest.out ctest_cholrl.out ; mv ztest.out ztest_cholrl.out ) - ( cd TESTING ; rm -f xlintst* ; $(MAKE) VARLIB='SRC/VARIANTS/LIB/choltop.a' ; \ - mv stest.out stest_choltop.out ; mv dtest.out dtest_choltop.out ; mv ctest.out ctest_choltop.out ; mv ztest.out ztest_choltop.out ) - ( cd TESTING ; rm -f xlintst* ; $(MAKE) VARLIB='SRC/VARIANTS/LIB/lucr.a' ; \ - mv stest.out stest_lucr.out ; mv dtest.out dtest_lucr.out ; mv ctest.out ctest_lucr.out ; mv ztest.out ztest_lucr.out ) - ( cd TESTING ; rm -f xlintst* ; $(MAKE) VARLIB='SRC/VARIANTS/LIB/lull.a' ; \ - mv stest.out stest_lull.out ; mv dtest.out dtest_lull.out ; mv ctest.out ctest_lull.out ; mv ztest.out ztest_lull.out ) - ( cd TESTING ; rm -f xlintst* ; $(MAKE) VARLIB='SRC/VARIANTS/LIB/lurec.a' ; \ - mv stest.out stest_lurec.out ; mv dtest.out dtest_lurec.out ; mv ctest.out ctest_lurec.out ; mv ztest.out ztest_lurec.out ) - ( cd TESTING ; rm -f xlintst* ; $(MAKE) VARLIB='SRC/VARIANTS/LIB/qrll.a' ; \ - mv stest.out stest_qrll.out ; mv dtest.out dtest_qrll.out ; mv ctest.out ctest_qrll.out ; mv ztest.out ztest_qrll.out ) - -blas_testing: - ( cd BLAS/TESTING; $(MAKE) -f Makeblat1 ) - ( cd BLAS; ./xblat1s > sblat1.out ; \ - ./xblat1d > dblat1.out ; \ - ./xblat1c > cblat1.out ; \ - ./xblat1z > zblat1.out ) - ( cd BLAS/TESTING; $(MAKE) -f Makeblat2 ) - ( cd BLAS; ./xblat2s < sblat2.in ; \ - ./xblat2d < dblat2.in ; \ - ./xblat2c < cblat2.in ; \ - ./xblat2z < zblat2.in ) - ( cd BLAS/TESTING; $(MAKE) -f Makeblat3 ) - ( cd BLAS; ./xblat3s < sblat3.in ; \ - ./xblat3d < dblat3.in ; \ - ./xblat3c < cblat3.in ; \ - ./xblat3z < zblat3.in ) - -cblas_testing: blaslib - ( cd CBLAS ; $(MAKE) cblas_testing) - ( cd CBLAS ; $(MAKE) runtst) +variants_testing: tmglib variants lapacklib blaslib + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING/LIN VARLIB='SRC/VARIANTS/cholrl.a' + $(MAKE) -C TESTING stest.out && mv TESTING/stest.out TESTING/stest_cholrl.out + $(MAKE) -C TESTING dtest.out && mv TESTING/dtest.out TESTING/dtest_cholrl.out + $(MAKE) -C TESTING ctest.out && mv TESTING/ctest.out TESTING/ctest_cholrl.out + $(MAKE) -C TESTING ztest.out && mv TESTING/ztest.out TESTING/ztest_cholrl.out + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING/LIN VARLIB='SRC/VARIANTS/choltop.a' + $(MAKE) -C TESTING stest.out && mv TESTING/stest.out TESTING/stest_choltop.out + $(MAKE) -C TESTING dtest.out && mv TESTING/dtest.out TESTING/dtest_choltop.out + $(MAKE) -C TESTING ctest.out && mv TESTING/ctest.out TESTING/ctest_choltop.out + $(MAKE) -C TESTING ztest.out && mv TESTING/ztest.out TESTING/ztest_choltop.out + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING/LIN VARLIB='SRC/VARIANTS/lucr.a' + $(MAKE) -C TESTING stest.out && mv TESTING/stest.out TESTING/stest_lucr.out + $(MAKE) -C TESTING dtest.out && mv TESTING/dtest.out TESTING/dtest_lucr.out + $(MAKE) -C TESTING ctest.out && mv TESTING/ctest.out TESTING/ctest_lucr.out + $(MAKE) -C TESTING ztest.out && mv TESTING/ztest.out TESTING/ztest_lucr.out + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING/LIN VARLIB='SRC/VARIANTS/lull.a' + $(MAKE) -C TESTING stest.out && mv TESTING/stest.out TESTING/stest_lull.out + $(MAKE) -C TESTING dtest.out && mv TESTING/dtest.out TESTING/dtest_lull.out + $(MAKE) -C TESTING ctest.out && mv TESTING/ctest.out TESTING/ctest_lull.out + $(MAKE) -C TESTING ztest.out && mv TESTING/ztest.out TESTING/ztest_lull.out + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING/LIN VARLIB='SRC/VARIANTS/lurec.a' + $(MAKE) -C TESTING stest.out && mv TESTING/stest.out TESTING/stest_lurec.out + $(MAKE) -C TESTING dtest.out && mv TESTING/dtest.out TESTING/dtest_lurec.out + $(MAKE) -C TESTING ctest.out && mv TESTING/ctest.out TESTING/ctest_lurec.out + $(MAKE) -C TESTING ztest.out && mv TESTING/ztest.out TESTING/ztest_lurec.out + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING/LIN VARLIB='SRC/VARIANTS/qrll.a' + $(MAKE) -C TESTING stest.out && mv TESTING/stest.out TESTING/stest_qrll.out + $(MAKE) -C TESTING dtest.out && mv TESTING/dtest.out TESTING/dtest_qrll.out + $(MAKE) -C TESTING ctest.out && mv TESTING/ctest.out TESTING/ctest_qrll.out + $(MAKE) -C TESTING ztest.out && mv TESTING/ztest.out TESTING/ztest_qrll.out +cblas_example: cblaslib blaslib + $(MAKE) -C CBLAS cblas_example +lapacke_example: lapackelib lapacklib blaslib + $(MAKE) -C LAPACKE lapacke_example html: - @echo "LAPACK HTML PAGES GENRATION with Doxygen" + @echo "LAPACK HTML PAGES GENERATION with Doxygen" doxygen DOCS/Doxyfile - @echo "==================" - @echo "LAPACK HTML PAGES GENRATED in DOCS/explore-html" + @echo "==================" + @echo "LAPACK HTML PAGES GENERATED in DOCS/explore-html" @echo "Usage: open DOCS/explore-html/index.html" @echo "Online version available at http://www.netlib.org/lapack/explore-html/" @echo "==================" man: - @echo "LAPACK MAN PAGES GENRATION with Doxygen" + @echo "LAPACK MAN PAGES GENERATION with Doxygen" doxygen DOCS/Doxyfile_man @echo "==================" - @echo "LAPACK MAN PAGES GENRATED in DOCS/MAN" + @echo "LAPACK MAN PAGES GENERATED in DOCS/MAN" @echo "Set your MANPATH env variable accordingly" @echo "Usage: man dgetrf.f" @echo "==================" +clean: + $(MAKE) -C INSTALL clean + $(MAKE) -C BLAS clean + $(MAKE) -C CBLAS clean + $(MAKE) -C SRC clean + $(MAKE) -C SRC/VARIANTS clean + $(MAKE) -C TESTING clean + $(MAKE) -C TESTING/MATGEN clean + $(MAKE) -C TESTING/LIN clean + $(MAKE) -C TESTING/EIG clean + $(MAKE) -C LAPACKE clean + rm -f *.a +cleanobj: + $(MAKE) -C INSTALL cleanobj + $(MAKE) -C BLAS cleanobj + $(MAKE) -C CBLAS cleanobj + $(MAKE) -C SRC cleanobj + $(MAKE) -C SRC/VARIANTS cleanobj + $(MAKE) -C TESTING/MATGEN cleanobj + $(MAKE) -C TESTING/LIN cleanobj + $(MAKE) -C TESTING/EIG cleanobj + $(MAKE) -C LAPACKE cleanobj cleanlib: - ( cd INSTALL; $(MAKE) clean ) - ( cd BLAS/SRC; $(MAKE) clean ) - ( cd CBLAS; $(MAKE) clean ) - ( cd SRC; $(MAKE) clean ) - ( cd SRC/VARIANTS; $(MAKE) clean ) - ( cd TESTING/MATGEN; $(MAKE) clean ) - ( cd LAPACKE; $(MAKE) clean ) - - -cleanblas_testing: - ( cd BLAS/TESTING; $(MAKE) -f Makeblat1 clean ) - ( cd BLAS/TESTING; $(MAKE) -f Makeblat2 clean ) - ( cd BLAS/TESTING; $(MAKE) -f Makeblat3 clean ) - ( cd BLAS; rm -f xblat* ) - -cleancblas_testing: - ( cd CBLAS/testing; $(MAKE) clean ) - -cleantesting: - ( cd TESTING/LIN; $(MAKE) clean ) - ( cd TESTING/EIG; $(MAKE) clean ) - ( cd TESTING; rm -f xlin* xeig* ) - -cleanall: cleanlib cleanblas_testing cleancblas_testing cleantesting - rm -f *.a TESTING/*.out INSTALL/test* BLAS/*.out - + $(MAKE) -C BLAS cleanlib + $(MAKE) -C CBLAS cleanlib + $(MAKE) -C SRC cleanlib + $(MAKE) -C SRC/VARIANTS cleanlib + $(MAKE) -C TESTING/MATGEN cleanlib + $(MAKE) -C LAPACKE cleanlib + rm -f *.a +cleanexe: + $(MAKE) -C INSTALL cleanexe + $(MAKE) -C BLAS cleanexe + $(MAKE) -C CBLAS cleanexe + $(MAKE) -C TESTING/LIN cleanexe + $(MAKE) -C TESTING/EIG cleanexe + $(MAKE) -C LAPACKE cleanexe +cleantest: + $(MAKE) -C INSTALL cleantest + $(MAKE) -C BLAS cleantest + $(MAKE) -C CBLAS cleantest + $(MAKE) -C TESTING cleantest diff --git a/lapack-netlib/README.md b/lapack-netlib/README.md index 086492942..e5ac2d9c8 100644 --- a/lapack-netlib/README.md +++ b/lapack-netlib/README.md @@ -1,6 +1,9 @@ # LAPACK [![Build Status](https://travis-ci.org/Reference-LAPACK/lapack.svg?branch=master)](https://travis-ci.org/Reference-LAPACK/lapack) +[![Appveyor](https://ci.appveyor.com/api/projects/status/bh38iin398msrbtr?svg=true)](https://ci.appveyor.com/project/langou/lapack/) +[![codecov](https://codecov.io/gh/Reference-LAPACK/lapack/branch/master/graph/badge.svg)](https://codecov.io/gh/Reference-LAPACK/lapack) + * VERSION 1.0 : February 29, 1992 * VERSION 1.0a : June 30, 1992 @@ -24,133 +27,92 @@ * VERSION 3.6.0 : November 2015 * VERSION 3.6.1 : June 2016 * VERSION 3.7.0 : December 2016 +* VERSION 3.7.1 : June 2017 +* VERSION 3.8.0 : November 2017 + +LAPACK is a library of Fortran subroutines for solving the most commonly +occurring problems in numerical linear algebra. + +LAPACK is a freely-available software package. It can be included in commercial +software packages (and has been). We only ask that that proper credit be given +to the authors, for example by citing the LAPACK Users' Guide. The license used +for the software is the modified BSD license, see: +https://github.com/Reference-LAPACK/lapack/blob/master/LICENSE -LAPACK is a library of Fortran 90 with subroutines for solving -the most commonly occurring problems in numerical linear algebra. -It is freely-available software, and is copyrighted. +Like all software, it is copyrighted. It is not trademarked, but we do ask the +following: if you modify the source for these routines we ask that you change +the name of the routine and comment the changes made to the original. -LAPACK is available on netlib and can be obtained via the World Wide -Web and anonymous ftp. +We will gladly answer any questions regarding the software. If a modification +is done, however, it is the responsibility of the person who modified the +routine to provide support. - http://www.netlib.org/lapack/ +LAPACK is available from github at: +https://github.com/reference-lapack/lapack -The distribution tar file contains the Fortran source for LAPACK and the -testing programs. It also contains the Fortran77 -reference implementation of the Basic Linear Algebra Subprograms -(the Level 1, 2, and 3 BLAS) needed by LAPACK. However this code is -intended for use only if there is no other implementation of the BLAS -already available on your machine; the efficiency of LAPACK depends -very much on the efficiency of the BLAS. +LAPACK releases are also available on netlib at: +http://www.netlib.org/lapack/ + +The distribution contains (1) the Fortran source for LAPACK, and (2) its +testing programs. It also contains (3) the Fortran reference implementation of +the Basic Linear Algebra Subprograms (the Level 1, 2, and 3 BLAS) needed by +LAPACK. However this code is intended for use only if there is no other +implementation of the BLAS already available on your machine; the efficiency of +LAPACK depends very much on the efficiency of the BLAS. It also contains (4) +CBLAS, a C interface to the BLAS, and (5) LAPACKE, a C interface to LAPACK. ## Installation - - LAPACK can be installed with `make`. Configuration have to be set in the + - LAPACK can be installed with `make`. The configuration have to be set in the `make.inc` file. A `make.inc.example` for a Linux machine running GNU compilers is given in the main directory. Some specific `make.inc` are also available in the `INSTALL` directory. - LAPACK includes also the CMake build. You will need to have CMake installed on your machine (CMake is available at http://www.cmake.org/). CMake will allow an easy installation on a Windows Machine. - - Specific information to run LAPACK under Windows are available at + - Specific information to run LAPACK under Windows is available at http://icl.cs.utk.edu/lapack-for-windows/lapack/. - For further information on LAPACK please read our FAQ at - http://www.netlib.org/lapack/#_faq - A User forum is also available to help you with the LAPACK library at - http://icl.cs.utk.edu/lapack-forum/ - ## User Support -LAPACK has been thoroughly tested, on many different -types of computers. The LAPACK project supports the package in the -sense that reports of errors or poor performance will gain immediate -attention from the developers. Such reports, descriptions -of interesting applications, and other comments should be sent by -electronic mail to lapack@cs.utk.edu. +LAPACK has been thoroughly tested, on many different types of computers. The +LAPACK project supports the package in the sense that reports of errors or poor +performance will gain immediate attention from the developers. Such reports, +descriptions of interesting applications, and other comments should be sent by +electronic mail to lapack@icl.utk.edu. + +For further information on LAPACK please read our FAQ at +http://www.netlib.org/lapack/#_faq. A list of known problems, bugs, and compiler errors for LAPACK is -maintained on netlib. - * http://www.netlib.org/lapack/release_notes.html +maintained on netlib +http://www.netlib.org/lapack/release_notes.html. +Please see as well +https://github.com/Reference-LAPACK/lapack/issues. A User forum is also available to help you with the LAPACK library at - http://icl.cs.utk.edu/lapack-forum/. -You can also contact directly the LAPACK team at lapack@cs.utk.edu. +http://icl.cs.utk.edu/lapack-forum/. +You can also contact directly the LAPACK team at lapack@icl.utk.edu. ## Testing -The complete package, including test code in four -different Fortran data types (real, complex, double precision, double -complex), contains some 805,000 lines of Fortran source and comments. -You will need approximately 33 Mbytes to read the complete tape. -We recommend that you run the testing. The total -space requirements for the testing for all four data -types, including the object files, is approximately 80 Mbytes. - -A README file containing the information in this letter is located -in the LAPACK directory. Postscript and LaTeX versions of the Quick -Installation Guide are in the `LAPACK/INSTALL` directory, in the files -`lawn81.tex`, `psfig.tex`, `lawn81.ps`, and `org2.ps`. Consult the Installation -Guide for further details on installing the package and on what is contained -in each subdirectory. For complete information on the LAPACK Testing -please consult LAPACK Working Note 41 "Installation -Guide for LAPACK". +LAPACK includes a thorough test suite. We recommend that, after compilation, +you run the test suite. +For complete information on the LAPACK Testing please consult LAPACK Working +Note 41 "Installation Guide for LAPACK". ## User Guide -It is highly recommended that you obtain a copy of the Third Edition of -the LAPACK Users' Guide published by SIAM in Winter, 1999. This Users' -Guide gives a detailed description of the philosophy behind LAPACK as well -as an explanation of its usage. The LAPACK Users' Guide can be purchased from: -SIAM; Customer Service; P. O. Box 7260; Philadelphia, PA 19104; -215-382-9800, FAX 215-386-7999. It will also be available from booksellers. - -To order by email, send email to service@siam.org. The book is also -available via SIAM's World Wide Web URL at http://www.siam.org. The -ISBN number is 0-89871-447-8, and SIAM order code is SE09. The list -price for SIAM members is $31.20; the cost for nonmembers is $39.00. - To view an HTML version of the Users' Guide please refer to the URL - http://www.netlib.org/lapack/lug/lapack_lug.html. ## LAPACKE -LAPACK now includes the LAPACKE package -LAPACKE is a Standard C language APIs for LAPACK -http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack -collaboration LAPACK and INTEL Math Kernel Library - -Documentation available in the DOCS folder - -## Related Projects - -The Fortran95 interface to LAPACK is available, as well as an f2c'ed -version of LAPACK, and a C++ version of a subset of LAPACK routines. -Refer to the following URLs on netlib for further information: - - * http://www.netlib.org/lapack95/ - * http://www.netlib.org/clapack/ - * http://www.netlib.org/lapack++/ - * http://www.cs.utk.edu/java/f2j/ - -Or, for more information on the distributed-memory version of LAPACK, -consult the ScaLAPACK index on netlib: - - http://www.netlib.org/scalapack/ - - -## Working Notes -A number of technical reports were written during the development of -LAPACK and published as LAPACK Working Notes, initially by Argonne -National Laboratory and later by the University of Tennessee. Many of -these reports later appeared as journal articles. Most of these working -notes are available in pdf and postscript form from netlib. - * http://www.netlib.org/lapack/lawns/ - * http://www.netlib.org/lapack/lawnspdf/ -Otherwise, requests for copies of these working notes can be sent to -the following address. +LAPACK now includes the LAPACKE package. LAPACKE is a Standard C language API +for LAPACK This was born from a collaboration of the LAPACK and INTEL Math +Kernel Library teams. See: +http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack. -LAPACK Project, c/o J.J. Dongarra, Computer Science Department, University of Tennessee, Knoxville, Tennessee 37996-1301, USA, Email: lapack@cs.utk.edu. diff --git a/lapack-netlib/SRC/CMakeLists.txt b/lapack-netlib/SRC/CMakeLists.txt index 4d7081cf2..944401beb 100644 --- a/lapack-netlib/SRC/CMakeLists.txt +++ b/lapack-netlib/SRC/CMakeLists.txt @@ -1,10 +1,10 @@ ####################################################################### # This is the makefile to create a library for LAPACK. # The files are organized as follows: + # ALLAUX -- Auxiliary routines called from all precisions -# -# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. -# DZLAUX -- Auxiliary routines called from both DOUBLE and COMPLEX*16. +# SCLAUX -- Auxiliary routines called from single precision +# DZLAUX -- Auxiliary routines called from double precision # # DSLASRC -- Double-single mixed precision real routines called from # single, single-extra and double precision real LAPACK @@ -28,25 +28,6 @@ # # DEPRECATED -- Deprecated routines in all precisions # -# The library can be set up to include routines for any combination -# of the four precisions. To create or add to the library, enter make -# followed by one or more of the precisions desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates a library of all four precisions. -# The library is called -# lapack.a -# and is created at the next higher directory level. -# -# To remove the object files after the library is created, enter -# make clean -# On some systems, you can force the source files to be recompiled by -# entering (for example) -# make single FRC=FRC -# # ***Note*** # The functions lsame, second, dsecnd, slamch, and dlamch may have # to be installed before compiling the library. Refer to the @@ -54,7 +35,7 @@ # ####################################################################### -set(ALLAUX ilaenv.f ieeeck.f lsamen.f iparmq.f iparam2stage.F +set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f ../INSTALL/ilaver.f ../INSTALL/lsame.f xerbla.f xerbla_array.f ../INSTALL/slamch.f) @@ -103,8 +84,8 @@ set(SLASRC sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesv.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f sgetf2.f - sgetrf.f sgetrf2.f sgetri.f - sgetrs.f sggbak.f sggbal.f + sgetrf2.f sgetri.f + sggbak.f sggbal.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f sggglm.f sgghrd.f sgghd3.f sgglse.f sggqrf.f sggrqf.f sggsvd3.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f @@ -130,7 +111,7 @@ set(SLASRC sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f spbstf.f spbsv.f spbsvx.f spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f - sposvx.f spotf2.f spotrf.f spotrf2.f spotri.f spotrs.f spstrf.f spstf2.f + sposvx.f spotf2.f spotrf2.f spotri.f spstrf.f spstf2.f sppcon.f sppequ.f spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f @@ -143,12 +124,12 @@ set(SLASRC ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f ssyconvf.f ssyconvf_rook.f - ssysv_aa.f ssytrf_aa.f ssytrs_aa.f ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f ssytri_rook.f ssycon_rook.f ssysv_rook.f ssytf2_rk.f ssytrf_rk.f ssytrs_3.f ssytri_3.f ssytri_3x.f ssycon_3.f ssysv_rk.f ssysv_aa.f ssytrf_aa.f ssytrs_aa.f + ssysv_aa_2stage.f ssytrf_aa_2stage.f ssytrs_aa_2stage.f stbcon.f stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f @@ -190,8 +171,8 @@ set(CLASRC cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesv.f cgesvd.f cgesvdx.f cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f - cgesvx.f cgetc2.f cgetf2.f cgetrf.f cgetrf2.f - cgetri.f cgetrs.f + cgesvx.f cgetc2.f cgetf2.f cgetrf2.f + cgetri.f cggbak.f cggbal.f cgges.f cgges3.f cggesx.f cggev.f cggev3.f cggevx.f cggglm.f cgghrd.f cgghd3.f cgglse.f cggqrf.f cggrqf.f @@ -208,6 +189,7 @@ set(CLASRC chetf2_rk.f chetrf_rk.f chetri_3.f chetri_3x.f chetrs_3.f checon_3.f chesv_rk.f chesv_aa.f chetrf_aa.f chetrs_aa.f + chesv_aa_2stage.f chetrf_aa_2stage.f chetrs_aa_2stage.f chgeqz.f chpcon.f chpev.f chpevd.f chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f chpsvx.f @@ -230,7 +212,7 @@ set(CLASRC clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f clauu2.f clauum.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f - cposv.f cposvx.f cpotf2.f cpotrf.f cpotrf2.f cpotri.f cpotrs.f cpstrf.f cpstf2.f + cposv.f cposvx.f cpotf2.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f crot.f cspcon.f cspmv.f cspr.f csprfs.f cspsv.f @@ -244,6 +226,7 @@ set(CLASRC csytri_rook.f csycon_rook.f csysv_rook.f csytf2_rk.f csytrf_rk.f csytrf_aa.f csytrs_3.f csytrs_aa.f csytri_3.f csytri_3x.f csycon_3.f csysv_rk.f csysv_aa.f + csysv_aa_2stage.f csytrf_aa_2stage.f csytrs_aa_2stage.f ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f ctprfs.f ctptri.f @@ -336,6 +319,7 @@ set(DLASRC dsytf2_rk.f dsytrf_rk.f dsytrs_3.f dsytri_3.f dsytri_3x.f dsycon_3.f dsysv_rk.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f + dsysv_aa_2stage.f dsytrf_aa_2stage.f dsytrs_aa_2stage.f dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f @@ -394,6 +378,7 @@ set(ZLASRC zhetf2_rk.f zhetrf_rk.f zhetri_3.f zhetri_3x.f zhetrs_3.f zhecon_3.f zhesv_rk.f zhesv_aa.f zhetrf_aa.f zhetrs_aa.f + zhesv_aa_2stage.f zhetrf_aa_2stage.f zhetrs_aa_2stage.f zhgeqz.f zhpcon.f zhpev.f zhpevd.f zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f zhpsvx.f @@ -431,6 +416,7 @@ set(ZLASRC zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f zsytri_rook.f zsycon_rook.f zsysv_rook.f zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrs_3.f + zsysv_aa_2stage.f zsytrf_aa_2stage.f zsytrs_aa_2stage.f zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f @@ -468,11 +454,6 @@ set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f zla_heamv.f zla_hercond_c.f zla_hercond_x.f zla_herpvgrw.f zla_lin_berr.f zlarscl2.f zlascl2.f zla_wwaddw.f) - -if(USE_XBLAS) - set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC}) -endif() - if(BUILD_DEPRECATED) list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f @@ -486,40 +467,46 @@ if(BUILD_DEPRECATED) list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f) - message(STATUS "Building deprecated routines") endif() +if(USE_XBLAS) + list(APPEND SLASRC ${SXLASRC}) + list(APPEND DLASRC ${DXLASRC}) + list(APPEND CLASRC ${CXLASRC}) + list(APPEND ZLASRC ${ZXLASRC}) +endif() + + +set(SOURCES) if(BUILD_SINGLE) - set(ALLOBJ ${SLASRC} ${ALLAUX} ${SCLAUX}) - message(STATUS "Building Single Precision") + list(APPEND SOURCES ${SLASRC} ${DSLASRC} ${SCLAUX} ${ALLAUX}) endif() if(BUILD_DOUBLE) - set(ALLOBJ ${ALLOBJ} ${DLASRC} ${ALLAUX} ${DZLAUX} ${DSLASRC}) - message(STATUS "Building Double Precision") + list(APPEND SOURCES ${DLASRC} ${DSLASRC} ${DZLAUX} ${ALLAUX}) endif() if(BUILD_COMPLEX) - set(ALLOBJ ${ALLOBJ} ${CLASRC} ${ALLAUX} ${SCLAUX}) - message(STATUS "Building Complex Precision") + list(APPEND SOURCES ${CLASRC} ${ZCLASRC} ${SCLAUX} ${ALLAUX}) endif() if(BUILD_COMPLEX16) - set(ALLOBJ ${ALLOBJ} ${ZLASRC} ${ALLAUX} ${DZLAUX} ${ZCLASRC}) - message(STATUS "Building Double Complex Precision") -endif() - -if(NOT ALLOBJ) - message(FATAL_ERROR "-->LAPACK SRC BUILD: NOTHING TO BUILD, NO PRECISION SELECTED: - PLEASE ENABLE AT LEAST ONE OF THOSE: BUILD_SINGLE, BUILD_COMPLEX, BUILD_DOUBLE, BUILD_COMPLEX16.") + list(APPEND SOURCES ${ZLASRC} ${ZCLASRC} ${DZLAUX} ${ALLAUX}) endif() +list(REMOVE_DUPLICATES SOURCES) -list(REMOVE_DUPLICATES ALLOBJ) - -add_library(lapack ${ALLOBJ} ${ALLXOBJ}) -target_link_libraries(lapack ${BLAS_LIBRARIES} ${XBLAS_LIBRARY}) - +add_library(lapack ${SOURCES}) set_target_properties( lapack PROPERTIES VERSION ${LAPACK_VERSION} SOVERSION ${LAPACK_MAJOR_VERSION} ) +if(USE_XBLAS) + target_link_libraries(lapack PRIVATE ${XBLAS_LIBRARY}) +endif() +target_link_libraries(lapack PRIVATE ${BLAS_LIBRARIES}) + +if (${CMAKE_BUILD_TYPE_UPPER} STREQUAL "COVERAGE") + target_link_libraries(lapack PRIVATE gcov) + add_coverage(lapack) +endif() + lapack_install_library(lapack) diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index d3273595d..9cc2ea51b 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -3,10 +3,10 @@ include ../make.inc ####################################################################### # This is the makefile to create a library for LAPACK. # The files are organized as follows: -# ALLAUX -- Auxiliary routines called from all precisions # -# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. -# DZLAUX -- Auxiliary routines called from both DOUBLE and COMPLEX*16. +# ALLAUX -- Auxiliary routines called from all precisions +# SCLAUX -- Auxiliary routines called from single precision +# DZLAUX -- Auxiliary routines called from double precision # # DSLASRC -- Double-single mixed precision real routines called from # single, single-extra and double precision real LAPACK @@ -56,7 +56,8 @@ include ../make.inc # ####################################################################### -ALLAUX_O = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o iparam2stage.o\ +ALLAUX_O = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \ + iparmq.o iparam2stage.o \ ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o @@ -144,12 +145,14 @@ SLASRC_O = \ ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \ ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \ ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \ - ssyswapr.o ssytrs.o ssytrs2.o ssyconv.o ssyconvf_rook.o \ + ssyswapr.o ssytrs.o ssytrs2.o \ + ssyconv.o ssyconvf.o ssyconvf_rook.o \ ssytf2_rook.o ssytrf_rook.o ssytrs_rook.o \ ssytri_rook.o ssycon_rook.o ssysv_rook.o \ ssytf2_rk.o ssytrf_rk.o ssytrs_3.o \ ssytri_3.o ssytri_3x.o ssycon_3.o ssysv_rk.o \ slasyf_aa.o ssysv_aa.o ssytrf_aa.o ssytrs_aa.o \ + ssysv_aa_2stage.o ssytrf_aa_2stage.o ssytrs_aa_2stage.o \ stbcon.o \ stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \ stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \ @@ -210,7 +213,8 @@ CLASRC_O = \ chetrs_rook.o checon_rook.o chesv_rook.o \ chetf2_rk.o chetrf_rk.o chetri_3.o chetri_3x.o \ chetrs_3.o checon_3.o chesv_rk.o \ - chesv_aa.o chetrf_aa.o chetrs_aa.o clahef_aa.o\ + chesv_aa.o chetrf_aa.o chetrs_aa.o clahef_aa.o \ + chesv_aa_2stage.o chetrf_aa_2stage.o chetrs_aa_2stage.o \ chgeqz.o chpcon.o chpev.o chpevd.o \ chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \ chpsvx.o \ @@ -247,6 +251,7 @@ CLASRC_O = \ csytri_rook.o csycon_rook.o csysv_rook.o \ csytf2_rk.o csytrf_rk.o csytrf_aa.o csytrs_3.o csytrs_aa.o \ csytri_3.o csytri_3x.o csycon_3.o csysv_rk.o csysv_aa.o \ + csysv_aa_2stage.o csytrf_aa_2stage.o csytrs_aa_2stage.o \ ctbcon.o ctbrfs.o ctbtrs.o ctgevc.o ctgex2.o \ ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \ ctprfs.o ctptri.o \ @@ -344,6 +349,7 @@ DLASRC_O = \ dsytf2_rk.o dsytrf_rk.o dsytrs_3.o \ dsytri_3.o dsytri_3x.o dsycon_3.o dsysv_rk.o \ dlasyf_aa.o dsysv_aa.o dsytrf_aa.o dsytrs_aa.o \ + dsysv_aa_2stage.o dsytrf_aa_2stage.o dsytrs_aa_2stage.o \ dtbcon.o dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \ dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \ dtptrs.o \ @@ -404,6 +410,7 @@ ZLASRC_O = \ zhetf2_rk.o zhetrf_rk.o zhetri_3.o zhetri_3x.o \ zhetrs_3.o zhecon_3.o zhesv_rk.o \ zhesv_aa.o zhetrf_aa.o zhetrs_aa.o zlahef_aa.o \ + zhesv_aa_2stage.o zhetrf_aa_2stage.o zhetrs_aa_2stage.o \ zhgeqz.o zhpcon.o zhpev.o zhpevd.o \ zhpevx.o zhpgst.o zhpgv.o zhpgvd.o zhpgvx.o zhprfs.o zhpsv.o \ zhpsvx.o \ @@ -440,6 +447,7 @@ ZLASRC_O = \ zsyconv.o zsyconvf.o zsyconvf_rook.o \ zsytf2_rook.o zsytrf_rook.o zsytrs_rook.o zsytrs_aa.o \ zsytri_rook.o zsycon_rook.o zsysv_rook.o \ + zsysv_aa_2stage.o zsytrf_aa_2stage.o zsytrs_aa_2stage.o \ zsytf2_rk.o zsytrf_rk.o zsytrf_aa.o zsytrs_3.o \ zsytri_3.o zsytri_3x.o zsycon_3.o zsysv_rk.o zsysv_aa.o \ ztbcon.o ztbrfs.o ztbtrs.o ztgevc.o ztgex2.o \ diff --git a/lapack-netlib/SRC/VARIANTS/Makefile b/lapack-netlib/SRC/VARIANTS/Makefile index 6db97e951..9f1410755 100644 --- a/lapack-netlib/SRC/VARIANTS/Makefile +++ b/lapack-netlib/SRC/VARIANTS/Makefile @@ -17,8 +17,6 @@ include ../../make.inc # 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 ####################################################################### -VARIANTSDIR = LIB - CHOLRL = cholesky/RL/cpotrf.o cholesky/RL/dpotrf.o cholesky/RL/spotrf.o cholesky/RL/zpotrf.o CHOLTOP = cholesky/TOP/cpotrf.o cholesky/TOP/dpotrf.o cholesky/TOP/spotrf.o cholesky/TOP/zpotrf.o @@ -32,36 +30,37 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o -all: cholrl choltop lucr lull lurec qrll +all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a -cholrl: $(CHOLRL) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL) - $(RANLIB) $(VARIANTSDIR)/cholrl.a +cholrl.a: $(CHOLRL) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ -choltop: $(CHOLTOP) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP) - $(RANLIB) $(VARIANTSDIR)/choltop.a +choltop.a: $(CHOLTOP) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ -lucr: $(LUCR) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR) - $(RANLIB) $(VARIANTSDIR)/lucr.a +lucr.a: $(LUCR) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ -lull: $(LULL) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lull.a $(LULL) - $(RANLIB) $(VARIANTSDIR)/lull.a +lull.a: $(LULL) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ -lurec: $(LUREC) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC) - $(RANLIB) $(VARIANTSDIR)/lurec.a +lurec.a: $(LUREC) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ -qrll: $(QRLL) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL) - $(RANLIB) $(VARIANTSDIR)/qrll.a +qrll.a: $(QRLL) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ +clean: cleanobj cleanlib +cleanobj: + rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) +cleanlib: + rm -f *.a .f.o: $(FORTRAN) $(OPTS) -c -o $@ $< - -clean: - rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) \ - $(VARIANTSDIR)/*.a diff --git a/lapack-netlib/SRC/VARIANTS/README b/lapack-netlib/SRC/VARIANTS/README index c20621515..4d301cc6e 100644 --- a/lapack-netlib/SRC/VARIANTS/README +++ b/lapack-netlib/SRC/VARIANTS/README @@ -1,5 +1,5 @@ - =============== - = README File = + =============== + = README File = =============== This README File is for the LAPACK driver variants. @@ -34,10 +34,10 @@ References:For a more detailed description please refer to ========= These variants are compiled by default in the build process but they are not tested by default. -The build process creates one new library per variants in the four arithmetics (singel/double/comple/double complex). -The libraries are in the SRC/VARIANTS/LIB directory. +The build process creates one new library per variants in the four arithmetics (single real/double real/single complex/double complex). +The libraries are in the SRC/VARIANTS directory. -Corresponding libraries created in SRC/VARIANTS/LIB: +Corresponding libraries created in SRC/VARIANTS: - LU Crout : lucr.a - LU Left Looking : lull.a - LU Sivan Toledo's recursive : lurec.a @@ -73,7 +73,7 @@ Default using LU Right Looking version: Using LU Left Looking version: $(FORTRAN) -c myprog.f - $(FORTRAN) -o myexe myprog.o $(PATH TO LAPACK/SRC/VARIANTS/LIB)/lull.a $(LAPACKLIB) $(BLASLIB) + $(FORTRAN) -o myexe myprog.o $(PATH TO LAPACK/SRC/VARIANTS)/lull.a $(LAPACKLIB) $(BLASLIB) =========== = SUPPORT = diff --git a/lapack-netlib/SRC/cbbcsd.f b/lapack-netlib/SRC/cbbcsd.f index d41756706..b5a18d5b8 100644 --- a/lapack-netlib/SRC/cbbcsd.f +++ b/lapack-netlib/SRC/cbbcsd.f @@ -190,7 +190,7 @@ *> *> \param[in,out] V2T *> \verbatim -*> V2T is COMPLEX array, dimenison (LDV2T,M-Q) +*> V2T is COMPLEX array, dimension (LDV2T,M-Q) *> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is *> premultiplied by the conjugate transpose of the right *> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and @@ -332,7 +332,7 @@ $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, $ B22D, B22E, RWORK, LRWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/cgebd2.f b/lapack-netlib/SRC/cgebd2.f index e87412023..a234f36ac 100644 --- a/lapack-netlib/SRC/cgebd2.f +++ b/lapack-netlib/SRC/cgebd2.f @@ -100,7 +100,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is COMPLEX array dimension (min(M,N)) +*> TAUQ is COMPLEX array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the unitary matrix Q. See Further Details. *> \endverbatim @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGEcomputational * @precisions normal c -> s d z @@ -190,10 +190,10 @@ * ===================================================================== SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N diff --git a/lapack-netlib/SRC/cgebrd.f b/lapack-netlib/SRC/cgebrd.f index d01e228a5..eebd26f12 100644 --- a/lapack-netlib/SRC/cgebrd.f +++ b/lapack-netlib/SRC/cgebrd.f @@ -102,7 +102,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is COMPLEX array dimension (min(M,N)) +*> TAUQ is COMPLEX array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the unitary matrix Q. See Further Details. *> \endverbatim @@ -148,7 +148,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexGEcomputational * @@ -206,10 +206,10 @@ SUBROUTINE CGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LWORK, M, N @@ -229,8 +229,7 @@ * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, - $ NBMIN, NX - REAL WS + $ NBMIN, NX, WS * .. * .. External Subroutines .. EXTERNAL CGEBD2, CGEMM, CLABRD, XERBLA diff --git a/lapack-netlib/SRC/cgeevx.f b/lapack-netlib/SRC/cgeevx.f index 2a7a5f2c8..b1ff3cc67 100644 --- a/lapack-netlib/SRC/cgeevx.f +++ b/lapack-netlib/SRC/cgeevx.f @@ -25,12 +25,12 @@ * .. Scalar Arguments .. * CHARACTER BALANC, JOBVL, JOBVR, SENSE * INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N -* REAL ABNRM +* REAL ABNRM * .. * .. Array Arguments .. -* REAL RCONDE( * ), RCONDV( * ), RWORK( * ), +* REAL RCONDE( * ), RCONDV( * ), RWORK( * ), * $ SCALE( * ) -* COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), +* COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), * $ W( * ), WORK( * ) * .. * @@ -288,7 +288,7 @@ $ RCONDV, WORK, LWORK, RWORK, INFO ) implicit none * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -296,19 +296,19 @@ * .. Scalar Arguments .. CHARACTER BALANC, JOBVL, JOBVR, SENSE INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N - REAL ABNRM + REAL ABNRM * .. * .. Array Arguments .. - REAL RCONDE( * ), RCONDV( * ), RWORK( * ), + REAL RCONDE( * ), RCONDV( * ), RWORK( * ), $ SCALE( * ) - COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), $ W( * ), WORK( * ) * .. * * ===================================================================== * * .. Parameters .. - REAL ZERO, ONE + REAL ZERO, ONE PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. @@ -317,8 +317,8 @@ CHARACTER JOB, SIDE INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, $ LWORK_TREVC, MAXWRK, MINWRK, NOUT - REAL ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM - COMPLEX TMP + REAL ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX TMP * .. * .. Local Arrays .. LOGICAL SELECT( 1 ) diff --git a/lapack-netlib/SRC/cgejsv.f b/lapack-netlib/SRC/cgejsv.f index 28804e763..8eb43cf50 100644 --- a/lapack-netlib/SRC/cgejsv.f +++ b/lapack-netlib/SRC/cgejsv.f @@ -1,2235 +1,2235 @@ -*> \brief \b CGEJSV -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download CGEJSV + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* SUBROUTINE CGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, -* M, N, A, LDA, SVA, U, LDU, V, LDV, -* CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) -* -* .. Scalar Arguments .. -* IMPLICIT NONE -* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N -* .. -* .. Array Arguments .. -* COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) -* REAL SVA( N ), RWORK( LRWORK ) -* INTEGER IWORK( * ) -* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CGEJSV computes the singular value decomposition (SVD) of a complex M-by-N -*> matrix [A], where M >= N. The SVD of [A] is written as -*> -*> [A] = [U] * [SIGMA] * [V]^*, -*> -*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N -*> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and -*> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are -*> the singular values of [A]. The columns of [U] and [V] are the left and -*> the right singular vectors of [A], respectively. The matrices [U] and [V] -*> are computed and stored in the arrays U and V, respectively. The diagonal -*> of [SIGMA] is computed and stored in the array SVA. -*> \endverbatim -*> -*> Arguments: -*> ========== -*> -*> \param[in] JOBA -*> \verbatim -*> JOBA is CHARACTER*1 -*> Specifies the level of accuracy: -*> = 'C': This option works well (high relative accuracy) if A = B * D, -*> with well-conditioned B and arbitrary diagonal matrix D. -*> The accuracy cannot be spoiled by COLUMN scaling. The -*> accuracy of the computed output depends on the condition of -*> B, and the procedure aims at the best theoretical accuracy. -*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is -*> bounded by f(M,N)*epsilon* cond(B), independent of D. -*> The input matrix is preprocessed with the QRF with column -*> pivoting. This initial preprocessing and preconditioning by -*> a rank revealing QR factorization is common for all values of -*> JOBA. Additional actions are specified as follows: -*> = 'E': Computation as with 'C' with an additional estimate of the -*> condition number of B. It provides a realistic error bound. -*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings -*> D1, D2, and well-conditioned matrix C, this option gives -*> higher accuracy than the 'C' option. If the structure of the -*> input matrix is not known, and relative accuracy is -*> desirable, then this option is advisable. The input matrix A -*> is preprocessed with QR factorization with FULL (row and -*> column) pivoting. -*> = 'G' Computation as with 'F' with an additional estimate of the -*> condition number of B, where A=B*D. If A has heavily weighted -*> rows, then using this condition number gives too pessimistic -*> error bound. -*> = 'A': Small singular values are not well determined by the data -*> and are considered as noisy; the matrix is treated as -*> numerically rank defficient. The error in the computed -*> singular values is bounded by f(m,n)*epsilon*||A||. -*> The computed SVD A = U * S * V^* restores A up to -*> f(m,n)*epsilon*||A||. -*> This gives the procedure the licence to discard (set to zero) -*> all singular values below N*epsilon*||A||. -*> = 'R': Similar as in 'A'. Rank revealing property of the initial -*> QR factorization is used do reveal (using triangular factor) -*> a gap sigma_{r+1} < epsilon * sigma_r in which case the -*> numerical RANK is declared to be r. The SVD is computed with -*> absolute error bounds, but more accurately than with 'A'. -*> \endverbatim -*> -*> \param[in] JOBU -*> \verbatim -*> JOBU is CHARACTER*1 -*> Specifies whether to compute the columns of U: -*> = 'U': N columns of U are returned in the array U. -*> = 'F': full set of M left sing. vectors is returned in the array U. -*> = 'W': U may be used as workspace of length M*N. See the description -*> of U. -*> = 'N': U is not computed. -*> \endverbatim -*> -*> \param[in] JOBV -*> \verbatim -*> JOBV is CHARACTER*1 -*> Specifies whether to compute the matrix V: -*> = 'V': N columns of V are returned in the array V; Jacobi rotations -*> are not explicitly accumulated. -*> = 'J': N columns of V are returned in the array V, but they are -*> computed as the product of Jacobi rotations, if JOBT .EQ. 'N'. -*> = 'W': V may be used as workspace of length N*N. See the description -*> of V. -*> = 'N': V is not computed. -*> \endverbatim -*> -*> \param[in] JOBR -*> \verbatim -*> JOBR is CHARACTER*1 -*> Specifies the RANGE for the singular values. Issues the licence to -*> set to zero small positive singular values if they are outside -*> specified range. If A .NE. 0 is scaled so that the largest singular -*> value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues -*> the licence to kill columns of A whose norm in c*A is less than -*> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, -*> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). -*> = 'N': Do not kill small columns of c*A. This option assumes that -*> BLAS and QR factorizations and triangular solvers are -*> implemented to work in that range. If the condition of A -*> is greater than BIG, use CGESVJ. -*> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] -*> (roughly, as described above). This option is recommended. -*> =========================== -*> For computing the singular values in the FULL range [SFMIN,BIG] -*> use CGESVJ. -*> \endverbatim -*> -*> \param[in] JOBT -*> \verbatim -*> JOBT is CHARACTER*1 -*> If the matrix is square then the procedure may determine to use -*> transposed A if A^* seems to be better with respect to convergence. -*> If the matrix is not square, JOBT is ignored. -*> The decision is based on two values of entropy over the adjoint -*> orbit of A^* * A. See the descriptions of WORK(6) and WORK(7). -*> = 'T': transpose if entropy test indicates possibly faster -*> convergence of Jacobi process if A^* is taken as input. If A is -*> replaced with A^*, then the row pivoting is included automatically. -*> = 'N': do not speculate. -*> The option 'T' can be used to compute only the singular values, or -*> the full SVD (U, SIGMA and V). For only one set of singular vectors -*> (U or V), the caller should provide both U and V, as one of the -*> matrices is used as workspace if the matrix A is transposed. -*> The implementer can easily remove this constraint and make the -*> code more complicated. See the descriptions of U and V. -*> In general, this option is considered experimental, and 'N'; should -*> be preferred. This is subject to changes in the future. -*> \endverbatim -*> -*> \param[in] JOBP -*> \verbatim -*> JOBP is CHARACTER*1 -*> Issues the licence to introduce structured perturbations to drown -*> denormalized numbers. This licence should be active if the -*> denormals are poorly implemented, causing slow computation, -*> especially in cases of fast convergence (!). For details see [1,2]. -*> For the sake of simplicity, this perturbations are included only -*> when the full SVD or only the singular values are requested. The -*> implementer/user can easily add the perturbation for the cases of -*> computing one set of singular vectors. -*> = 'P': introduce perturbation -*> = 'N': do not perturb -*> \endverbatim -*> -*> \param[in] M -*> \verbatim -*> M is INTEGER -*> The number of rows of the input matrix A. M >= 0. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The number of columns of the input matrix A. M >= N >= 0. -*> \endverbatim -*> -*> \param[in,out] A -*> \verbatim -*> A is COMPLEX array, dimension (LDA,N) -*> On entry, the M-by-N matrix A. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,M). -*> \endverbatim -*> -*> \param[out] SVA -*> \verbatim -*> SVA is REAL array, dimension (N) -*> On exit, -*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the -*> computation SVA contains Euclidean column norms of the -*> iterated matrices in the array A. -*> - For WORK(1) .NE. WORK(2): The singular values of A are -*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if -*> sigma_max(A) overflows or if small singular values have been -*> saved from underflow by scaling the input matrix A. -*> - If JOBR='R' then some of the singular values may be returned -*> as exact zeros obtained by "set to zero" because they are -*> below the numerical rank threshold or are denormalized numbers. -*> \endverbatim -*> -*> \param[out] U -*> \verbatim -*> U is COMPLEX array, dimension ( LDU, N ) or ( LDU, M ) -*> If JOBU = 'U', then U contains on exit the M-by-N matrix of -*> the left singular vectors. -*> If JOBU = 'F', then U contains on exit the M-by-M matrix of -*> the left singular vectors, including an ONB -*> of the orthogonal complement of the Range(A). -*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), -*> then U is used as workspace if the procedure -*> replaces A with A^*. In that case, [V] is computed -*> in U as left singular vectors of A^* and then -*> copied back to the V array. This 'W' option is just -*> a reminder to the caller that in this case U is -*> reserved as workspace of length N*N. -*> If JOBU = 'N' U is not referenced, unless JOBT='T'. -*> \endverbatim -*> -*> \param[in] LDU -*> \verbatim -*> LDU is INTEGER -*> The leading dimension of the array U, LDU >= 1. -*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. -*> \endverbatim -*> -*> \param[out] V -*> \verbatim -*> V is COMPLEX array, dimension ( LDV, N ) -*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of -*> the right singular vectors; -*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), -*> then V is used as workspace if the pprocedure -*> replaces A with A^*. In that case, [U] is computed -*> in V as right singular vectors of A^* and then -*> copied back to the U array. This 'W' option is just -*> a reminder to the caller that in this case V is -*> reserved as workspace of length N*N. -*> If JOBV = 'N' V is not referenced, unless JOBT='T'. -*> \endverbatim -*> -*> \param[in] LDV -*> \verbatim -*> LDV is INTEGER -*> The leading dimension of the array V, LDV >= 1. -*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. -*> \endverbatim -*> -*> \param[out] CWORK -*> \verbatim -*> CWORK is COMPLEX array, dimension (MAX(2,LWORK)) -*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or -*> LRWORK=-1), then on exit CWORK(1) contains the required length of -*> CWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[in] LWORK -*> \verbatim -*> LWORK is INTEGER -*> Length of CWORK to confirm proper allocation of workspace. -*> LWORK depends on the job: -*> -*> 1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and -*> 1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'): -*> LWORK >= 2*N+1. This is the minimal requirement. -*> ->> For optimal performance (blocked code) the optimal value -*> is LWORK >= N + (N+1)*NB. Here NB is the optimal -*> block size for CGEQP3 and CGEQRF. -*> In general, optimal LWORK is computed as -*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ)). -*> 1.2. .. an estimate of the scaled condition number of A is -*> required (JOBA='E', or 'G'). In this case, LWORK the minimal -*> requirement is LWORK >= N*N + 2*N. -*> ->> For optimal performance (blocked code) the optimal value -*> is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N. -*> In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ), -*> N*N+LWORK(CPOCON)). -*> 2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), -*> (JOBU.EQ.'N') -*> 2.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance, -*> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, -*> CUNMLQ. In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ), -*> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). -*> 2.2 .. an estimate of the scaled condition number of A is -*> required (JOBA='E', or 'G'). -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance, -*> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, -*> CUNMLQ. In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ), -*> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). -*> 3. If SIGMA and the left singular vectors are needed -*> 3.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance: -*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR. -*> In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(CGEQP3), 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). -*> 3.2 .. an estimate of the scaled condition number of A is -*> required (JOBA='E', or 'G'). -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance: -*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR. -*> In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CPOCON), -*> 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). -*> -*> 4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and -*> 4.1. if JOBV.EQ.'V' -*> the minimal requirement is LWORK >= 5*N+2*N*N. -*> 4.2. if JOBV.EQ.'J' the minimal requirement is -*> LWORK >= 4*N+N*N. -*> In both cases, the allocated CWORK can accommodate blocked runs -*> of CGEQP3, CGEQRF, CGELQF, CUNMQR, CUNMLQ. -*> -*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or -*> LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the -*> minimal length of CWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[out] RWORK -*> \verbatim -*> RWORK is REAL array, dimension (MAX(7,LWORK)) -*> On exit, -*> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) -*> such that SCALE*SVA(1:N) are the computed singular values -*> of A. (See the description of SVA().) -*> RWORK(2) = See the description of RWORK(1). -*> RWORK(3) = SCONDA is an estimate for the condition number of -*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') -*> SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). -*> It is computed using SPOCON. It holds -*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA -*> where R is the triangular factor from the QRF of A. -*> However, if R is truncated and the numerical rank is -*> determined to be strictly smaller than N, SCONDA is -*> returned as -1, thus indicating that the smallest -*> singular values might be lost. -*> -*> If full SVD is needed, the following two condition numbers are -*> useful for the analysis of the algorithm. They are provied for -*> a developer/implementer who is familiar with the details of -*> the method. -*> -*> RWORK(4) = an estimate of the scaled condition number of the -*> triangular factor in the first QR factorization. -*> RWORK(5) = an estimate of the scaled condition number of the -*> triangular factor in the second QR factorization. -*> The following two parameters are computed if JOBT .EQ. 'T'. -*> They are provided for a developer/implementer who is familiar -*> with the details of the method. -*> RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy -*> of diag(A^* * A) / Trace(A^* * A) taken as point in the -*> probability simplex. -*> RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).) -*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or -*> LRWORK=-1), then on exit RWORK(1) contains the required length of -*> RWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[in] LRWORK -*> \verbatim -*> LRWORK is INTEGER -*> Length of RWORK to confirm proper allocation of workspace. -*> LRWORK depends on the job: -*> -*> 1. If only the singular values are requested i.e. if -*> LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') -*> then: -*> 1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then: LRWORK = max( 7, 2 * M ). -*> 1.2. Otherwise, LRWORK = max( 7, N ). -*> 2. If singular values with the right singular vectors are requested -*> i.e. if -*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. -*> .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) -*> then: -*> 2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then LRWORK = max( 7, 2 * M ). -*> 2.2. Otherwise, LRWORK = max( 7, N ). -*> 3. If singular values with the left singular vectors are requested, i.e. if -*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. -*> .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) -*> then: -*> 3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then LRWORK = max( 7, 2 * M ). -*> 3.2. Otherwise, LRWORK = max( 7, N ). -*> 4. If singular values with both the left and the right singular vectors -*> are requested, i.e. if -*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. -*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) -*> then: -*> 4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then LRWORK = max( 7, 2 * M ). -*> 4.2. Otherwise, LRWORK = max( 7, N ). -*> -*> If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and -*> the length of RWORK is returned in RWORK(1). -*> \endverbatim -*> -*> \param[out] IWORK -*> \verbatim -*> IWORK is INTEGER array, of dimension at least 4, that further depends -*> on the job: -*> -*> 1. If only the singular values are requested then: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 2. If the singular values and the right singular vectors are requested then: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 3. If the singular values and the left singular vectors are requested then: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 4. If the singular values with both the left and the right singular vectors -*> are requested, then: -*> 4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N. -*> -*> On exit, -*> IWORK(1) = the numerical rank determined after the initial -*> QR factorization with pivoting. See the descriptions -*> of JOBA and JOBR. -*> IWORK(2) = the number of the computed nonzero singular values -*> IWORK(3) = if nonzero, a warning message: -*> If IWORK(3).EQ.1 then some of the column norms of A -*> were denormalized floats. The requested high accuracy -*> is not warranted by the data. -*> IWORK(4) = 1 or -1. If IWORK(4) .EQ. 1, then the procedure used A^* to -*> do the job as specified by the JOB parameters. -*> If the call to CGEJSV is a workspace query (indicated by LWORK .EQ. -1 and -*> LRWORK .EQ. -1), then on exit IWORK(1) contains the required length of -*> IWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> < 0 : if INFO = -i, then the i-th argument had an illegal value. -*> = 0 : successful exit; -*> > 0 : CGEJSV did not converge in the maximal allowed number -*> of sweeps. The computed values may be inaccurate. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date June 2016 -* -*> \ingroup complexGEsing -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> CGEJSV implements a preconditioned Jacobi SVD algorithm. It uses CGEQP3, -*> CGEQRF, and CGELQF as preprocessors and preconditioners. Optionally, an -*> additional row pivoting can be used as a preprocessor, which in some -*> cases results in much higher accuracy. An example is matrix A with the -*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned -*> diagonal matrices and C is well-conditioned matrix. In that case, complete -*> pivoting in the first QR factorizations provides accuracy dependent on the -*> condition number of C, and independent of D1, D2. Such higher accuracy is -*> not completely understood theoretically, but it works well in practice. -*> Further, if A can be written as A = B*D, with well-conditioned B and some -*> diagonal D, then the high accuracy is guaranteed, both theoretically and -*> in software, independent of D. For more details see [1], [2]. -*> The computational range for the singular values can be the full range -*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS -*> & LAPACK routines called by CGEJSV are implemented to work in that range. -*> If that is not the case, then the restriction for safe computation with -*> the singular values in the range of normalized IEEE numbers is that the -*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not -*> overflow. This code (CGEJSV) is best used in this restricted range, -*> meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are -*> returned as zeros. See JOBR for details on this. -*> Further, this implementation is somewhat slower than the one described -*> in [1,2] due to replacement of some non-LAPACK components, and because -*> the choice of some tuning parameters in the iterative part (CGESVJ) is -*> left to the implementer on a particular machine. -*> The rank revealing QR factorization (in this code: CGEQP3) should be -*> implemented as in [3]. We have a new version of CGEQP3 under development -*> that is more robust than the current one in LAPACK, with a cleaner cut in -*> rank deficient cases. It will be available in the SIGMA library [4]. -*> If M is much larger than N, it is obvious that the initial QRF with -*> column pivoting can be preprocessed by the QRF without pivoting. That -*> well known trick is not used in CGEJSV because in some cases heavy row -*> weighting can be treated with complete pivoting. The overhead in cases -*> M much larger than N is then only due to pivoting, but the benefits in -*> terms of accuracy have prevailed. The implementer/user can incorporate -*> this extra QRF step easily. The implementer can also improve data movement -*> (matrix transpose, matrix copy, matrix transposed copy) - this -*> implementation of CGEJSV uses only the simplest, naive data movement. -*> \endverbatim -* -*> \par Contributor: -* ================== -*> -*> Zlatko Drmac (Zagreb, Croatia) -* -*> \par References: -* ================ -*> -*> \verbatim -*> -*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. -*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. -*> LAPACK Working note 169. -*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. -*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. -*> LAPACK Working note 170. -*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR -*> factorization software - a case study. -*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. -*> LAPACK Working note 176. -*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, -*> QSVD, (H,K)-SVD computations. -*> Department of Mathematics, University of Zagreb, 2008, 2016. -*> \endverbatim -* -*> \par Bugs, examples and comments: -* ================================= -*> -*> Please report all bugs and send interesting examples and/or comments to -*> drmac@math.hr. Thank you. -*> -* ===================================================================== - SUBROUTINE CGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, - $ M, N, A, LDA, SVA, U, LDU, V, LDV, - $ CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) -* -* -- LAPACK computational routine (version 3.7.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 -* -* .. Scalar Arguments .. - IMPLICIT NONE - INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N -* .. -* .. Array Arguments .. - COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) - REAL SVA( N ), RWORK( LRWORK ) - INTEGER IWORK( * ) - CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV -* .. -* -* =========================================================================== -* -* .. Local Parameters .. - REAL ZERO, ONE - PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) - COMPLEX CZERO, CONE - PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), CONE = ( 1.0E0, 0.0E0 ) ) -* .. -* .. Local Scalars .. - COMPLEX CTEMP - REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK, - $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM, - $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC - INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING - LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY, - $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL, - $ ROWPIV, RSVEC, TRANSP -* - INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK - INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM, - $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF - INTEGER LWRK_CGELQF, LWRK_CGEQP3, LWRK_CGEQP3N, LWRK_CGEQRF, - $ LWRK_CGESVJ, LWRK_CGESVJV, LWRK_CGESVJU, LWRK_CUNMLQ, - $ LWRK_CUNMQR, LWRK_CUNMQRM -* .. -* .. Local Arrays - COMPLEX CDUMMY(1) - REAL RDUMMY(1) -* -* .. Intrinsic Functions .. - INTRINSIC ABS, CMPLX, CONJG, ALOG, MAX, MIN, REAL, NINT, SQRT -* .. -* .. External Functions .. - REAL SLAMCH, SCNRM2 - INTEGER ISAMAX, ICAMAX - LOGICAL LSAME - EXTERNAL ISAMAX, ICAMAX, LSAME, SLAMCH, SCNRM2 -* .. -* .. External Subroutines .. - EXTERNAL SLASSQ, CCOPY, CGELQF, CGEQP3, CGEQRF, CLACPY, CLAPMR, - $ CLASCL, SLASCL, CLASET, CLASSQ, CLASWP, CUNGQR, CUNMLQ, - $ CUNMQR, CPOCON, SSCAL, CSSCAL, CSWAP, CTRSM, CLACGV, - $ XERBLA -* - EXTERNAL CGESVJ -* .. -* -* Test the input arguments -* - LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) - JRACC = LSAME( JOBV, 'J' ) - RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC - ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) - L2RANK = LSAME( JOBA, 'R' ) - L2ABER = LSAME( JOBA, 'A' ) - ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) - L2TRAN = LSAME( JOBT, 'T' ) .AND. ( M .EQ. N ) - L2KILL = LSAME( JOBR, 'R' ) - DEFR = LSAME( JOBR, 'N' ) - L2PERT = LSAME( JOBP, 'P' ) -* - LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) -* - IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. - $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN - INFO = - 1 - ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. - $ ( LSAME( JOBU, 'W' ) .AND. RSVEC .AND. L2TRAN ) ) ) THEN - INFO = - 2 - ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. - $ ( LSAME( JOBV, 'W' ) .AND. LSVEC .AND. L2TRAN ) ) ) THEN - INFO = - 3 - ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN - INFO = - 4 - ELSE IF ( .NOT. ( LSAME(JOBT,'T') .OR. LSAME(JOBT,'N') ) ) THEN - INFO = - 5 - ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN - INFO = - 6 - ELSE IF ( M .LT. 0 ) THEN - INFO = - 7 - ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN - INFO = - 8 - ELSE IF ( LDA .LT. M ) THEN - INFO = - 10 - ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN - INFO = - 13 - ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN - INFO = - 15 - ELSE -* #:) - INFO = 0 - END IF -* - IF ( INFO .EQ. 0 ) THEN -* .. compute the minimal and the optimal workspace lengths -* [[The expressions for computing the minimal and the optimal -* values of LCWORK, LRWORK are written with a lot of redundancy and -* can be simplified. However, this verbose form is useful for -* maintenance and modifications of the code.]] -* -* .. minimal workspace length for CGEQP3 of an M x N matrix, -* CGEQRF of an N x N matrix, CGELQF of an N x N matrix, -* CUNMLQ for computing N x N matrix, CUNMQR for computing N x N -* matrix, CUNMQR for computing M x N matrix, respectively. - LWQP3 = N+1 - LWQRF = MAX( 1, N ) - LWLQF = MAX( 1, N ) - LWUNMLQ = MAX( 1, N ) - LWUNMQR = MAX( 1, N ) - LWUNMQRM = MAX( 1, M ) -* .. minimal workspace length for CPOCON of an N x N matrix - LWCON = 2 * N -* .. minimal workspace length for CGESVJ of an N x N matrix, -* without and with explicit accumulation of Jacobi rotations - LWSVDJ = MAX( 2 * N, 1 ) - LWSVDJV = MAX( 2 * N, 1 ) -* .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ - LRWQP3 = N - LRWCON = N - LRWSVDJ = N - IF ( LQUERY ) THEN - CALL CGEQP3( M, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, - $ RDUMMY, IERR ) - LWRK_CGEQP3 = CDUMMY(1) - CALL CGEQRF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) - LWRK_CGEQRF = CDUMMY(1) - CALL CGELQF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) - LWRK_CGELQF = CDUMMY(1) - END IF - MINWRK = 2 - OPTWRK = 2 - MINIWRK = N - IF ( .NOT. (LSVEC .OR. RSVEC ) ) THEN -* .. minimal and optimal sizes of the complex workspace if -* only the singular values are requested - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, N**2+LWCON, N+LWQRF, LWSVDJ ) - ELSE - MINWRK = MAX( N+LWQP3, N+LWQRF, LWSVDJ ) - END IF - IF ( LQUERY ) THEN - CALL CGESVJ( 'L', 'N', 'N', N, N, A, LDA, SVA, N, V, - $ LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_CGESVJ = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_CGEQP3, N**2+LWCON, - $ N+LWRK_CGEQRF, LWRK_CGESVJ ) - ELSE - OPTWRK = MAX( N+LWRK_CGEQP3, N+LWRK_CGEQRF, - $ LWRK_CGESVJ ) - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - IF ( ERREST ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWCON, LRWSVDJ ) - ELSE - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) - END IF - ELSE - IF ( ERREST ) THEN - MINRWRK = MAX( 7, LRWQP3, LRWCON, LRWSVDJ ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) - END IF - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE IF ( RSVEC .AND. (.NOT.LSVEC) ) THEN -* .. minimal and optimal sizes of the complex workspace if the -* singular values and the right singular vectors are requested - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, LWCON, LWSVDJ, N+LWLQF, - $ 2*N+LWQRF, N+LWSVDJ, N+LWUNMLQ ) - ELSE - MINWRK = MAX( N+LWQP3, LWSVDJ, N+LWLQF, 2*N+LWQRF, - $ N+LWSVDJ, N+LWUNMLQ ) - END IF - IF ( LQUERY ) THEN - CALL CGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, - $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_CGESVJ = CDUMMY(1) - CALL CUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, - $ V, LDV, CDUMMY, -1, IERR ) - LWRK_CUNMLQ = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_CGEQP3, LWCON, LWRK_CGESVJ, - $ N+LWRK_CGELQF, 2*N+LWRK_CGEQRF, - $ N+LWRK_CGESVJ, N+LWRK_CUNMLQ ) - ELSE - OPTWRK = MAX( N+LWRK_CGEQP3, LWRK_CGESVJ,N+LWRK_CGELQF, - $ 2*N+LWRK_CGEQRF, N+LWRK_CGESVJ, - $ N+LWRK_CUNMLQ ) - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - IF ( ERREST ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) - END IF - ELSE - IF ( ERREST ) THEN - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) - END IF - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE IF ( LSVEC .AND. (.NOT.RSVEC) ) THEN -* .. minimal and optimal sizes of the complex workspace if the -* singular values and the left singular vectors are requested - IF ( ERREST ) THEN - MINWRK = N + MAX( LWQP3,LWCON,N+LWQRF,LWSVDJ,LWUNMQRM ) - ELSE - MINWRK = N + MAX( LWQP3, N+LWQRF, LWSVDJ, LWUNMQRM ) - END IF - IF ( LQUERY ) THEN - CALL CGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, - $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_CGESVJ = CDUMMY(1) - CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_CUNMQRM = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = N + MAX( LWRK_CGEQP3, LWCON, N+LWRK_CGEQRF, - $ LWRK_CGESVJ, LWRK_CUNMQRM ) - ELSE - OPTWRK = N + MAX( LWRK_CGEQP3, N+LWRK_CGEQRF, - $ LWRK_CGESVJ, LWRK_CUNMQRM ) - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - IF ( ERREST ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) - END IF - ELSE - IF ( ERREST ) THEN - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) - END IF - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE -* .. minimal and optimal sizes of the complex workspace if the -* full SVD is requested - IF ( .NOT. JRACC ) THEN - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+N**2+LWCON, - $ 2*N+LWQRF, 2*N+LWQP3, - $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, - $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, - $ N+N**2+LWSVDJ, N+LWUNMQRM ) - ELSE - MINWRK = MAX( N+LWQP3, 2*N+N**2+LWCON, - $ 2*N+LWQRF, 2*N+LWQP3, - $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, - $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, - $ N+N**2+LWSVDJ, N+LWUNMQRM ) - END IF - MINIWRK = MINIWRK + N - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+LWQRF, - $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, - $ N+LWUNMQRM ) - ELSE - MINWRK = MAX( N+LWQP3, 2*N+LWQRF, - $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, - $ N+LWUNMQRM ) - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - END IF - IF ( LQUERY ) THEN - CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_CUNMQRM = CDUMMY(1) - CALL CUNMQR( 'L', 'N', N, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_CUNMQR = CDUMMY(1) - IF ( .NOT. JRACC ) THEN - CALL CGEQP3( N,N, A, LDA, IWORK, CDUMMY,CDUMMY, -1, - $ RDUMMY, IERR ) - LWRK_CGEQP3N = CDUMMY(1) - CALL CGESVJ( 'L', 'U', 'N', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_CGESVJ = CDUMMY(1) - CALL CGESVJ( 'U', 'U', 'N', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_CGESVJU = CDUMMY(1) - CALL CGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_CGESVJV = CDUMMY(1) - CALL CUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, - $ V, LDV, CDUMMY, -1, IERR ) - LWRK_CUNMLQ = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_CGEQP3, N+LWCON, - $ 2*N+N**2+LWCON, 2*N+LWRK_CGEQRF, - $ 2*N+LWRK_CGEQP3N, - $ 2*N+N**2+N+LWRK_CGELQF, - $ 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWRK_CGESVJ, - $ 2*N+N**2+N+LWRK_CGESVJV, - $ 2*N+N**2+N+LWRK_CUNMQR, - $ 2*N+N**2+N+LWRK_CUNMLQ, - $ N+N**2+LWRK_CGESVJU, - $ N+LWRK_CUNMQRM ) - ELSE - OPTWRK = MAX( N+LWRK_CGEQP3, - $ 2*N+N**2+LWCON, 2*N+LWRK_CGEQRF, - $ 2*N+LWRK_CGEQP3N, - $ 2*N+N**2+N+LWRK_CGELQF, - $ 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWRK_CGESVJ, - $ 2*N+N**2+N+LWRK_CGESVJV, - $ 2*N+N**2+N+LWRK_CUNMQR, - $ 2*N+N**2+N+LWRK_CUNMLQ, - $ N+N**2+LWRK_CGESVJU, - $ N+LWRK_CUNMQRM ) - END IF - ELSE - CALL CGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_CGESVJV = CDUMMY(1) - CALL CUNMQR( 'L', 'N', N, N, N, CDUMMY, N, CDUMMY, - $ V, LDV, CDUMMY, -1, IERR ) - LWRK_CUNMQR = CDUMMY(1) - CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_CUNMQRM = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_CGEQP3, N+LWCON, - $ 2*N+LWRK_CGEQRF, 2*N+N**2, - $ 2*N+N**2+LWRK_CGESVJV, - $ 2*N+N**2+N+LWRK_CUNMQR,N+LWRK_CUNMQRM ) - ELSE - OPTWRK = MAX( N+LWRK_CGEQP3, 2*N+LWRK_CGEQRF, - $ 2*N+N**2, 2*N+N**2+LWRK_CGESVJV, - $ 2*N+N**2+N+LWRK_CUNMQR, - $ N+LWRK_CUNMQRM ) - END IF - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) - END IF - END IF - MINWRK = MAX( 2, MINWRK ) - OPTWRK = MAX( 2, OPTWRK ) - IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 - IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 - END IF -* - IF ( INFO .NE. 0 ) THEN -* #:( - CALL XERBLA( 'CGEJSV', - INFO ) - RETURN - ELSE IF ( LQUERY ) THEN - CWORK(1) = OPTWRK - CWORK(2) = MINWRK - RWORK(1) = MINRWRK - IWORK(1) = MAX( 4, MINIWRK ) - RETURN - END IF -* -* Quick return for void matrix (Y3K safe) -* #:) - IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN - IWORK(1:4) = 0 - RWORK(1:7) = 0 - RETURN - ENDIF -* -* Determine whether the matrix U should be M x N or M x M -* - IF ( LSVEC ) THEN - N1 = N - IF ( LSAME( JOBU, 'F' ) ) N1 = M - END IF -* -* Set numerical parameters -* -*! NOTE: Make sure SLAMCH() does not fail on the target architecture. -* - EPSLN = SLAMCH('Epsilon') - SFMIN = SLAMCH('SafeMinimum') - SMALL = SFMIN / EPSLN - BIG = SLAMCH('O') -* BIG = ONE / SFMIN -* -* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N -* -*(!) If necessary, scale SVA() to protect the largest norm from -* overflow. It is possible that this scaling pushes the smallest -* column norm left from the underflow threshold (extreme case). -* - SCALEM = ONE / SQRT(REAL(M)*REAL(N)) - NOSCAL = .TRUE. - GOSCAL = .TRUE. - DO 1874 p = 1, N - AAPP = ZERO - AAQQ = ONE - CALL CLASSQ( M, A(1,p), 1, AAPP, AAQQ ) - IF ( AAPP .GT. BIG ) THEN - INFO = - 9 - CALL XERBLA( 'CGEJSV', -INFO ) - RETURN - END IF - AAQQ = SQRT(AAQQ) - IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN - SVA(p) = AAPP * AAQQ - ELSE - NOSCAL = .FALSE. - SVA(p) = AAPP * ( AAQQ * SCALEM ) - IF ( GOSCAL ) THEN - GOSCAL = .FALSE. - CALL SSCAL( p-1, SCALEM, SVA, 1 ) - END IF - END IF - 1874 CONTINUE -* - IF ( NOSCAL ) SCALEM = ONE -* - AAPP = ZERO - AAQQ = BIG - DO 4781 p = 1, N - AAPP = MAX( AAPP, SVA(p) ) - IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) - 4781 CONTINUE -* -* Quick return for zero M x N matrix -* #:) - IF ( AAPP .EQ. ZERO ) THEN - IF ( LSVEC ) CALL CLASET( 'G', M, N1, CZERO, CONE, U, LDU ) - IF ( RSVEC ) CALL CLASET( 'G', N, N, CZERO, CONE, V, LDV ) - RWORK(1) = ONE - RWORK(2) = ONE - IF ( ERREST ) RWORK(3) = ONE - IF ( LSVEC .AND. RSVEC ) THEN - RWORK(4) = ONE - RWORK(5) = ONE - END IF - IF ( L2TRAN ) THEN - RWORK(6) = ZERO - RWORK(7) = ZERO - END IF - IWORK(1) = 0 - IWORK(2) = 0 - IWORK(3) = 0 - IWORK(4) = -1 - RETURN - END IF -* -* Issue warning if denormalized column norms detected. Override the -* high relative accuracy request. Issue licence to kill nonzero columns -* (set them to zero) whose norm is less than sigma_max / BIG (roughly). -* #:( - WARNING = 0 - IF ( AAQQ .LE. SFMIN ) THEN - L2RANK = .TRUE. - L2KILL = .TRUE. - WARNING = 1 - END IF -* -* Quick return for one-column matrix -* #:) - IF ( N .EQ. 1 ) THEN -* - IF ( LSVEC ) THEN - CALL CLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) - CALL CLACPY( 'A', M, 1, A, LDA, U, LDU ) -* computing all M left singular vectors of the M x 1 matrix - IF ( N1 .NE. N ) THEN - CALL CGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR ) - CALL CUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR ) - CALL CCOPY( M, A(1,1), 1, U(1,1), 1 ) - END IF - END IF - IF ( RSVEC ) THEN - V(1,1) = CONE - END IF - IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN - SVA(1) = SVA(1) / SCALEM - SCALEM = ONE - END IF - RWORK(1) = ONE / SCALEM - RWORK(2) = ONE - IF ( SVA(1) .NE. ZERO ) THEN - IWORK(1) = 1 - IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN - IWORK(2) = 1 - ELSE - IWORK(2) = 0 - END IF - ELSE - IWORK(1) = 0 - IWORK(2) = 0 - END IF - IWORK(3) = 0 - IWORK(4) = -1 - IF ( ERREST ) RWORK(3) = ONE - IF ( LSVEC .AND. RSVEC ) THEN - RWORK(4) = ONE - RWORK(5) = ONE - END IF - IF ( L2TRAN ) THEN - RWORK(6) = ZERO - RWORK(7) = ZERO - END IF - RETURN -* - END IF -* - TRANSP = .FALSE. -* - AATMAX = -ONE - AATMIN = BIG - IF ( ROWPIV .OR. L2TRAN ) THEN -* -* Compute the row norms, needed to determine row pivoting sequence -* (in the case of heavily row weighted A, row pivoting is strongly -* advised) and to collect information needed to compare the -* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.). -* - IF ( L2TRAN ) THEN - DO 1950 p = 1, M - XSC = ZERO - TEMP1 = ONE - CALL CLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) -* CLASSQ gets both the ell_2 and the ell_infinity norm -* in one pass through the vector - RWORK(M+p) = XSC * SCALEM - RWORK(p) = XSC * (SCALEM*SQRT(TEMP1)) - AATMAX = MAX( AATMAX, RWORK(p) ) - IF (RWORK(p) .NE. ZERO) - $ AATMIN = MIN(AATMIN,RWORK(p)) - 1950 CONTINUE - ELSE - DO 1904 p = 1, M - RWORK(M+p) = SCALEM*ABS( A(p,ICAMAX(N,A(p,1),LDA)) ) - AATMAX = MAX( AATMAX, RWORK(M+p) ) - AATMIN = MIN( AATMIN, RWORK(M+p) ) - 1904 CONTINUE - END IF -* - END IF -* -* For square matrix A try to determine whether A^* would be better -* input for the preconditioned Jacobi SVD, with faster convergence. -* The decision is based on an O(N) function of the vector of column -* and row norms of A, based on the Shannon entropy. This should give -* the right choice in most cases when the difference actually matters. -* It may fail and pick the slower converging side. -* - ENTRA = ZERO - ENTRAT = ZERO - IF ( L2TRAN ) THEN -* - XSC = ZERO - TEMP1 = ONE - CALL SLASSQ( N, SVA, 1, XSC, TEMP1 ) - TEMP1 = ONE / TEMP1 -* - ENTRA = ZERO - DO 1113 p = 1, N - BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 - IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * ALOG(BIG1) - 1113 CONTINUE - ENTRA = - ENTRA / ALOG(REAL(N)) -* -* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex. -* It is derived from the diagonal of A^* * A. Do the same with the -* diagonal of A * A^*, compute the entropy of the corresponding -* probability distribution. Note that A * A^* and A^* * A have the -* same trace. -* - ENTRAT = ZERO - DO 1114 p = 1, M - BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1 - IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * ALOG(BIG1) - 1114 CONTINUE - ENTRAT = - ENTRAT / ALOG(REAL(M)) -* -* Analyze the entropies and decide A or A^*. Smaller entropy -* usually means better input for the algorithm. -* - TRANSP = ( ENTRAT .LT. ENTRA ) -* -* If A^* is better than A, take the adjoint of A. This is allowed -* only for square matrices, M=N. - IF ( TRANSP ) THEN -* In an optimal implementation, this trivial transpose -* should be replaced with faster transpose. - DO 1115 p = 1, N - 1 - A(p,p) = CONJG(A(p,p)) - DO 1116 q = p + 1, N - CTEMP = CONJG(A(q,p)) - A(q,p) = CONJG(A(p,q)) - A(p,q) = CTEMP - 1116 CONTINUE - 1115 CONTINUE - A(N,N) = CONJG(A(N,N)) - DO 1117 p = 1, N - RWORK(M+p) = SVA(p) - SVA(p) = RWORK(p) -* previously computed row 2-norms are now column 2-norms -* of the transposed matrix - 1117 CONTINUE - TEMP1 = AAPP - AAPP = AATMAX - AATMAX = TEMP1 - TEMP1 = AAQQ - AAQQ = AATMIN - AATMIN = TEMP1 - KILL = LSVEC - LSVEC = RSVEC - RSVEC = KILL - IF ( LSVEC ) N1 = N -* - ROWPIV = .TRUE. - END IF -* - END IF -* END IF L2TRAN -* -* Scale the matrix so that its maximal singular value remains less -* than SQRT(BIG) -- the matrix is scaled so that its maximal column -* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep -* SQRT(BIG) instead of BIG is the fact that CGEJSV uses LAPACK and -* BLAS routines that, in some implementations, are not capable of -* working in the full interval [SFMIN,BIG] and that they may provoke -* overflows in the intermediate results. If the singular values spread -* from SFMIN to BIG, then CGESVJ will compute them. So, in that case, -* one should use CGESVJ instead of CGEJSV. - BIG1 = SQRT( BIG ) - TEMP1 = SQRT( BIG / REAL(N) ) -* >> for future updates: allow bigger range, i.e. the largest column -* will be allowed up to BIG/N and CGESVJ will do the rest. However, for -* this all other (LAPACK) components must allow such a range. -* TEMP1 = BIG/REAL(N) -* TEMP1 = BIG * EPSLN this should 'almost' work with current LAPACK components - CALL SLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) - IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN - AAQQ = ( AAQQ / AAPP ) * TEMP1 - ELSE - AAQQ = ( AAQQ * TEMP1 ) / AAPP - END IF - TEMP1 = TEMP1 * SCALEM - CALL CLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) -* -* To undo scaling at the end of this procedure, multiply the -* computed singular values with USCAL2 / USCAL1. -* - USCAL1 = TEMP1 - USCAL2 = AAPP -* - IF ( L2KILL ) THEN -* L2KILL enforces computation of nonzero singular values in -* the restricted range of condition number of the initial A, -* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). - XSC = SQRT( SFMIN ) - ELSE - XSC = SMALL -* -* Now, if the condition number of A is too big, -* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, -* as a precaution measure, the full SVD is computed using CGESVJ -* with accumulated Jacobi rotations. This provides numerically -* more robust computation, at the cost of slightly increased run -* time. Depending on the concrete implementation of BLAS and LAPACK -* (i.e. how they behave in presence of extreme ill-conditioning) the -* implementor may decide to remove this switch. - IF ( ( AAQQ.LT.SQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN - JRACC = .TRUE. - END IF -* - END IF - IF ( AAQQ .LT. XSC ) THEN - DO 700 p = 1, N - IF ( SVA(p) .LT. XSC ) THEN - CALL CLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA ) - SVA(p) = ZERO - END IF - 700 CONTINUE - END IF -* -* Preconditioning using QR factorization with pivoting -* - IF ( ROWPIV ) THEN -* Optional row permutation (Bjoerck row pivoting): -* A result by Cox and Higham shows that the Bjoerck's -* row pivoting combined with standard column pivoting -* has similar effect as Powell-Reid complete pivoting. -* The ell-infinity norms of A are made nonincreasing. - IF ( ( LSVEC .AND. RSVEC ) .AND. .NOT.( JRACC ) ) THEN - IWOFF = 2*N - ELSE - IWOFF = N - END IF - DO 1952 p = 1, M - 1 - q = ISAMAX( M-p+1, RWORK(M+p), 1 ) + p - 1 - IWORK(IWOFF+p) = q - IF ( p .NE. q ) THEN - TEMP1 = RWORK(M+p) - RWORK(M+p) = RWORK(M+q) - RWORK(M+q) = TEMP1 - END IF - 1952 CONTINUE - CALL CLASWP( N, A, LDA, 1, M-1, IWORK(IWOFF+1), 1 ) - END IF -* -* End of the preparation phase (scaling, optional sorting and -* transposing, optional flushing of small columns). -* -* Preconditioning -* -* If the full SVD is needed, the right singular vectors are computed -* from a matrix equation, and for that we need theoretical analysis -* of the Businger-Golub pivoting. So we use CGEQP3 as the first RR QRF. -* In all other cases the first RR QRF can be chosen by other criteria -* (eg speed by replacing global with restricted window pivoting, such -* as in xGEQPX from TOMS # 782). Good results will be obtained using -* xGEQPX with properly (!) chosen numerical parameters. -* Any improvement of CGEQP3 improves overal performance of CGEJSV. -* -* A * P1 = Q1 * [ R1^* 0]^*: - DO 1963 p = 1, N -* .. all columns are free columns - IWORK(p) = 0 - 1963 CONTINUE - CALL CGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, - $ RWORK, IERR ) -* -* The upper triangular matrix R1 from the first QRF is inspected for -* rank deficiency and possibilities for deflation, or possible -* ill-conditioning. Depending on the user specified flag L2RANK, -* the procedure explores possibilities to reduce the numerical -* rank by inspecting the computed upper triangular factor. If -* L2RANK or L2ABER are up, then CGEJSV will compute the SVD of -* A + dA, where ||dA|| <= f(M,N)*EPSLN. -* - NR = 1 - IF ( L2ABER ) THEN -* Standard absolute error bound suffices. All sigma_i with -* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an -* agressive enforcement of lower numerical rank by introducing a -* backward error of the order of N*EPSLN*||A||. - TEMP1 = SQRT(REAL(N))*EPSLN - DO 3001 p = 2, N - IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN - NR = NR + 1 - ELSE - GO TO 3002 - END IF - 3001 CONTINUE - 3002 CONTINUE - ELSE IF ( L2RANK ) THEN -* .. similarly as above, only slightly more gentle (less agressive). -* Sudden drop on the diagonal of R1 is used as the criterion for -* close-to-rank-defficient. - TEMP1 = SQRT(SFMIN) - DO 3401 p = 2, N - IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR. - $ ( ABS(A(p,p)) .LT. SMALL ) .OR. - $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 - NR = NR + 1 - 3401 CONTINUE - 3402 CONTINUE -* - ELSE -* The goal is high relative accuracy. However, if the matrix -* has high scaled condition number the relative accuracy is in -* general not feasible. Later on, a condition number estimator -* will be deployed to estimate the scaled condition number. -* Here we just remove the underflowed part of the triangular -* factor. This prevents the situation in which the code is -* working hard to get the accuracy not warranted by the data. - TEMP1 = SQRT(SFMIN) - DO 3301 p = 2, N - IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR. - $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 - NR = NR + 1 - 3301 CONTINUE - 3302 CONTINUE -* - END IF -* - ALMORT = .FALSE. - IF ( NR .EQ. N ) THEN - MAXPRJ = ONE - DO 3051 p = 2, N - TEMP1 = ABS(A(p,p)) / SVA(IWORK(p)) - MAXPRJ = MIN( MAXPRJ, TEMP1 ) - 3051 CONTINUE - IF ( MAXPRJ**2 .GE. ONE - REAL(N)*EPSLN ) ALMORT = .TRUE. - END IF -* -* - SCONDA = - ONE - CONDR1 = - ONE - CONDR2 = - ONE -* - IF ( ERREST ) THEN - IF ( N .EQ. NR ) THEN - IF ( RSVEC ) THEN -* .. V is available as workspace - CALL CLACPY( 'U', N, N, A, LDA, V, LDV ) - DO 3053 p = 1, N - TEMP1 = SVA(IWORK(p)) - CALL CSSCAL( p, ONE/TEMP1, V(1,p), 1 ) - 3053 CONTINUE - IF ( LSVEC )THEN - CALL CPOCON( 'U', N, V, LDV, ONE, TEMP1, - $ CWORK(N+1), RWORK, IERR ) - ELSE - CALL CPOCON( 'U', N, V, LDV, ONE, TEMP1, - $ CWORK, RWORK, IERR ) - END IF -* - ELSE IF ( LSVEC ) THEN -* .. U is available as workspace - CALL CLACPY( 'U', N, N, A, LDA, U, LDU ) - DO 3054 p = 1, N - TEMP1 = SVA(IWORK(p)) - CALL CSSCAL( p, ONE/TEMP1, U(1,p), 1 ) - 3054 CONTINUE - CALL CPOCON( 'U', N, U, LDU, ONE, TEMP1, - $ CWORK(N+1), RWORK, IERR ) - ELSE - CALL CLACPY( 'U', N, N, A, LDA, CWORK, N ) -*[] CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) -* Change: here index shifted by N to the left, CWORK(1:N) -* not needed for SIGMA only computation - DO 3052 p = 1, N - TEMP1 = SVA(IWORK(p)) -*[] CALL CSSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 ) - CALL CSSCAL( p, ONE/TEMP1, CWORK((p-1)*N+1), 1 ) - 3052 CONTINUE -* .. the columns of R are scaled to have unit Euclidean lengths. -*[] CALL CPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1, -*[] $ CWORK(N+N*N+1), RWORK, IERR ) - CALL CPOCON( 'U', N, CWORK, N, ONE, TEMP1, - $ CWORK(N*N+1), RWORK, IERR ) -* - END IF - IF ( TEMP1 .NE. ZERO ) THEN - SCONDA = ONE / SQRT(TEMP1) - ELSE - SCONDA = - ONE - END IF -* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). -* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA - ELSE - SCONDA = - ONE - END IF - END IF -* - L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. SQRT(BIG1) ) -* If there is no violent scaling, artificial perturbation is not needed. -* -* Phase 3: -* - IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN -* -* Singular Values only -* -* .. transpose A(1:NR,1:N) - DO 1946 p = 1, MIN( N-1, NR ) - CALL CCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) - CALL CLACGV( N-p+1, A(p,p), 1 ) - 1946 CONTINUE - IF ( NR .EQ. N ) A(N,N) = CONJG(A(N,N)) -* -* The following two DO-loops introduce small relative perturbation -* into the strict upper triangle of the lower triangular matrix. -* Small entries below the main diagonal are also changed. -* This modification is useful if the computing environment does not -* provide/allow FLUSH TO ZERO underflow, for it prevents many -* annoying denormalized numbers in case of strongly scaled matrices. -* The perturbation is structured so that it does not introduce any -* new perturbation of the singular values, and it does not destroy -* the job done by the preconditioner. -* The licence for this perturbation is in the variable L2PERT, which -* should be .FALSE. if FLUSH TO ZERO underflow is active. -* - IF ( .NOT. ALMORT ) THEN -* - IF ( L2PERT ) THEN -* XSC = SQRT(SMALL) - XSC = EPSLN / REAL(N) - DO 4947 q = 1, NR - CTEMP = CMPLX(XSC*ABS(A(q,q)),ZERO) - DO 4949 p = 1, N - IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) - $ .OR. ( p .LT. q ) ) -* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) - $ A(p,q) = CTEMP - 4949 CONTINUE - 4947 CONTINUE - ELSE - CALL CLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA ) - END IF -* -* .. second preconditioning using the QR factorization -* - CALL CGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR ) -* -* .. and transpose upper to lower triangular - DO 1948 p = 1, NR - 1 - CALL CCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) - CALL CLACGV( NR-p+1, A(p,p), 1 ) - 1948 CONTINUE -* - END IF -* -* Row-cyclic Jacobi SVD algorithm with column pivoting -* -* .. again some perturbation (a "background noise") is added -* to drown denormals - IF ( L2PERT ) THEN -* XSC = SQRT(SMALL) - XSC = EPSLN / REAL(N) - DO 1947 q = 1, NR - CTEMP = CMPLX(XSC*ABS(A(q,q)),ZERO) - DO 1949 p = 1, NR - IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) - $ .OR. ( p .LT. q ) ) -* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) - $ A(p,q) = CTEMP - 1949 CONTINUE - 1947 CONTINUE - ELSE - CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA ) - END IF -* -* .. and one-sided Jacobi rotations are started on a lower -* triangular matrix (plus perturbation which is ignored in -* the part which destroys triangular form (confusing?!)) -* - CALL CGESVJ( 'L', 'N', 'N', NR, NR, A, LDA, SVA, - $ N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) -* - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) -* -* - ELSE IF ( ( RSVEC .AND. ( .NOT. LSVEC ) .AND. ( .NOT. JRACC ) ) - $ .OR. - $ ( JRACC .AND. ( .NOT. LSVEC ) .AND. ( NR .NE. N ) ) ) THEN -* -* -> Singular Values and Right Singular Vectors <- -* - IF ( ALMORT ) THEN -* -* .. in this case NR equals N - DO 1998 p = 1, NR - CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) - CALL CLACGV( N-p+1, V(p,p), 1 ) - 1998 CONTINUE - CALL CLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) -* - CALL CGESVJ( 'L','U','N', N, NR, V, LDV, SVA, NR, A, LDA, - $ CWORK, LWORK, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - - ELSE -* -* .. two more QR factorizations ( one QRF is not enough, two require -* accumulated product of Jacobi rotations, three are perfect ) -* - CALL CLASET( 'L', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA ) - CALL CGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR) - CALL CLACPY( 'L', NR, NR, A, LDA, V, LDV ) - CALL CLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) - CALL CGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) - DO 8998 p = 1, NR - CALL CCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) - CALL CLACGV( NR-p+1, V(p,p), 1 ) - 8998 CONTINUE - CALL CLASET('U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV) -* - CALL CGESVJ( 'L', 'U','N', NR, NR, V,LDV, SVA, NR, U, - $ LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - IF ( NR .LT. N ) THEN - CALL CLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1), LDV ) - CALL CLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1), LDV ) - CALL CLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV ) - END IF -* - CALL CUNMLQ( 'L', 'C', N, N, NR, A, LDA, CWORK, - $ V, LDV, CWORK(N+1), LWORK-N, IERR ) -* - END IF -* .. permute the rows of V -* DO 8991 p = 1, N -* CALL CCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) -* 8991 CONTINUE -* CALL CLACPY( 'All', N, N, A, LDA, V, LDV ) - CALL CLAPMR( .FALSE., N, N, V, LDV, IWORK ) -* - IF ( TRANSP ) THEN - CALL CLACPY( 'A', N, N, V, LDV, U, LDU ) - END IF -* - ELSE IF ( JRACC .AND. (.NOT. LSVEC) .AND. ( NR.EQ. N ) ) THEN -* - CALL CLASET( 'L', N-1,N-1, CZERO, CZERO, A(2,1), LDA ) -* - CALL CGESVJ( 'U','N','V', N, N, A, LDA, SVA, N, V, LDV, - $ CWORK, LWORK, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - CALL CLAPMR( .FALSE., N, N, V, LDV, IWORK ) -* - ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN -* -* .. Singular Values and Left Singular Vectors .. -* -* .. second preconditioning step to avoid need to accumulate -* Jacobi rotations in the Jacobi iterations. - DO 1965 p = 1, NR - CALL CCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) - CALL CLACGV( N-p+1, U(p,p), 1 ) - 1965 CONTINUE - CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) -* - CALL CGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) -* - DO 1967 p = 1, NR - 1 - CALL CCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) - CALL CLACGV( N-p+1, U(p,p), 1 ) - 1967 CONTINUE - CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) -* - CALL CGESVJ( 'L', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, - $ LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) -* - IF ( NR .LT. M ) THEN - CALL CLASET( 'A', M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU ) - IF ( NR .LT. N1 ) THEN - CALL CLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU ) - CALL CLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU ) - END IF - END IF -* - CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) -* - IF ( ROWPIV ) - $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* - DO 1974 p = 1, N1 - XSC = ONE / SCNRM2( M, U(1,p), 1 ) - CALL CSSCAL( M, XSC, U(1,p), 1 ) - 1974 CONTINUE -* - IF ( TRANSP ) THEN - CALL CLACPY( 'A', N, N, U, LDU, V, LDV ) - END IF -* - ELSE -* -* .. Full SVD .. -* - IF ( .NOT. JRACC ) THEN -* - IF ( .NOT. ALMORT ) THEN -* -* Second Preconditioning Step (QRF [with pivoting]) -* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is -* equivalent to an LQF CALL. Since in many libraries the QRF -* seems to be better optimized than the LQF, we do explicit -* transpose and use the QRF. This is subject to changes in an -* optimized implementation of CGEJSV. -* - DO 1968 p = 1, NR - CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) - CALL CLACGV( N-p+1, V(p,p), 1 ) - 1968 CONTINUE -* -* .. the following two loops perturb small entries to avoid -* denormals in the second QR factorization, where they are -* as good as zeros. This is done to avoid painfully slow -* computation with denormals. The relative size of the perturbation -* is a parameter that can be changed by the implementer. -* This perturbation device will be obsolete on machines with -* properly implemented arithmetic. -* To switch it off, set L2PERT=.FALSE. To remove it from the -* code, remove the action under L2PERT=.TRUE., leave the ELSE part. -* The following two loops should be blocked and fused with the -* transposed copy above. -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 2969 q = 1, NR - CTEMP = CMPLX(XSC*ABS( V(q,q) ),ZERO) - DO 2968 p = 1, N - IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) - $ .OR. ( p .LT. q ) ) -* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) - $ V(p,q) = CTEMP - IF ( p .LT. q ) V(p,q) = - V(p,q) - 2968 CONTINUE - 2969 CONTINUE - ELSE - CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) - END IF -* -* Estimate the row scaled condition number of R1 -* (If R1 is rectangular, N > NR, then the condition number -* of the leading NR x NR submatrix is estimated.) -* - CALL CLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR ) - DO 3950 p = 1, NR - TEMP1 = SCNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1) - CALL CSSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1) - 3950 CONTINUE - CALL CPOCON('L',NR,CWORK(2*N+1),NR,ONE,TEMP1, - $ CWORK(2*N+NR*NR+1),RWORK,IERR) - CONDR1 = ONE / SQRT(TEMP1) -* .. here need a second oppinion on the condition number -* .. then assume worst case scenario -* R1 is OK for inverse <=> CONDR1 .LT. REAL(N) -* more conservative <=> CONDR1 .LT. SQRT(REAL(N)) -* - COND_OK = SQRT(SQRT(REAL(NR))) -*[TP] COND_OK is a tuning parameter. -* - IF ( CONDR1 .LT. COND_OK ) THEN -* .. the second QRF without pivoting. Note: in an optimized -* implementation, this QRF should be implemented as the QRF -* of a lower triangular matrix. -* R1^* = Q2 * R2 - CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL)/EPSLN - DO 3959 p = 2, NR - DO 3958 q = 1, p - 1 - CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), - $ ZERO) - IF ( ABS(V(q,p)) .LE. TEMP1 ) -* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) - $ V(q,p) = CTEMP - 3958 CONTINUE - 3959 CONTINUE - END IF -* - IF ( NR .NE. N ) - $ CALL CLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) -* .. save ... -* -* .. this transposed copy should be better than naive - DO 1969 p = 1, NR - 1 - CALL CCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) - CALL CLACGV(NR-p+1, V(p,p), 1 ) - 1969 CONTINUE - V(NR,NR)=CONJG(V(NR,NR)) -* - CONDR2 = CONDR1 -* - ELSE -* -* .. ill-conditioned case: second QRF with pivoting -* Note that windowed pivoting would be equaly good -* numerically, and more run-time efficient. So, in -* an optimal implementation, the next call to CGEQP3 -* should be replaced with eg. CALL CGEQPX (ACM TOMS #782) -* with properly (carefully) chosen parameters. -* -* R1^* * P2 = Q2 * R2 - DO 3003 p = 1, NR - IWORK(N+p) = 0 - 3003 CONTINUE - CALL CGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1), - $ CWORK(2*N+1), LWORK-2*N, RWORK, IERR ) -** CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), -** $ LWORK-2*N, IERR ) - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 3969 p = 2, NR - DO 3968 q = 1, p - 1 - CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), - $ ZERO) - IF ( ABS(V(q,p)) .LE. TEMP1 ) -* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) - $ V(q,p) = CTEMP - 3968 CONTINUE - 3969 CONTINUE - END IF -* - CALL CLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 8970 p = 2, NR - DO 8971 q = 1, p - 1 - CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), - $ ZERO) -* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) ) - V(p,q) = - CTEMP - 8971 CONTINUE - 8970 CONTINUE - ELSE - CALL CLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV ) - END IF -* Now, compute R2 = L3 * Q3, the LQ factorization. - CALL CGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1), - $ CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) -* .. and estimate the condition number - CALL CLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR ) - DO 4950 p = 1, NR - TEMP1 = SCNRM2( p, CWORK(2*N+N*NR+NR+p), NR ) - CALL CSSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR ) - 4950 CONTINUE - CALL CPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, - $ CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) - CONDR2 = ONE / SQRT(TEMP1) -* -* - IF ( CONDR2 .GE. COND_OK ) THEN -* .. save the Householder vectors used for Q3 -* (this overwrittes the copy of R2, as it will not be -* needed in this branch, but it does not overwritte the -* Huseholder vectors of Q2.). - CALL CLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N ) -* .. and the rest of the information on Q3 is in -* WORK(2*N+N*NR+1:2*N+N*NR+N) - END IF -* - END IF -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 4968 q = 2, NR - CTEMP = XSC * V(q,q) - DO 4969 p = 1, q - 1 -* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) ) - V(p,q) = - CTEMP - 4969 CONTINUE - 4968 CONTINUE - ELSE - CALL CLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) - END IF -* -* Second preconditioning finished; continue with Jacobi SVD -* The input matrix is lower trinagular. -* -* Recover the right singular vectors as solution of a well -* conditioned triangular matrix equation. -* - IF ( CONDR1 .LT. COND_OK ) THEN -* - CALL CGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU, - $ CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK, - $ LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - DO 3970 p = 1, NR - CALL CCOPY( NR, V(1,p), 1, U(1,p), 1 ) - CALL CSSCAL( NR, SVA(p), V(1,p), 1 ) - 3970 CONTINUE - -* .. pick the right matrix equation and solve it -* - IF ( NR .EQ. N ) THEN -* :)) .. best case, R1 is inverted. The solution of this matrix -* equation is Q2*V2 = the product of the Jacobi rotations -* used in CGESVJ, premultiplied with the orthogonal matrix -* from the second QR factorization. - CALL CTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV) - ELSE -* .. R1 is well conditioned, but non-square. Adjoint of R2 -* is inverted to get the product of the Jacobi rotations -* used in CGESVJ. The Q-factor from the second QR -* factorization is then built in explicitly. - CALL CTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1), - $ N,V,LDV) - IF ( NR .LT. N ) THEN - CALL CLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) - CALL CLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) - CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) - END IF - CALL CUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) - END IF -* - ELSE IF ( CONDR2 .LT. COND_OK ) THEN -* -* The matrix R2 is inverted. The solution of the matrix equation -* is Q3^* * V3 = the product of the Jacobi rotations (appplied to -* the lower triangular L3 from the LQ factorization of -* R2=L3*Q3), pre-multiplied with the transposed Q3. - CALL CGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, - $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, - $ RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - DO 3870 p = 1, NR - CALL CCOPY( NR, V(1,p), 1, U(1,p), 1 ) - CALL CSSCAL( NR, SVA(p), U(1,p), 1 ) - 3870 CONTINUE - CALL CTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N, - $ U,LDU) -* .. apply the permutation from the second QR factorization - DO 873 q = 1, NR - DO 872 p = 1, NR - CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) - 872 CONTINUE - DO 874 p = 1, NR - U(p,q) = CWORK(2*N+N*NR+NR+p) - 874 CONTINUE - 873 CONTINUE - IF ( NR .LT. N ) THEN - CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) - CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) - CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) - END IF - CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) - ELSE -* Last line of defense. -* #:( This is a rather pathological case: no scaled condition -* improvement after two pivoted QR factorizations. Other -* possibility is that the rank revealing QR factorization -* or the condition estimator has failed, or the COND_OK -* is set very close to ONE (which is unnecessary). Normally, -* this branch should never be executed, but in rare cases of -* failure of the RRQR or condition estimator, the last line of -* defense ensures that CGEJSV completes the task. -* Compute the full SVD of L3 using CGESVJ with explicit -* accumulation of Jacobi rotations. - CALL CGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, - $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, - $ RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - IF ( NR .LT. N ) THEN - CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) - CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) - CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) - END IF - CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) -* - CALL CUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N, - $ CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1), - $ LWORK-2*N-N*NR-NR, IERR ) - DO 773 q = 1, NR - DO 772 p = 1, NR - CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) - 772 CONTINUE - DO 774 p = 1, NR - U(p,q) = CWORK(2*N+N*NR+NR+p) - 774 CONTINUE - 773 CONTINUE -* - END IF -* -* Permute the rows of V using the (column) permutation from the -* first QRF. Also, scale the columns to make them unit in -* Euclidean norm. This applies to all cases. -* - TEMP1 = SQRT(REAL(N)) * EPSLN - DO 1972 q = 1, N - DO 972 p = 1, N - CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) - 972 CONTINUE - DO 973 p = 1, N - V(p,q) = CWORK(2*N+N*NR+NR+p) - 973 CONTINUE - XSC = ONE / SCNRM2( N, V(1,q), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL CSSCAL( N, XSC, V(1,q), 1 ) - 1972 CONTINUE -* At this moment, V contains the right singular vectors of A. -* Next, assemble the left singular vector matrix U (M x N). - IF ( NR .LT. M ) THEN - CALL CLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU) - IF ( NR .LT. N1 ) THEN - CALL CLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) - CALL CLASET('A',M-NR,N1-NR,CZERO,CONE, - $ U(NR+1,NR+1),LDU) - END IF - END IF -* -* The Q matrix from the first QRF is built into the left singular -* matrix U. This applies to all cases. -* - CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) - -* The columns of U are normalized. The cost is O(M*N) flops. - TEMP1 = SQRT(REAL(M)) * EPSLN - DO 1973 p = 1, NR - XSC = ONE / SCNRM2( M, U(1,p), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL CSSCAL( M, XSC, U(1,p), 1 ) - 1973 CONTINUE -* -* If the initial QRF is computed with row pivoting, the left -* singular vectors must be adjusted. -* - IF ( ROWPIV ) - $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* - ELSE -* -* .. the initial matrix A has almost orthogonal columns and -* the second QRF is not needed -* - CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 5970 p = 2, N - CTEMP = XSC * CWORK( N + (p-1)*N + p ) - DO 5971 q = 1, p - 1 -* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) / -* $ ABS(CWORK(N+(p-1)*N+q)) ) - CWORK(N+(q-1)*N+p)=-CTEMP - 5971 CONTINUE - 5970 CONTINUE - ELSE - CALL CLASET( 'L',N-1,N-1,CZERO,CZERO,CWORK(N+2),N ) - END IF -* - CALL CGESVJ( 'U', 'U', 'N', N, N, CWORK(N+1), N, SVA, - $ N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, - $ INFO ) -* - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - DO 6970 p = 1, N - CALL CCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 ) - CALL CSSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 ) - 6970 CONTINUE -* - CALL CTRSM( 'L', 'U', 'N', 'N', N, N, - $ CONE, A, LDA, CWORK(N+1), N ) - DO 6972 p = 1, N - CALL CCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV ) - 6972 CONTINUE - TEMP1 = SQRT(REAL(N))*EPSLN - DO 6971 p = 1, N - XSC = ONE / SCNRM2( N, V(1,p), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL CSSCAL( N, XSC, V(1,p), 1 ) - 6971 CONTINUE -* -* Assemble the left singular vector matrix U (M x N). -* - IF ( N .LT. M ) THEN - CALL CLASET( 'A', M-N, N, CZERO, CZERO, U(N+1,1), LDU ) - IF ( N .LT. N1 ) THEN - CALL CLASET('A',N, N1-N, CZERO, CZERO, U(1,N+1),LDU) - CALL CLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU) - END IF - END IF - CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) - TEMP1 = SQRT(REAL(M))*EPSLN - DO 6973 p = 1, N1 - XSC = ONE / SCNRM2( M, U(1,p), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL CSSCAL( M, XSC, U(1,p), 1 ) - 6973 CONTINUE -* - IF ( ROWPIV ) - $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* - END IF -* -* end of the >> almost orthogonal case << in the full SVD -* - ELSE -* -* This branch deploys a preconditioned Jacobi SVD with explicitly -* accumulated rotations. It is included as optional, mainly for -* experimental purposes. It does perfom well, and can also be used. -* In this implementation, this branch will be automatically activated -* if the condition number sigma_max(A) / sigma_min(A) is predicted -* to be greater than the overflow threshold. This is because the -* a posteriori computation of the singular vectors assumes robust -* implementation of BLAS and some LAPACK procedures, capable of working -* in presence of extreme values, e.g. when the singular values spread from -* the underflow to the overflow threshold. -* - DO 7968 p = 1, NR - CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) - CALL CLACGV( N-p+1, V(p,p), 1 ) - 7968 CONTINUE -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL/EPSLN) - DO 5969 q = 1, NR - CTEMP = CMPLX(XSC*ABS( V(q,q) ),ZERO) - DO 5968 p = 1, N - IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) - $ .OR. ( p .LT. q ) ) -* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) - $ V(p,q) = CTEMP - IF ( p .LT. q ) V(p,q) = - V(p,q) - 5968 CONTINUE - 5969 CONTINUE - ELSE - CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) - END IF - - CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) - CALL CLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N ) -* - DO 7969 p = 1, NR - CALL CCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) - CALL CLACGV( NR-p+1, U(p,p), 1 ) - 7969 CONTINUE - - IF ( L2PERT ) THEN - XSC = SQRT(SMALL/EPSLN) - DO 9970 q = 2, NR - DO 9971 p = 1, q - 1 - CTEMP = CMPLX(XSC * MIN(ABS(U(p,p)),ABS(U(q,q))), - $ ZERO) -* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) ) - U(p,q) = - CTEMP - 9971 CONTINUE - 9970 CONTINUE - ELSE - CALL CLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) - END IF - - CALL CGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA, - $ N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, - $ RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - - IF ( NR .LT. N ) THEN - CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) - CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) - CALL CLASET( 'A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV ) - END IF - - CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) -* -* Permute the rows of V using the (column) permutation from the -* first QRF. Also, scale the columns to make them unit in -* Euclidean norm. This applies to all cases. -* - TEMP1 = SQRT(REAL(N)) * EPSLN - DO 7972 q = 1, N - DO 8972 p = 1, N - CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) - 8972 CONTINUE - DO 8973 p = 1, N - V(p,q) = CWORK(2*N+N*NR+NR+p) - 8973 CONTINUE - XSC = ONE / SCNRM2( N, V(1,q), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL CSSCAL( N, XSC, V(1,q), 1 ) - 7972 CONTINUE -* -* At this moment, V contains the right singular vectors of A. -* Next, assemble the left singular vector matrix U (M x N). -* - IF ( NR .LT. M ) THEN - CALL CLASET( 'A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU ) - IF ( NR .LT. N1 ) THEN - CALL CLASET('A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU) - CALL CLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU) - END IF - END IF -* - CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) -* - IF ( ROWPIV ) - $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* -* - END IF - IF ( TRANSP ) THEN -* .. swap U and V because the procedure worked on A^* - DO 6974 p = 1, N - CALL CSWAP( N, U(1,p), 1, V(1,p), 1 ) - 6974 CONTINUE - END IF -* - END IF -* end of the full SVD -* -* Undo scaling, if necessary (and possible) -* - IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN - CALL SLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) - USCAL1 = ONE - USCAL2 = ONE - END IF -* - IF ( NR .LT. N ) THEN - DO 3004 p = NR+1, N - SVA(p) = ZERO - 3004 CONTINUE - END IF -* - RWORK(1) = USCAL2 * SCALEM - RWORK(2) = USCAL1 - IF ( ERREST ) RWORK(3) = SCONDA - IF ( LSVEC .AND. RSVEC ) THEN - RWORK(4) = CONDR1 - RWORK(5) = CONDR2 - END IF - IF ( L2TRAN ) THEN - RWORK(6) = ENTRA - RWORK(7) = ENTRAT - END IF -* - IWORK(1) = NR - IWORK(2) = NUMRANK - IWORK(3) = WARNING - IF ( TRANSP ) THEN - IWORK(4) = 1 - ELSE - IWORK(4) = -1 - END IF - -* - RETURN -* .. -* .. END OF CGEJSV -* .. - END -* +*> \brief \b CGEJSV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEJSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, +* M, N, A, LDA, SVA, U, LDU, V, LDV, +* CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* IMPLICIT NONE +* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) +* REAL SVA( N ), RWORK( LRWORK ) +* INTEGER IWORK( * ) +* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEJSV computes the singular value decomposition (SVD) of a complex M-by-N +*> matrix [A], where M >= N. The SVD of [A] is written as +*> +*> [A] = [U] * [SIGMA] * [V]^*, +*> +*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N +*> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and +*> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are +*> the singular values of [A]. The columns of [U] and [V] are the left and +*> the right singular vectors of [A], respectively. The matrices [U] and [V] +*> are computed and stored in the arrays U and V, respectively. The diagonal +*> of [SIGMA] is computed and stored in the array SVA. +*> \endverbatim +*> +*> Arguments: +*> ========== +*> +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the level of accuracy: +*> = 'C': This option works well (high relative accuracy) if A = B * D, +*> with well-conditioned B and arbitrary diagonal matrix D. +*> The accuracy cannot be spoiled by COLUMN scaling. The +*> accuracy of the computed output depends on the condition of +*> B, and the procedure aims at the best theoretical accuracy. +*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is +*> bounded by f(M,N)*epsilon* cond(B), independent of D. +*> The input matrix is preprocessed with the QRF with column +*> pivoting. This initial preprocessing and preconditioning by +*> a rank revealing QR factorization is common for all values of +*> JOBA. Additional actions are specified as follows: +*> = 'E': Computation as with 'C' with an additional estimate of the +*> condition number of B. It provides a realistic error bound. +*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings +*> D1, D2, and well-conditioned matrix C, this option gives +*> higher accuracy than the 'C' option. If the structure of the +*> input matrix is not known, and relative accuracy is +*> desirable, then this option is advisable. The input matrix A +*> is preprocessed with QR factorization with FULL (row and +*> column) pivoting. +*> = 'G' Computation as with 'F' with an additional estimate of the +*> condition number of B, where A=B*D. If A has heavily weighted +*> rows, then using this condition number gives too pessimistic +*> error bound. +*> = 'A': Small singular values are not well determined by the data +*> and are considered as noisy; the matrix is treated as +*> numerically rank defficient. The error in the computed +*> singular values is bounded by f(m,n)*epsilon*||A||. +*> The computed SVD A = U * S * V^* restores A up to +*> f(m,n)*epsilon*||A||. +*> This gives the procedure the licence to discard (set to zero) +*> all singular values below N*epsilon*||A||. +*> = 'R': Similar as in 'A'. Rank revealing property of the initial +*> QR factorization is used do reveal (using triangular factor) +*> a gap sigma_{r+1} < epsilon * sigma_r in which case the +*> numerical RANK is declared to be r. The SVD is computed with +*> absolute error bounds, but more accurately than with 'A'. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the columns of U: +*> = 'U': N columns of U are returned in the array U. +*> = 'F': full set of M left sing. vectors is returned in the array U. +*> = 'W': U may be used as workspace of length M*N. See the description +*> of U. +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the matrix V: +*> = 'V': N columns of V are returned in the array V; Jacobi rotations +*> are not explicitly accumulated. +*> = 'J': N columns of V are returned in the array V, but they are +*> computed as the product of Jacobi rotations, if JOBT .EQ. 'N'. +*> = 'W': V may be used as workspace of length N*N. See the description +*> of V. +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBR +*> \verbatim +*> JOBR is CHARACTER*1 +*> Specifies the RANGE for the singular values. Issues the licence to +*> set to zero small positive singular values if they are outside +*> specified range. If A .NE. 0 is scaled so that the largest singular +*> value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues +*> the licence to kill columns of A whose norm in c*A is less than +*> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, +*> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). +*> = 'N': Do not kill small columns of c*A. This option assumes that +*> BLAS and QR factorizations and triangular solvers are +*> implemented to work in that range. If the condition of A +*> is greater than BIG, use CGESVJ. +*> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] +*> (roughly, as described above). This option is recommended. +*> =========================== +*> For computing the singular values in the FULL range [SFMIN,BIG] +*> use CGESVJ. +*> \endverbatim +*> +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> If the matrix is square then the procedure may determine to use +*> transposed A if A^* seems to be better with respect to convergence. +*> If the matrix is not square, JOBT is ignored. +*> The decision is based on two values of entropy over the adjoint +*> orbit of A^* * A. See the descriptions of WORK(6) and WORK(7). +*> = 'T': transpose if entropy test indicates possibly faster +*> convergence of Jacobi process if A^* is taken as input. If A is +*> replaced with A^*, then the row pivoting is included automatically. +*> = 'N': do not speculate. +*> The option 'T' can be used to compute only the singular values, or +*> the full SVD (U, SIGMA and V). For only one set of singular vectors +*> (U or V), the caller should provide both U and V, as one of the +*> matrices is used as workspace if the matrix A is transposed. +*> The implementer can easily remove this constraint and make the +*> code more complicated. See the descriptions of U and V. +*> In general, this option is considered experimental, and 'N'; should +*> be preferred. This is subject to changes in the future. +*> \endverbatim +*> +*> \param[in] JOBP +*> \verbatim +*> JOBP is CHARACTER*1 +*> Issues the licence to introduce structured perturbations to drown +*> denormalized numbers. This licence should be active if the +*> denormals are poorly implemented, causing slow computation, +*> especially in cases of fast convergence (!). For details see [1,2]. +*> For the sake of simplicity, this perturbations are included only +*> when the full SVD or only the singular values are requested. The +*> implementer/user can easily add the perturbation for the cases of +*> computing one set of singular vectors. +*> = 'P': introduce perturbation +*> = 'N': do not perturb +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On exit, +*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the +*> computation SVA contains Euclidean column norms of the +*> iterated matrices in the array A. +*> - For WORK(1) .NE. WORK(2): The singular values of A are +*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if +*> sigma_max(A) overflows or if small singular values have been +*> saved from underflow by scaling the input matrix A. +*> - If JOBR='R' then some of the singular values may be returned +*> as exact zeros obtained by "set to zero" because they are +*> below the numerical rank threshold or are denormalized numbers. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension ( LDU, N ) or ( LDU, M ) +*> If JOBU = 'U', then U contains on exit the M-by-N matrix of +*> the left singular vectors. +*> If JOBU = 'F', then U contains on exit the M-by-M matrix of +*> the left singular vectors, including an ONB +*> of the orthogonal complement of the Range(A). +*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), +*> then U is used as workspace if the procedure +*> replaces A with A^*. In that case, [V] is computed +*> in U as left singular vectors of A^* and then +*> copied back to the V array. This 'W' option is just +*> a reminder to the caller that in this case U is +*> reserved as workspace of length N*N. +*> If JOBU = 'N' U is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U, LDU >= 1. +*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension ( LDV, N ) +*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), +*> then V is used as workspace if the pprocedure +*> replaces A with A^*. In that case, [U] is computed +*> in V as right singular vectors of A^* and then +*> copied back to the U array. This 'W' option is just +*> a reminder to the caller that in this case V is +*> reserved as workspace of length N*N. +*> If JOBV = 'N' V is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. +*> \endverbatim +*> +*> \param[out] CWORK +*> \verbatim +*> CWORK is COMPLEX array, dimension (MAX(2,LWORK)) +*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the required length of +*> CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> Length of CWORK to confirm proper allocation of workspace. +*> LWORK depends on the job: +*> +*> 1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and +*> 1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'): +*> LWORK >= 2*N+1. This is the minimal requirement. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= N + (N+1)*NB. Here NB is the optimal +*> block size for CGEQP3 and CGEQRF. +*> In general, optimal LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ)). +*> 1.2. .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). In this case, LWORK the minimal +*> requirement is LWORK >= N*N + 2*N. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ), +*> N*N+LWORK(CPOCON)). +*> 2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), +*> (JOBU.EQ.'N') +*> 2.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> CUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ), +*> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). +*> 2.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> CUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ), +*> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). +*> 3. If SIGMA and the left singular vectors are needed +*> 3.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3), 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). +*> 3.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CPOCON), +*> 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). +*> +*> 4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and +*> 4.1. if JOBV.EQ.'V' +*> the minimal requirement is LWORK >= 5*N+2*N*N. +*> 4.2. if JOBV.EQ.'J' the minimal requirement is +*> LWORK >= 4*N+N*N. +*> In both cases, the allocated CWORK can accommodate blocked runs +*> of CGEQP3, CGEQRF, CGELQF, CUNMQR, CUNMLQ. +*> +*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the +*> minimal length of CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(7,LWORK)) +*> On exit, +*> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) +*> such that SCALE*SVA(1:N) are the computed singular values +*> of A. (See the description of SVA().) +*> RWORK(2) = See the description of RWORK(1). +*> RWORK(3) = SCONDA is an estimate for the condition number of +*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') +*> SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +*> It is computed using SPOCON. It holds +*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA +*> where R is the triangular factor from the QRF of A. +*> However, if R is truncated and the numerical rank is +*> determined to be strictly smaller than N, SCONDA is +*> returned as -1, thus indicating that the smallest +*> singular values might be lost. +*> +*> If full SVD is needed, the following two condition numbers are +*> useful for the analysis of the algorithm. They are provied for +*> a developer/implementer who is familiar with the details of +*> the method. +*> +*> RWORK(4) = an estimate of the scaled condition number of the +*> triangular factor in the first QR factorization. +*> RWORK(5) = an estimate of the scaled condition number of the +*> triangular factor in the second QR factorization. +*> The following two parameters are computed if JOBT .EQ. 'T'. +*> They are provided for a developer/implementer who is familiar +*> with the details of the method. +*> RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy +*> of diag(A^* * A) / Trace(A^* * A) taken as point in the +*> probability simplex. +*> RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).) +*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit RWORK(1) contains the required length of +*> RWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> Length of RWORK to confirm proper allocation of workspace. +*> LRWORK depends on the job: +*> +*> 1. If only the singular values are requested i.e. if +*> LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') +*> then: +*> 1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then: LRWORK = max( 7, 2 * M ). +*> 1.2. Otherwise, LRWORK = max( 7, N ). +*> 2. If singular values with the right singular vectors are requested +*> i.e. if +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. +*> .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) +*> then: +*> 2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 2.2. Otherwise, LRWORK = max( 7, N ). +*> 3. If singular values with the left singular vectors are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 3.2. Otherwise, LRWORK = max( 7, N ). +*> 4. If singular values with both the left and the right singular vectors +*> are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 4.2. Otherwise, LRWORK = max( 7, N ). +*> +*> If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and +*> the length of RWORK is returned in RWORK(1). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, of dimension at least 4, that further depends +*> on the job: +*> +*> 1. If only the singular values are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 2. If the singular values and the right singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 3. If the singular values and the left singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4. If the singular values with both the left and the right singular vectors +*> are requested, then: +*> 4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N. +*> +*> On exit, +*> IWORK(1) = the numerical rank determined after the initial +*> QR factorization with pivoting. See the descriptions +*> of JOBA and JOBR. +*> IWORK(2) = the number of the computed nonzero singular values +*> IWORK(3) = if nonzero, a warning message: +*> If IWORK(3).EQ.1 then some of the column norms of A +*> were denormalized floats. The requested high accuracy +*> is not warranted by the data. +*> IWORK(4) = 1 or -1. If IWORK(4) .EQ. 1, then the procedure used A^* to +*> do the job as specified by the JOB parameters. +*> If the call to CGEJSV is a workspace query (indicated by LWORK .EQ. -1 and +*> LRWORK .EQ. -1), then on exit IWORK(1) contains the required length of +*> IWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0 : if INFO = -i, then the i-th argument had an illegal value. +*> = 0 : successful exit; +*> > 0 : CGEJSV did not converge in the maximal allowed number +*> of sweeps. The computed values may be inaccurate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEsing +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> CGEJSV implements a preconditioned Jacobi SVD algorithm. It uses CGEQP3, +*> CGEQRF, and CGELQF as preprocessors and preconditioners. Optionally, an +*> additional row pivoting can be used as a preprocessor, which in some +*> cases results in much higher accuracy. An example is matrix A with the +*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned +*> diagonal matrices and C is well-conditioned matrix. In that case, complete +*> pivoting in the first QR factorizations provides accuracy dependent on the +*> condition number of C, and independent of D1, D2. Such higher accuracy is +*> not completely understood theoretically, but it works well in practice. +*> Further, if A can be written as A = B*D, with well-conditioned B and some +*> diagonal D, then the high accuracy is guaranteed, both theoretically and +*> in software, independent of D. For more details see [1], [2]. +*> The computational range for the singular values can be the full range +*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS +*> & LAPACK routines called by CGEJSV are implemented to work in that range. +*> If that is not the case, then the restriction for safe computation with +*> the singular values in the range of normalized IEEE numbers is that the +*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not +*> overflow. This code (CGEJSV) is best used in this restricted range, +*> meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are +*> returned as zeros. See JOBR for details on this. +*> Further, this implementation is somewhat slower than the one described +*> in [1,2] due to replacement of some non-LAPACK components, and because +*> the choice of some tuning parameters in the iterative part (CGESVJ) is +*> left to the implementer on a particular machine. +*> The rank revealing QR factorization (in this code: CGEQP3) should be +*> implemented as in [3]. We have a new version of CGEQP3 under development +*> that is more robust than the current one in LAPACK, with a cleaner cut in +*> rank deficient cases. It will be available in the SIGMA library [4]. +*> If M is much larger than N, it is obvious that the initial QRF with +*> column pivoting can be preprocessed by the QRF without pivoting. That +*> well known trick is not used in CGEJSV because in some cases heavy row +*> weighting can be treated with complete pivoting. The overhead in cases +*> M much larger than N is then only due to pivoting, but the benefits in +*> terms of accuracy have prevailed. The implementer/user can incorporate +*> this extra QRF step easily. The implementer can also improve data movement +*> (matrix transpose, matrix copy, matrix transposed copy) - this +*> implementation of CGEJSV uses only the simplest, naive data movement. +*> \endverbatim +* +*> \par Contributor: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR +*> factorization software - a case study. +*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. +*> LAPACK Working note 176. +*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008, 2016. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> Please report all bugs and send interesting examples and/or comments to +*> drmac@math.hr. Thank you. +*> +* ===================================================================== + SUBROUTINE CGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, + $ M, N, A, LDA, SVA, U, LDU, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + IMPLICIT NONE + INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) + REAL SVA( N ), RWORK( LRWORK ) + INTEGER IWORK( * ) + CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* =========================================================================== +* +* .. Local Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + COMPLEX CTEMP + REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK, + $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM, + $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC + INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING + LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY, + $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL, + $ ROWPIV, RSVEC, TRANSP +* + INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK + INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM, + $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF + INTEGER LWRK_CGELQF, LWRK_CGEQP3, LWRK_CGEQP3N, LWRK_CGEQRF, + $ LWRK_CGESVJ, LWRK_CGESVJV, LWRK_CGESVJU, LWRK_CUNMLQ, + $ LWRK_CUNMQR, LWRK_CUNMQRM +* .. +* .. Local Arrays + COMPLEX CDUMMY(1) + REAL RDUMMY(1) +* +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, CONJG, ALOG, MAX, MIN, REAL, NINT, SQRT +* .. +* .. External Functions .. + REAL SLAMCH, SCNRM2 + INTEGER ISAMAX, ICAMAX + LOGICAL LSAME + EXTERNAL ISAMAX, ICAMAX, LSAME, SLAMCH, SCNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ, CCOPY, CGELQF, CGEQP3, CGEQRF, CLACPY, CLAPMR, + $ CLASCL, SLASCL, CLASET, CLASSQ, CLASWP, CUNGQR, CUNMLQ, + $ CUNMQR, CPOCON, SSCAL, CSSCAL, CSWAP, CTRSM, CLACGV, + $ XERBLA +* + EXTERNAL CGESVJ +* .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + JRACC = LSAME( JOBV, 'J' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC + ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) + L2RANK = LSAME( JOBA, 'R' ) + L2ABER = LSAME( JOBA, 'A' ) + ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) + L2TRAN = LSAME( JOBT, 'T' ) .AND. ( M .EQ. N ) + L2KILL = LSAME( JOBR, 'R' ) + DEFR = LSAME( JOBR, 'N' ) + L2PERT = LSAME( JOBP, 'P' ) +* + LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) +* + IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. + $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN + INFO = - 1 + ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. + $ ( LSAME( JOBU, 'W' ) .AND. RSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 2 + ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. + $ ( LSAME( JOBV, 'W' ) .AND. LSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 3 + ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN + INFO = - 4 + ELSE IF ( .NOT. ( LSAME(JOBT,'T') .OR. LSAME(JOBT,'N') ) ) THEN + INFO = - 5 + ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN + INFO = - 6 + ELSE IF ( M .LT. 0 ) THEN + INFO = - 7 + ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN + INFO = - 8 + ELSE IF ( LDA .LT. M ) THEN + INFO = - 10 + ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN + INFO = - 13 + ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN + INFO = - 15 + ELSE +* #:) + INFO = 0 + END IF +* + IF ( INFO .EQ. 0 ) THEN +* .. compute the minimal and the optimal workspace lengths +* [[The expressions for computing the minimal and the optimal +* values of LCWORK, LRWORK are written with a lot of redundancy and +* can be simplified. However, this verbose form is useful for +* maintenance and modifications of the code.]] +* +* .. minimal workspace length for CGEQP3 of an M x N matrix, +* CGEQRF of an N x N matrix, CGELQF of an N x N matrix, +* CUNMLQ for computing N x N matrix, CUNMQR for computing N x N +* matrix, CUNMQR for computing M x N matrix, respectively. + LWQP3 = N+1 + LWQRF = MAX( 1, N ) + LWLQF = MAX( 1, N ) + LWUNMLQ = MAX( 1, N ) + LWUNMQR = MAX( 1, N ) + LWUNMQRM = MAX( 1, M ) +* .. minimal workspace length for CPOCON of an N x N matrix + LWCON = 2 * N +* .. minimal workspace length for CGESVJ of an N x N matrix, +* without and with explicit accumulation of Jacobi rotations + LWSVDJ = MAX( 2 * N, 1 ) + LWSVDJV = MAX( 2 * N, 1 ) +* .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ + LRWQP3 = N + LRWCON = N + LRWSVDJ = N + IF ( LQUERY ) THEN + CALL CGEQP3( M, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_CGEQP3 = CDUMMY(1) + CALL CGEQRF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_CGEQRF = CDUMMY(1) + CALL CGELQF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_CGELQF = CDUMMY(1) + END IF + MINWRK = 2 + OPTWRK = 2 + MINIWRK = N + IF ( .NOT. (LSVEC .OR. RSVEC ) ) THEN +* .. minimal and optimal sizes of the complex workspace if +* only the singular values are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N**2+LWCON, N+LWQRF, LWSVDJ ) + ELSE + MINWRK = MAX( N+LWQP3, N+LWQRF, LWSVDJ ) + END IF + IF ( LQUERY ) THEN + CALL CGESVJ( 'L', 'N', 'N', N, N, A, LDA, SVA, N, V, + $ LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, N**2+LWCON, + $ N+LWRK_CGEQRF, LWRK_CGESVJ ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, N+LWRK_CGEQRF, + $ LWRK_CGESVJ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( RSVEC .AND. (.NOT.LSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the right singular vectors are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, LWCON, LWSVDJ, N+LWLQF, + $ 2*N+LWQRF, N+LWSVDJ, N+LWUNMLQ ) + ELSE + MINWRK = MAX( N+LWQP3, LWSVDJ, N+LWLQF, 2*N+LWQRF, + $ N+LWSVDJ, N+LWUNMLQ ) + END IF + IF ( LQUERY ) THEN + CALL CGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + CALL CUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_CUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, LWCON, LWRK_CGESVJ, + $ N+LWRK_CGELQF, 2*N+LWRK_CGEQRF, + $ N+LWRK_CGESVJ, N+LWRK_CUNMLQ ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, LWRK_CGESVJ,N+LWRK_CGELQF, + $ 2*N+LWRK_CGEQRF, N+LWRK_CGESVJ, + $ N+LWRK_CUNMLQ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( LSVEC .AND. (.NOT.RSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the left singular vectors are requested + IF ( ERREST ) THEN + MINWRK = N + MAX( LWQP3,LWCON,N+LWQRF,LWSVDJ,LWUNMQRM ) + ELSE + MINWRK = N + MAX( LWQP3, N+LWQRF, LWSVDJ, LWUNMQRM ) + END IF + IF ( LQUERY ) THEN + CALL CGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = N + MAX( LWRK_CGEQP3, LWCON, N+LWRK_CGEQRF, + $ LWRK_CGESVJ, LWRK_CUNMQRM ) + ELSE + OPTWRK = N + MAX( LWRK_CGEQP3, N+LWRK_CGEQRF, + $ LWRK_CGESVJ, LWRK_CUNMQRM ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE +* .. minimal and optimal sizes of the complex workspace if the +* full SVD is requested + IF ( .NOT. JRACC ) THEN + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + END IF + MINIWRK = MINIWRK + N + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + END IF + IF ( LQUERY ) THEN + CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQRM = CDUMMY(1) + CALL CUNMQR( 'L', 'N', N, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQR = CDUMMY(1) + IF ( .NOT. JRACC ) THEN + CALL CGEQP3( N,N, A, LDA, IWORK, CDUMMY,CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_CGEQP3N = CDUMMY(1) + CALL CGESVJ( 'L', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + CALL CGESVJ( 'U', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJU = CDUMMY(1) + CALL CGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJV = CDUMMY(1) + CALL CUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_CUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, N+LWCON, + $ 2*N+N**2+LWCON, 2*N+LWRK_CGEQRF, + $ 2*N+LWRK_CGEQP3N, + $ 2*N+N**2+N+LWRK_CGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_CGESVJ, + $ 2*N+N**2+N+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR, + $ 2*N+N**2+N+LWRK_CUNMLQ, + $ N+N**2+LWRK_CGESVJU, + $ N+LWRK_CUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, + $ 2*N+N**2+LWCON, 2*N+LWRK_CGEQRF, + $ 2*N+LWRK_CGEQP3N, + $ 2*N+N**2+N+LWRK_CGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_CGESVJ, + $ 2*N+N**2+N+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR, + $ 2*N+N**2+N+LWRK_CUNMLQ, + $ N+N**2+LWRK_CGESVJU, + $ N+LWRK_CUNMQRM ) + END IF + ELSE + CALL CGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJV = CDUMMY(1) + CALL CUNMQR( 'L', 'N', N, N, N, CDUMMY, N, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_CUNMQR = CDUMMY(1) + CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, N+LWCON, + $ 2*N+LWRK_CGEQRF, 2*N+N**2, + $ 2*N+N**2+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR,N+LWRK_CUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, 2*N+LWRK_CGEQRF, + $ 2*N+N**2, 2*N+N**2+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR, + $ N+LWRK_CUNMQRM ) + END IF + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + END IF + END IF + MINWRK = MAX( 2, MINWRK ) + OPTWRK = MAX( 2, OPTWRK ) + IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 + IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 + END IF +* + IF ( INFO .NE. 0 ) THEN +* #:( + CALL XERBLA( 'CGEJSV', - INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + CWORK(1) = OPTWRK + CWORK(2) = MINWRK + RWORK(1) = MINRWRK + IWORK(1) = MAX( 4, MINIWRK ) + RETURN + END IF +* +* Quick return for void matrix (Y3K safe) +* #:) + IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN + IWORK(1:4) = 0 + RWORK(1:7) = 0 + RETURN + ENDIF +* +* Determine whether the matrix U should be M x N or M x M +* + IF ( LSVEC ) THEN + N1 = N + IF ( LSAME( JOBU, 'F' ) ) N1 = M + END IF +* +* Set numerical parameters +* +*! NOTE: Make sure SLAMCH() does not fail on the target architecture. +* + EPSLN = SLAMCH('Epsilon') + SFMIN = SLAMCH('SafeMinimum') + SMALL = SFMIN / EPSLN + BIG = SLAMCH('O') +* BIG = ONE / SFMIN +* +* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N +* +*(!) If necessary, scale SVA() to protect the largest norm from +* overflow. It is possible that this scaling pushes the smallest +* column norm left from the underflow threshold (extreme case). +* + SCALEM = ONE / SQRT(REAL(M)*REAL(N)) + NOSCAL = .TRUE. + GOSCAL = .TRUE. + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL CLASSQ( M, A(1,p), 1, AAPP, AAQQ ) + IF ( AAPP .GT. BIG ) THEN + INFO = - 9 + CALL XERBLA( 'CGEJSV', -INFO ) + RETURN + END IF + AAQQ = SQRT(AAQQ) + IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN + SVA(p) = AAPP * AAQQ + ELSE + NOSCAL = .FALSE. + SVA(p) = AAPP * ( AAQQ * SCALEM ) + IF ( GOSCAL ) THEN + GOSCAL = .FALSE. + CALL SSCAL( p-1, SCALEM, SVA, 1 ) + END IF + END IF + 1874 CONTINUE +* + IF ( NOSCAL ) SCALEM = ONE +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + AAPP = MAX( AAPP, SVA(p) ) + IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) + 4781 CONTINUE +* +* Quick return for zero M x N matrix +* #:) + IF ( AAPP .EQ. ZERO ) THEN + IF ( LSVEC ) CALL CLASET( 'G', M, N1, CZERO, CONE, U, LDU ) + IF ( RSVEC ) CALL CLASET( 'G', N, N, CZERO, CONE, V, LDV ) + RWORK(1) = ONE + RWORK(2) = ONE + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + IWORK(1) = 0 + IWORK(2) = 0 + IWORK(3) = 0 + IWORK(4) = -1 + RETURN + END IF +* +* Issue warning if denormalized column norms detected. Override the +* high relative accuracy request. Issue licence to kill nonzero columns +* (set them to zero) whose norm is less than sigma_max / BIG (roughly). +* #:( + WARNING = 0 + IF ( AAQQ .LE. SFMIN ) THEN + L2RANK = .TRUE. + L2KILL = .TRUE. + WARNING = 1 + END IF +* +* Quick return for one-column matrix +* #:) + IF ( N .EQ. 1 ) THEN +* + IF ( LSVEC ) THEN + CALL CLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) + CALL CLACPY( 'A', M, 1, A, LDA, U, LDU ) +* computing all M left singular vectors of the M x 1 matrix + IF ( N1 .NE. N ) THEN + CALL CGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR ) + CALL CUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR ) + CALL CCOPY( M, A(1,1), 1, U(1,1), 1 ) + END IF + END IF + IF ( RSVEC ) THEN + V(1,1) = CONE + END IF + IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN + SVA(1) = SVA(1) / SCALEM + SCALEM = ONE + END IF + RWORK(1) = ONE / SCALEM + RWORK(2) = ONE + IF ( SVA(1) .NE. ZERO ) THEN + IWORK(1) = 1 + IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN + IWORK(2) = 1 + ELSE + IWORK(2) = 0 + END IF + ELSE + IWORK(1) = 0 + IWORK(2) = 0 + END IF + IWORK(3) = 0 + IWORK(4) = -1 + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + RETURN +* + END IF +* + TRANSP = .FALSE. +* + AATMAX = -ONE + AATMIN = BIG + IF ( ROWPIV .OR. L2TRAN ) THEN +* +* Compute the row norms, needed to determine row pivoting sequence +* (in the case of heavily row weighted A, row pivoting is strongly +* advised) and to collect information needed to compare the +* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.). +* + IF ( L2TRAN ) THEN + DO 1950 p = 1, M + XSC = ZERO + TEMP1 = ONE + CALL CLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) +* CLASSQ gets both the ell_2 and the ell_infinity norm +* in one pass through the vector + RWORK(M+p) = XSC * SCALEM + RWORK(p) = XSC * (SCALEM*SQRT(TEMP1)) + AATMAX = MAX( AATMAX, RWORK(p) ) + IF (RWORK(p) .NE. ZERO) + $ AATMIN = MIN(AATMIN,RWORK(p)) + 1950 CONTINUE + ELSE + DO 1904 p = 1, M + RWORK(M+p) = SCALEM*ABS( A(p,ICAMAX(N,A(p,1),LDA)) ) + AATMAX = MAX( AATMAX, RWORK(M+p) ) + AATMIN = MIN( AATMIN, RWORK(M+p) ) + 1904 CONTINUE + END IF +* + END IF +* +* For square matrix A try to determine whether A^* would be better +* input for the preconditioned Jacobi SVD, with faster convergence. +* The decision is based on an O(N) function of the vector of column +* and row norms of A, based on the Shannon entropy. This should give +* the right choice in most cases when the difference actually matters. +* It may fail and pick the slower converging side. +* + ENTRA = ZERO + ENTRAT = ZERO + IF ( L2TRAN ) THEN +* + XSC = ZERO + TEMP1 = ONE + CALL SLASSQ( N, SVA, 1, XSC, TEMP1 ) + TEMP1 = ONE / TEMP1 +* + ENTRA = ZERO + DO 1113 p = 1, N + BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * ALOG(BIG1) + 1113 CONTINUE + ENTRA = - ENTRA / ALOG(REAL(N)) +* +* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex. +* It is derived from the diagonal of A^* * A. Do the same with the +* diagonal of A * A^*, compute the entropy of the corresponding +* probability distribution. Note that A * A^* and A^* * A have the +* same trace. +* + ENTRAT = ZERO + DO 1114 p = 1, M + BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * ALOG(BIG1) + 1114 CONTINUE + ENTRAT = - ENTRAT / ALOG(REAL(M)) +* +* Analyze the entropies and decide A or A^*. Smaller entropy +* usually means better input for the algorithm. +* + TRANSP = ( ENTRAT .LT. ENTRA ) +* +* If A^* is better than A, take the adjoint of A. This is allowed +* only for square matrices, M=N. + IF ( TRANSP ) THEN +* In an optimal implementation, this trivial transpose +* should be replaced with faster transpose. + DO 1115 p = 1, N - 1 + A(p,p) = CONJG(A(p,p)) + DO 1116 q = p + 1, N + CTEMP = CONJG(A(q,p)) + A(q,p) = CONJG(A(p,q)) + A(p,q) = CTEMP + 1116 CONTINUE + 1115 CONTINUE + A(N,N) = CONJG(A(N,N)) + DO 1117 p = 1, N + RWORK(M+p) = SVA(p) + SVA(p) = RWORK(p) +* previously computed row 2-norms are now column 2-norms +* of the transposed matrix + 1117 CONTINUE + TEMP1 = AAPP + AAPP = AATMAX + AATMAX = TEMP1 + TEMP1 = AAQQ + AAQQ = AATMIN + AATMIN = TEMP1 + KILL = LSVEC + LSVEC = RSVEC + RSVEC = KILL + IF ( LSVEC ) N1 = N +* + ROWPIV = .TRUE. + END IF +* + END IF +* END IF L2TRAN +* +* Scale the matrix so that its maximal singular value remains less +* than SQRT(BIG) -- the matrix is scaled so that its maximal column +* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep +* SQRT(BIG) instead of BIG is the fact that CGEJSV uses LAPACK and +* BLAS routines that, in some implementations, are not capable of +* working in the full interval [SFMIN,BIG] and that they may provoke +* overflows in the intermediate results. If the singular values spread +* from SFMIN to BIG, then CGESVJ will compute them. So, in that case, +* one should use CGESVJ instead of CGEJSV. + BIG1 = SQRT( BIG ) + TEMP1 = SQRT( BIG / REAL(N) ) +* >> for future updates: allow bigger range, i.e. the largest column +* will be allowed up to BIG/N and CGESVJ will do the rest. However, for +* this all other (LAPACK) components must allow such a range. +* TEMP1 = BIG/REAL(N) +* TEMP1 = BIG * EPSLN this should 'almost' work with current LAPACK components + CALL SLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) + IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN + AAQQ = ( AAQQ / AAPP ) * TEMP1 + ELSE + AAQQ = ( AAQQ * TEMP1 ) / AAPP + END IF + TEMP1 = TEMP1 * SCALEM + CALL CLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) +* +* To undo scaling at the end of this procedure, multiply the +* computed singular values with USCAL2 / USCAL1. +* + USCAL1 = TEMP1 + USCAL2 = AAPP +* + IF ( L2KILL ) THEN +* L2KILL enforces computation of nonzero singular values in +* the restricted range of condition number of the initial A, +* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). + XSC = SQRT( SFMIN ) + ELSE + XSC = SMALL +* +* Now, if the condition number of A is too big, +* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, +* as a precaution measure, the full SVD is computed using CGESVJ +* with accumulated Jacobi rotations. This provides numerically +* more robust computation, at the cost of slightly increased run +* time. Depending on the concrete implementation of BLAS and LAPACK +* (i.e. how they behave in presence of extreme ill-conditioning) the +* implementor may decide to remove this switch. + IF ( ( AAQQ.LT.SQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN + JRACC = .TRUE. + END IF +* + END IF + IF ( AAQQ .LT. XSC ) THEN + DO 700 p = 1, N + IF ( SVA(p) .LT. XSC ) THEN + CALL CLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA ) + SVA(p) = ZERO + END IF + 700 CONTINUE + END IF +* +* Preconditioning using QR factorization with pivoting +* + IF ( ROWPIV ) THEN +* Optional row permutation (Bjoerck row pivoting): +* A result by Cox and Higham shows that the Bjoerck's +* row pivoting combined with standard column pivoting +* has similar effect as Powell-Reid complete pivoting. +* The ell-infinity norms of A are made nonincreasing. + IF ( ( LSVEC .AND. RSVEC ) .AND. .NOT.( JRACC ) ) THEN + IWOFF = 2*N + ELSE + IWOFF = N + END IF + DO 1952 p = 1, M - 1 + q = ISAMAX( M-p+1, RWORK(M+p), 1 ) + p - 1 + IWORK(IWOFF+p) = q + IF ( p .NE. q ) THEN + TEMP1 = RWORK(M+p) + RWORK(M+p) = RWORK(M+q) + RWORK(M+q) = TEMP1 + END IF + 1952 CONTINUE + CALL CLASWP( N, A, LDA, 1, M-1, IWORK(IWOFF+1), 1 ) + END IF +* +* End of the preparation phase (scaling, optional sorting and +* transposing, optional flushing of small columns). +* +* Preconditioning +* +* If the full SVD is needed, the right singular vectors are computed +* from a matrix equation, and for that we need theoretical analysis +* of the Businger-Golub pivoting. So we use CGEQP3 as the first RR QRF. +* In all other cases the first RR QRF can be chosen by other criteria +* (eg speed by replacing global with restricted window pivoting, such +* as in xGEQPX from TOMS # 782). Good results will be obtained using +* xGEQPX with properly (!) chosen numerical parameters. +* Any improvement of CGEQP3 improves overal performance of CGEJSV. +* +* A * P1 = Q1 * [ R1^* 0]^*: + DO 1963 p = 1, N +* .. all columns are free columns + IWORK(p) = 0 + 1963 CONTINUE + CALL CGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, + $ RWORK, IERR ) +* +* The upper triangular matrix R1 from the first QRF is inspected for +* rank deficiency and possibilities for deflation, or possible +* ill-conditioning. Depending on the user specified flag L2RANK, +* the procedure explores possibilities to reduce the numerical +* rank by inspecting the computed upper triangular factor. If +* L2RANK or L2ABER are up, then CGEJSV will compute the SVD of +* A + dA, where ||dA|| <= f(M,N)*EPSLN. +* + NR = 1 + IF ( L2ABER ) THEN +* Standard absolute error bound suffices. All sigma_i with +* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an +* agressive enforcement of lower numerical rank by introducing a +* backward error of the order of N*EPSLN*||A||. + TEMP1 = SQRT(REAL(N))*EPSLN + DO 3001 p = 2, N + IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN + NR = NR + 1 + ELSE + GO TO 3002 + END IF + 3001 CONTINUE + 3002 CONTINUE + ELSE IF ( L2RANK ) THEN +* .. similarly as above, only slightly more gentle (less agressive). +* Sudden drop on the diagonal of R1 is used as the criterion for +* close-to-rank-defficient. + TEMP1 = SQRT(SFMIN) + DO 3401 p = 2, N + IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR. + $ ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 + NR = NR + 1 + 3401 CONTINUE + 3402 CONTINUE +* + ELSE +* The goal is high relative accuracy. However, if the matrix +* has high scaled condition number the relative accuracy is in +* general not feasible. Later on, a condition number estimator +* will be deployed to estimate the scaled condition number. +* Here we just remove the underflowed part of the triangular +* factor. This prevents the situation in which the code is +* working hard to get the accuracy not warranted by the data. + TEMP1 = SQRT(SFMIN) + DO 3301 p = 2, N + IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 + NR = NR + 1 + 3301 CONTINUE + 3302 CONTINUE +* + END IF +* + ALMORT = .FALSE. + IF ( NR .EQ. N ) THEN + MAXPRJ = ONE + DO 3051 p = 2, N + TEMP1 = ABS(A(p,p)) / SVA(IWORK(p)) + MAXPRJ = MIN( MAXPRJ, TEMP1 ) + 3051 CONTINUE + IF ( MAXPRJ**2 .GE. ONE - REAL(N)*EPSLN ) ALMORT = .TRUE. + END IF +* +* + SCONDA = - ONE + CONDR1 = - ONE + CONDR2 = - ONE +* + IF ( ERREST ) THEN + IF ( N .EQ. NR ) THEN + IF ( RSVEC ) THEN +* .. V is available as workspace + CALL CLACPY( 'U', N, N, A, LDA, V, LDV ) + DO 3053 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL CSSCAL( p, ONE/TEMP1, V(1,p), 1 ) + 3053 CONTINUE + IF ( LSVEC )THEN + CALL CPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL CPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK, RWORK, IERR ) + END IF +* + ELSE IF ( LSVEC ) THEN +* .. U is available as workspace + CALL CLACPY( 'U', N, N, A, LDA, U, LDU ) + DO 3054 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL CSSCAL( p, ONE/TEMP1, U(1,p), 1 ) + 3054 CONTINUE + CALL CPOCON( 'U', N, U, LDU, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL CLACPY( 'U', N, N, A, LDA, CWORK, N ) +*[] CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) +* Change: here index shifted by N to the left, CWORK(1:N) +* not needed for SIGMA only computation + DO 3052 p = 1, N + TEMP1 = SVA(IWORK(p)) +*[] CALL CSSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 ) + CALL CSSCAL( p, ONE/TEMP1, CWORK((p-1)*N+1), 1 ) + 3052 CONTINUE +* .. the columns of R are scaled to have unit Euclidean lengths. +*[] CALL CPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1, +*[] $ CWORK(N+N*N+1), RWORK, IERR ) + CALL CPOCON( 'U', N, CWORK, N, ONE, TEMP1, + $ CWORK(N*N+1), RWORK, IERR ) +* + END IF + IF ( TEMP1 .NE. ZERO ) THEN + SCONDA = ONE / SQRT(TEMP1) + ELSE + SCONDA = - ONE + END IF +* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA + ELSE + SCONDA = - ONE + END IF + END IF +* + L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. SQRT(BIG1) ) +* If there is no violent scaling, artificial perturbation is not needed. +* +* Phase 3: +* + IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN +* +* Singular Values only +* +* .. transpose A(1:NR,1:N) + DO 1946 p = 1, MIN( N-1, NR ) + CALL CCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL CLACGV( N-p+1, A(p,p), 1 ) + 1946 CONTINUE + IF ( NR .EQ. N ) A(N,N) = CONJG(A(N,N)) +* +* The following two DO-loops introduce small relative perturbation +* into the strict upper triangle of the lower triangular matrix. +* Small entries below the main diagonal are also changed. +* This modification is useful if the computing environment does not +* provide/allow FLUSH TO ZERO underflow, for it prevents many +* annoying denormalized numbers in case of strongly scaled matrices. +* The perturbation is structured so that it does not introduce any +* new perturbation of the singular values, and it does not destroy +* the job done by the preconditioner. +* The licence for this perturbation is in the variable L2PERT, which +* should be .FALSE. if FLUSH TO ZERO underflow is active. +* + IF ( .NOT. ALMORT ) THEN +* + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / REAL(N) + DO 4947 q = 1, NR + CTEMP = CMPLX(XSC*ABS(A(q,q)),ZERO) + DO 4949 p = 1, N + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 4949 CONTINUE + 4947 CONTINUE + ELSE + CALL CLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA ) + END IF +* +* .. second preconditioning using the QR factorization +* + CALL CGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR ) +* +* .. and transpose upper to lower triangular + DO 1948 p = 1, NR - 1 + CALL CCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL CLACGV( NR-p+1, A(p,p), 1 ) + 1948 CONTINUE +* + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* +* .. again some perturbation (a "background noise") is added +* to drown denormals + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / REAL(N) + DO 1947 q = 1, NR + CTEMP = CMPLX(XSC*ABS(A(q,q)),ZERO) + DO 1949 p = 1, NR + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 1949 CONTINUE + 1947 CONTINUE + ELSE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA ) + END IF +* +* .. and one-sided Jacobi rotations are started on a lower +* triangular matrix (plus perturbation which is ignored in +* the part which destroys triangular form (confusing?!)) +* + CALL CGESVJ( 'L', 'N', 'N', NR, NR, A, LDA, SVA, + $ N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* +* + ELSE IF ( ( RSVEC .AND. ( .NOT. LSVEC ) .AND. ( .NOT. JRACC ) ) + $ .OR. + $ ( JRACC .AND. ( .NOT. LSVEC ) .AND. ( NR .NE. N ) ) ) THEN +* +* -> Singular Values and Right Singular Vectors <- +* + IF ( ALMORT ) THEN +* +* .. in this case NR equals N + DO 1998 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL CLACGV( N-p+1, V(p,p), 1 ) + 1998 CONTINUE + CALL CLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) +* + CALL CGESVJ( 'L','U','N', N, NR, V, LDV, SVA, NR, A, LDA, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + ELSE +* +* .. two more QR factorizations ( one QRF is not enough, two require +* accumulated product of Jacobi rotations, three are perfect ) +* + CALL CLASET( 'L', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA ) + CALL CGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR) + CALL CLACPY( 'L', NR, NR, A, LDA, V, LDV ) + CALL CLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) + CALL CGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + DO 8998 p = 1, NR + CALL CCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) + CALL CLACGV( NR-p+1, V(p,p), 1 ) + 8998 CONTINUE + CALL CLASET('U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV) +* + CALL CGESVJ( 'L', 'U','N', NR, NR, V,LDV, SVA, NR, U, + $ LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1), LDV ) + CALL CLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1), LDV ) + CALL CLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV ) + END IF +* + CALL CUNMLQ( 'L', 'C', N, N, NR, A, LDA, CWORK, + $ V, LDV, CWORK(N+1), LWORK-N, IERR ) +* + END IF +* .. permute the rows of V +* DO 8991 p = 1, N +* CALL CCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) +* 8991 CONTINUE +* CALL CLACPY( 'All', N, N, A, LDA, V, LDV ) + CALL CLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + IF ( TRANSP ) THEN + CALL CLACPY( 'A', N, N, V, LDV, U, LDU ) + END IF +* + ELSE IF ( JRACC .AND. (.NOT. LSVEC) .AND. ( NR.EQ. N ) ) THEN +* + CALL CLASET( 'L', N-1,N-1, CZERO, CZERO, A(2,1), LDA ) +* + CALL CGESVJ( 'U','N','V', N, N, A, LDA, SVA, N, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + CALL CLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN +* +* .. Singular Values and Left Singular Vectors .. +* +* .. second preconditioning step to avoid need to accumulate +* Jacobi rotations in the Jacobi iterations. + DO 1965 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) + CALL CLACGV( N-p+1, U(p,p), 1 ) + 1965 CONTINUE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL CGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + DO 1967 p = 1, NR - 1 + CALL CCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) + CALL CLACGV( N-p+1, U(p,p), 1 ) + 1967 CONTINUE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL CGESVJ( 'L', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, + $ LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* + IF ( NR .LT. M ) THEN + CALL CLASET( 'A', M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL CLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU ) + CALL CLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU ) + END IF + END IF +* + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + DO 1974 p = 1, N1 + XSC = ONE / SCNRM2( M, U(1,p), 1 ) + CALL CSSCAL( M, XSC, U(1,p), 1 ) + 1974 CONTINUE +* + IF ( TRANSP ) THEN + CALL CLACPY( 'A', N, N, U, LDU, V, LDV ) + END IF +* + ELSE +* +* .. Full SVD .. +* + IF ( .NOT. JRACC ) THEN +* + IF ( .NOT. ALMORT ) THEN +* +* Second Preconditioning Step (QRF [with pivoting]) +* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is +* equivalent to an LQF CALL. Since in many libraries the QRF +* seems to be better optimized than the LQF, we do explicit +* transpose and use the QRF. This is subject to changes in an +* optimized implementation of CGEJSV. +* + DO 1968 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL CLACGV( N-p+1, V(p,p), 1 ) + 1968 CONTINUE +* +* .. the following two loops perturb small entries to avoid +* denormals in the second QR factorization, where they are +* as good as zeros. This is done to avoid painfully slow +* computation with denormals. The relative size of the perturbation +* is a parameter that can be changed by the implementer. +* This perturbation device will be obsolete on machines with +* properly implemented arithmetic. +* To switch it off, set L2PERT=.FALSE. To remove it from the +* code, remove the action under L2PERT=.TRUE., leave the ELSE part. +* The following two loops should be blocked and fused with the +* transposed copy above. +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 2969 q = 1, NR + CTEMP = CMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 2968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 2968 CONTINUE + 2969 CONTINUE + ELSE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF +* +* Estimate the row scaled condition number of R1 +* (If R1 is rectangular, N > NR, then the condition number +* of the leading NR x NR submatrix is estimated.) +* + CALL CLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR ) + DO 3950 p = 1, NR + TEMP1 = SCNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1) + CALL CSSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1) + 3950 CONTINUE + CALL CPOCON('L',NR,CWORK(2*N+1),NR,ONE,TEMP1, + $ CWORK(2*N+NR*NR+1),RWORK,IERR) + CONDR1 = ONE / SQRT(TEMP1) +* .. here need a second oppinion on the condition number +* .. then assume worst case scenario +* R1 is OK for inverse <=> CONDR1 .LT. REAL(N) +* more conservative <=> CONDR1 .LT. SQRT(REAL(N)) +* + COND_OK = SQRT(SQRT(REAL(NR))) +*[TP] COND_OK is a tuning parameter. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* .. the second QRF without pivoting. Note: in an optimized +* implementation, this QRF should be implemented as the QRF +* of a lower triangular matrix. +* R1^* = Q2 * R2 + CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL)/EPSLN + DO 3959 p = 2, NR + DO 3958 q = 1, p - 1 + CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3958 CONTINUE + 3959 CONTINUE + END IF +* + IF ( NR .NE. N ) + $ CALL CLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* .. save ... +* +* .. this transposed copy should be better than naive + DO 1969 p = 1, NR - 1 + CALL CCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) + CALL CLACGV(NR-p+1, V(p,p), 1 ) + 1969 CONTINUE + V(NR,NR)=CONJG(V(NR,NR)) +* + CONDR2 = CONDR1 +* + ELSE +* +* .. ill-conditioned case: second QRF with pivoting +* Note that windowed pivoting would be equaly good +* numerically, and more run-time efficient. So, in +* an optimal implementation, the next call to CGEQP3 +* should be replaced with eg. CALL CGEQPX (ACM TOMS #782) +* with properly (carefully) chosen parameters. +* +* R1^* * P2 = Q2 * R2 + DO 3003 p = 1, NR + IWORK(N+p) = 0 + 3003 CONTINUE + CALL CGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1), + $ CWORK(2*N+1), LWORK-2*N, RWORK, IERR ) +** CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), +** $ LWORK-2*N, IERR ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 3969 p = 2, NR + DO 3968 q = 1, p - 1 + CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3968 CONTINUE + 3969 CONTINUE + END IF +* + CALL CLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 8970 p = 2, NR + DO 8971 q = 1, p - 1 + CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) +* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) ) + V(p,q) = - CTEMP + 8971 CONTINUE + 8970 CONTINUE + ELSE + CALL CLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV ) + END IF +* Now, compute R2 = L3 * Q3, the LQ factorization. + CALL CGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1), + $ CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) +* .. and estimate the condition number + CALL CLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR ) + DO 4950 p = 1, NR + TEMP1 = SCNRM2( p, CWORK(2*N+N*NR+NR+p), NR ) + CALL CSSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR ) + 4950 CONTINUE + CALL CPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, + $ CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) + CONDR2 = ONE / SQRT(TEMP1) +* +* + IF ( CONDR2 .GE. COND_OK ) THEN +* .. save the Householder vectors used for Q3 +* (this overwrittes the copy of R2, as it will not be +* needed in this branch, but it does not overwritte the +* Huseholder vectors of Q2.). + CALL CLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N ) +* .. and the rest of the information on Q3 is in +* WORK(2*N+N*NR+1:2*N+N*NR+N) + END IF +* + END IF +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 4968 q = 2, NR + CTEMP = XSC * V(q,q) + DO 4969 p = 1, q - 1 +* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) ) + V(p,q) = - CTEMP + 4969 CONTINUE + 4968 CONTINUE + ELSE + CALL CLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) + END IF +* +* Second preconditioning finished; continue with Jacobi SVD +* The input matrix is lower trinagular. +* +* Recover the right singular vectors as solution of a well +* conditioned triangular matrix equation. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* + CALL CGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU, + $ CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK, + $ LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3970 p = 1, NR + CALL CCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL CSSCAL( NR, SVA(p), V(1,p), 1 ) + 3970 CONTINUE + +* .. pick the right matrix equation and solve it +* + IF ( NR .EQ. N ) THEN +* :)) .. best case, R1 is inverted. The solution of this matrix +* equation is Q2*V2 = the product of the Jacobi rotations +* used in CGESVJ, premultiplied with the orthogonal matrix +* from the second QR factorization. + CALL CTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV) + ELSE +* .. R1 is well conditioned, but non-square. Adjoint of R2 +* is inverted to get the product of the Jacobi rotations +* used in CGESVJ. The Q-factor from the second QR +* factorization is then built in explicitly. + CALL CTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1), + $ N,V,LDV) + IF ( NR .LT. N ) THEN + CALL CLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) + CALL CLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) + CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL CUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) + END IF +* + ELSE IF ( CONDR2 .LT. COND_OK ) THEN +* +* The matrix R2 is inverted. The solution of the matrix equation +* is Q3^* * V3 = the product of the Jacobi rotations (appplied to +* the lower triangular L3 from the LQ factorization of +* R2=L3*Q3), pre-multiplied with the transposed Q3. + CALL CGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3870 p = 1, NR + CALL CCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL CSSCAL( NR, SVA(p), U(1,p), 1 ) + 3870 CONTINUE + CALL CTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N, + $ U,LDU) +* .. apply the permutation from the second QR factorization + DO 873 q = 1, NR + DO 872 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 872 CONTINUE + DO 874 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 874 CONTINUE + 873 CONTINUE + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) + ELSE +* Last line of defense. +* #:( This is a rather pathological case: no scaled condition +* improvement after two pivoted QR factorizations. Other +* possibility is that the rank revealing QR factorization +* or the condition estimator has failed, or the COND_OK +* is set very close to ONE (which is unnecessary). Normally, +* this branch should never be executed, but in rare cases of +* failure of the RRQR or condition estimator, the last line of +* defense ensures that CGEJSV completes the task. +* Compute the full SVD of L3 using CGESVJ with explicit +* accumulation of Jacobi rotations. + CALL CGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* + CALL CUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N, + $ CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1), + $ LWORK-2*N-N*NR-NR, IERR ) + DO 773 q = 1, NR + DO 772 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 772 CONTINUE + DO 774 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 774 CONTINUE + 773 CONTINUE +* + END IF +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(REAL(N)) * EPSLN + DO 1972 q = 1, N + DO 972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 972 CONTINUE + DO 973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 973 CONTINUE + XSC = ONE / SCNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( N, XSC, V(1,q), 1 ) + 1972 CONTINUE +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). + IF ( NR .LT. M ) THEN + CALL CLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU) + IF ( NR .LT. N1 ) THEN + CALL CLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL CLASET('A',M-NR,N1-NR,CZERO,CONE, + $ U(NR+1,NR+1),LDU) + END IF + END IF +* +* The Q matrix from the first QRF is built into the left singular +* matrix U. This applies to all cases. +* + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + +* The columns of U are normalized. The cost is O(M*N) flops. + TEMP1 = SQRT(REAL(M)) * EPSLN + DO 1973 p = 1, NR + XSC = ONE / SCNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( M, XSC, U(1,p), 1 ) + 1973 CONTINUE +* +* If the initial QRF is computed with row pivoting, the left +* singular vectors must be adjusted. +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + ELSE +* +* .. the initial matrix A has almost orthogonal columns and +* the second QRF is not needed +* + CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 5970 p = 2, N + CTEMP = XSC * CWORK( N + (p-1)*N + p ) + DO 5971 q = 1, p - 1 +* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) / +* $ ABS(CWORK(N+(p-1)*N+q)) ) + CWORK(N+(q-1)*N+p)=-CTEMP + 5971 CONTINUE + 5970 CONTINUE + ELSE + CALL CLASET( 'L',N-1,N-1,CZERO,CZERO,CWORK(N+2),N ) + END IF +* + CALL CGESVJ( 'U', 'U', 'N', N, N, CWORK(N+1), N, SVA, + $ N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, + $ INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 6970 p = 1, N + CALL CCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 ) + CALL CSSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 ) + 6970 CONTINUE +* + CALL CTRSM( 'L', 'U', 'N', 'N', N, N, + $ CONE, A, LDA, CWORK(N+1), N ) + DO 6972 p = 1, N + CALL CCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV ) + 6972 CONTINUE + TEMP1 = SQRT(REAL(N))*EPSLN + DO 6971 p = 1, N + XSC = ONE / SCNRM2( N, V(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( N, XSC, V(1,p), 1 ) + 6971 CONTINUE +* +* Assemble the left singular vector matrix U (M x N). +* + IF ( N .LT. M ) THEN + CALL CLASET( 'A', M-N, N, CZERO, CZERO, U(N+1,1), LDU ) + IF ( N .LT. N1 ) THEN + CALL CLASET('A',N, N1-N, CZERO, CZERO, U(1,N+1),LDU) + CALL CLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU) + END IF + END IF + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + TEMP1 = SQRT(REAL(M))*EPSLN + DO 6973 p = 1, N1 + XSC = ONE / SCNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( M, XSC, U(1,p), 1 ) + 6973 CONTINUE +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + END IF +* +* end of the >> almost orthogonal case << in the full SVD +* + ELSE +* +* This branch deploys a preconditioned Jacobi SVD with explicitly +* accumulated rotations. It is included as optional, mainly for +* experimental purposes. It does perfom well, and can also be used. +* In this implementation, this branch will be automatically activated +* if the condition number sigma_max(A) / sigma_min(A) is predicted +* to be greater than the overflow threshold. This is because the +* a posteriori computation of the singular vectors assumes robust +* implementation of BLAS and some LAPACK procedures, capable of working +* in presence of extreme values, e.g. when the singular values spread from +* the underflow to the overflow threshold. +* + DO 7968 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL CLACGV( N-p+1, V(p,p), 1 ) + 7968 CONTINUE +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 5969 q = 1, NR + CTEMP = CMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 5968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 5968 CONTINUE + 5969 CONTINUE + ELSE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF + + CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + CALL CLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N ) +* + DO 7969 p = 1, NR + CALL CCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) + CALL CLACGV( NR-p+1, U(p,p), 1 ) + 7969 CONTINUE + + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 9970 q = 2, NR + DO 9971 p = 1, q - 1 + CTEMP = CMPLX(XSC * MIN(ABS(U(p,p)),ABS(U(q,q))), + $ ZERO) +* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) ) + U(p,q) = - CTEMP + 9971 CONTINUE + 9970 CONTINUE + ELSE + CALL CLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) + END IF + + CALL CGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA, + $ N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL CLASET( 'A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV ) + END IF + + CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(REAL(N)) * EPSLN + DO 7972 q = 1, N + DO 8972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 8972 CONTINUE + DO 8973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 8973 CONTINUE + XSC = ONE / SCNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( N, XSC, V(1,q), 1 ) + 7972 CONTINUE +* +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). +* + IF ( NR .LT. M ) THEN + CALL CLASET( 'A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL CLASET('A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU) + CALL CLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU) + END IF + END IF +* + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* +* + END IF + IF ( TRANSP ) THEN +* .. swap U and V because the procedure worked on A^* + DO 6974 p = 1, N + CALL CSWAP( N, U(1,p), 1, V(1,p), 1 ) + 6974 CONTINUE + END IF +* + END IF +* end of the full SVD +* +* Undo scaling, if necessary (and possible) +* + IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN + CALL SLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) + USCAL1 = ONE + USCAL2 = ONE + END IF +* + IF ( NR .LT. N ) THEN + DO 3004 p = NR+1, N + SVA(p) = ZERO + 3004 CONTINUE + END IF +* + RWORK(1) = USCAL2 * SCALEM + RWORK(2) = USCAL1 + IF ( ERREST ) RWORK(3) = SCONDA + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = CONDR1 + RWORK(5) = CONDR2 + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ENTRA + RWORK(7) = ENTRAT + END IF +* + IWORK(1) = NR + IWORK(2) = NUMRANK + IWORK(3) = WARNING + IF ( TRANSP ) THEN + IWORK(4) = 1 + ELSE + IWORK(4) = -1 + END IF + +* + RETURN +* .. +* .. END OF CGEJSV +* .. + END +* diff --git a/lapack-netlib/SRC/cgelqt.f b/lapack-netlib/SRC/cgelqt.f index 5bead5355..e151f10fe 100644 --- a/lapack-netlib/SRC/cgelqt.f +++ b/lapack-netlib/SRC/cgelqt.f @@ -92,7 +92,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -101,8 +101,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -111,11 +111,11 @@ *> *> where the vi's represent the vectors which define H(i), which are returned *> in the matrix A. The 1's along the diagonal of V are not stored in A. -*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each -*> block is of order NB except for the last block, which is of order -*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block -*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -123,10 +123,10 @@ * ===================================================================== SUBROUTINE CGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, MB diff --git a/lapack-netlib/SRC/cgelqt3.f b/lapack-netlib/SRC/cgelqt3.f index 751cb6132..f64379722 100644 --- a/lapack-netlib/SRC/cgelqt3.f +++ b/lapack-netlib/SRC/cgelqt3.f @@ -83,7 +83,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -92,8 +92,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -114,10 +114,10 @@ * ===================================================================== RECURSIVE SUBROUTINE CGELQT3( M, N, A, LDA, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, LDT @@ -134,7 +134,7 @@ PARAMETER ( ZERO = (0.0E+00,0.0E+00)) * .. * .. Local Scalars .. - INTEGER I, I1, J, J1, M1, M2, N1, N2, IINFO + INTEGER I, I1, J, J1, M1, M2, IINFO * .. * .. External Subroutines .. EXTERNAL CLARFG, CTRMM, CGEMM, XERBLA diff --git a/lapack-netlib/SRC/cgemlqt.f b/lapack-netlib/SRC/cgemlqt.f index e4c991a72..e35e421b1 100644 --- a/lapack-netlib/SRC/cgemlqt.f +++ b/lapack-netlib/SRC/cgemlqt.f @@ -18,16 +18,16 @@ *> *> \verbatim *> -*> CGEMQRT overwrites the general real M-by-N matrix C with +*> CGEMLQT overwrites the general real M-by-N matrix C with *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q C C Q -*> TRANS = 'C': Q**C C C Q**C +*> TRANS = 'C': Q**H C C Q**H *> *> where Q is a complex orthogonal matrix defined as the product of K *> elementary reflectors: *> -*> Q = H(1) H(2) . . . H(K) = I - V C V**C +*> Q = H(1) H(2) . . . H(K) = I - V T V**H *> *> generated using the compact WY representation as returned by CGELQT. *> @@ -40,15 +40,15 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**C from the Left; -*> = 'R': apply Q or Q**C from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'C': Transpose, apply Q**C. +*> = 'C': Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M @@ -82,7 +82,9 @@ *> *> \param[in] V *> \verbatim -*> V is COMPLEX array, dimension (LDV,K) +*> V is COMPLEX array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DGELQT in the first K rows of its array argument A. @@ -91,16 +93,14 @@ *> \param[in] LDV *> \verbatim *> LDV is INTEGER -*> The leading dimension of the array V. -*> If SIDE = 'L', LDA >= max(1,M); -*> if SIDE = 'R', LDA >= max(1,N). +*> The leading dimension of the array V. LDV >= max(1,K). *> \endverbatim *> *> \param[in] T *> \verbatim *> T is COMPLEX array, dimension (LDT,K) *> The upper triangular factors of the block reflectors -*> as returned by DGELQT, stored as a MB-by-M matrix. +*> as returned by DGELQT, stored as a MB-by-K matrix. *> \endverbatim *> *> \param[in] LDT @@ -113,7 +113,7 @@ *> \verbatim *> C is COMPLEX array, dimension (LDC,N) *> On entry, the M-by-N matrix C. -*> On exit, C is overwritten by Q C, Q**C C, C Q**C or C Q. +*> On exit, C is overwritten by Q C, Q**H C, C Q**H or C Q. *> \endverbatim *> *> \param[in] LDC @@ -143,7 +143,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -151,10 +151,10 @@ SUBROUTINE CGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, $ C, LDC, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -169,7 +169,7 @@ * .. * .. Local Scalars .. LOGICAL LEFT, RIGHT, TRAN, NOTRAN - INTEGER I, IB, LDWORK, KF, Q + INTEGER I, IB, LDWORK, KF * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/cgeqrt.f b/lapack-netlib/SRC/cgeqrt.f index 2b8bb6986..ef9c1c3c7 100644 --- a/lapack-netlib/SRC/cgeqrt.f +++ b/lapack-netlib/SRC/cgeqrt.f @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGEcomputational * @@ -133,7 +133,7 @@ *> block is of order NB except for the last block, which is of order *> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block *> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> for the last block) T's are stored in the NB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -141,10 +141,10 @@ * ===================================================================== SUBROUTINE CGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, NB diff --git a/lapack-netlib/SRC/cgesv.f b/lapack-netlib/SRC/cgesv.f index 7837e0fa4..2b0fb2153 100644 --- a/lapack-netlib/SRC/cgesv.f +++ b/lapack-netlib/SRC/cgesv.f @@ -1,4 +1,4 @@ -*> \brief CGESV computes the solution to system of linear equations A * X = B for GE matrices (simple driver) +*> \brief CGESV computes the solution to system of linear equations A * X = B for GE matrices (simple driver) * * =========== DOCUMENTATION =========== * @@ -115,17 +115,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGEsolve * * ===================================================================== SUBROUTINE CGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, N, NRHS diff --git a/lapack-netlib/SRC/cgesvdx.f b/lapack-netlib/SRC/cgesvdx.f index fdfb9734f..5b08d5732 100644 --- a/lapack-netlib/SRC/cgesvdx.f +++ b/lapack-netlib/SRC/cgesvdx.f @@ -270,7 +270,7 @@ $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, $ LWORK, RWORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -309,7 +309,8 @@ * .. * .. External Subroutines .. EXTERNAL CGEBRD, CGELQF, CGEQRF, CLASCL, CLASET, - $ SLASCL, XERBLA + $ CUNMBR, CUNMQR, CUNMLQ, CLACPY, + $ SBDSVDX, SLASCL, XERBLA * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/cgesvj.f b/lapack-netlib/SRC/cgesvj.f index 8dc6280fa..2a5ced225 100644 --- a/lapack-netlib/SRC/cgesvj.f +++ b/lapack-netlib/SRC/cgesvj.f @@ -52,7 +52,7 @@ * *> \param[in] JOBA *> \verbatim -*> JOBA is CHARACTER* 1 +*> JOBA is CHARACTER*1 *> Specifies the structure of A. *> = 'L': The input matrix A is lower triangular; *> = 'U': The input matrix A is upper triangular; @@ -206,7 +206,7 @@ *> *> \param[in,out] CWORK *> \verbatim -*> CWORK is COMPLEX array, dimension max(1,LWORK). +*> CWORK is COMPLEX array, dimension (max(1,LWORK)) *> Used as workspace. *> If on entry LWORK .EQ. -1, then a workspace query is assumed and *> no computation is done; CWORK(1) is set to the minial (and optimal) @@ -221,7 +221,7 @@ *> *> \param[in,out] RWORK *> \verbatim -*> RWORK is REAL array, dimension max(6,LRWORK). +*> RWORK is REAL array, dimension (max(6,LRWORK)) *> On entry, *> If JOBU .EQ. 'C' : *> RWORK(1) = CTOL, where CTOL defines the threshold for convergence. @@ -318,6 +318,8 @@ *> \par References: * ================ *> +*> \verbatim +*> *> [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the *> singular value decomposition on a vector computer. *> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. @@ -349,7 +351,7 @@ SUBROUTINE CGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, $ LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -407,7 +409,7 @@ * .. External Subroutines .. * .. * from BLAS - EXTERNAL CCOPY, CROT, CSSCAL, CSWAP + EXTERNAL CCOPY, CROT, CSSCAL, CSWAP, CAXPY * from LAPACK EXTERNAL CLASCL, CLASET, CLASSQ, SLASCL, XERBLA EXTERNAL CGSVJ0, CGSVJ1 diff --git a/lapack-netlib/SRC/cgetsls.f b/lapack-netlib/SRC/cgetsls.f index 56fb8063f..e7c5d8120 100644 --- a/lapack-netlib/SRC/cgetsls.f +++ b/lapack-netlib/SRC/cgetsls.f @@ -53,7 +53,7 @@ *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': the linear system involves A; -*> = 'C': the linear system involves A**C. +*> = 'C': the linear system involves A**H. *> \endverbatim *> *> \param[in] M @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGEsolve * @@ -160,10 +160,10 @@ SUBROUTINE CGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -187,8 +187,8 @@ INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, $ WSIZEO, WSIZEM, INFO2 - REAL ANRM, BIGNUM, BNRM, SMLNUM - COMPLEX TQ( 5 ), WORKQ + REAL ANRM, BIGNUM, BNRM, SMLNUM, DUM( 1 ) + COMPLEX TQ( 5 ), WORKQ( 1 ) * .. * .. External Functions .. LOGICAL LSAME @@ -236,31 +236,31 @@ IF( M.GE.N ) THEN CALL CGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL CGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL CGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL CGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZM, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM ELSE CALL CGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL CGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL CGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL CGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM END IF @@ -305,7 +305,7 @@ * * Scale A, B if max element outside range [SMLNUM,BIGNUM] * - ANRM = CLANGE( 'M', M, N, A, LDA, WORK ) + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) IASCL = 0 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN * @@ -331,7 +331,7 @@ IF ( TRAN ) THEN BROW = N END IF - BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, WORK ) + BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, DUM ) IBSCL = 0 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN * diff --git a/lapack-netlib/SRC/cggesx.f b/lapack-netlib/SRC/cggesx.f index 4d4964741..74169ff80 100644 --- a/lapack-netlib/SRC/cggesx.f +++ b/lapack-netlib/SRC/cggesx.f @@ -104,7 +104,7 @@ *> *> \param[in] SELCTG *> \verbatim -*> SELCTG is procedure) LOGICAL FUNCTION of two COMPLEX arguments +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX arguments *> SELCTG must be declared EXTERNAL in the calling subroutine. *> If SORT = 'N', SELCTG is not referenced. *> If SORT = 'S', SELCTG is used to select eigenvalues to sort @@ -320,7 +320,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGEeigen * @@ -330,10 +330,10 @@ $ LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, $ IWORK, LIWORK, BWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBVSL, JOBVSR, SENSE, SORT diff --git a/lapack-netlib/SRC/cgghd3.f b/lapack-netlib/SRC/cgghd3.f index a9468a24b..b86c9dc71 100644 --- a/lapack-netlib/SRC/cgghd3.f +++ b/lapack-netlib/SRC/cgghd3.f @@ -231,7 +231,7 @@ SUBROUTINE CGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, $ LDQ, Z, LDZ, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.1) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 @@ -271,7 +271,8 @@ EXTERNAL ILAENV, LSAME * .. * .. External Subroutines .. - EXTERNAL CGGHRD, CLARTG, CLASET, CUNM22, CROT, XERBLA + EXTERNAL CGGHRD, CLARTG, CLASET, CUNM22, CROT, CGEMM, + $ CGEMV, CTRMV, CLACPY, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC REAL, CMPLX, CONJG, MAX diff --git a/lapack-netlib/SRC/cgsvj0.f b/lapack-netlib/SRC/cgsvj0.f index ca817159c..80e67a06e 100644 --- a/lapack-netlib/SRC/cgsvj0.f +++ b/lapack-netlib/SRC/cgsvj0.f @@ -169,7 +169,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX array, dimension LWORK. +*> WORK is COMPLEX array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -218,7 +218,7 @@ SUBROUTINE CGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -268,7 +268,7 @@ * .. External Subroutines .. * .. * from BLAS - EXTERNAL CCOPY, CROT, CSWAP + EXTERNAL CCOPY, CROT, CSWAP, CAXPY * from LAPACK EXTERNAL CLASCL, CLASSQ, XERBLA * .. diff --git a/lapack-netlib/SRC/cgsvj1.f b/lapack-netlib/SRC/cgsvj1.f index 1689caaa2..bebcd5c45 100644 --- a/lapack-netlib/SRC/cgsvj1.f +++ b/lapack-netlib/SRC/cgsvj1.f @@ -199,7 +199,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX array, dimension LWORK. +*> WORK is COMPLEX array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -236,7 +236,7 @@ SUBROUTINE CGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -281,7 +281,7 @@ * .. * .. External Subroutines .. * .. from BLAS - EXTERNAL CCOPY, CROT, CSWAP + EXTERNAL CCOPY, CROT, CSWAP, CAXPY * .. from LAPACK EXTERNAL CLASCL, CLASSQ, XERBLA * .. diff --git a/lapack-netlib/SRC/chb2st_kernels.f b/lapack-netlib/SRC/chb2st_kernels.f index 77ddaed5d..25c9ab717 100644 --- a/lapack-netlib/SRC/chb2st_kernels.f +++ b/lapack-netlib/SRC/chb2st_kernels.f @@ -47,45 +47,87 @@ * Arguments: * ========== * -*> @param[in] n -*> The order of the matrix A. -*> -*> @param[in] nb -*> The size of the band. -*> -*> @param[in, out] A -*> A pointer to the matrix A. -*> -*> @param[in] lda -*> The leading dimension of the matrix A. +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim *> -*> @param[out] V -*> COMPLEX array, dimension 2*n if eigenvalues only are -*> requested or to be queried for vectors. +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim *> -*> @param[out] TAU -*> COMPLEX array, dimension (2*n). -*> The scalar factors of the Householder reflectors are stored -*> in this array. +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim *> -*> @param[in] st +*> \param[in] ST +*> \verbatim +*> ST is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] ed +*> \param[in] ED +*> \verbatim +*> ED is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] sweep +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] Vblksiz -*> internal parameter for indices. +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim *> -*> @param[in] wantz -*> logical which indicate if Eigenvalue are requested or both -*> Eigenvalue/Eigenvectors. +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is COMPLEX array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim *> -*> @param[in] work -*> Workspace of size nb. +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array. Workspace of size nb. +*> \endverbatim *> *> \par Further Details: * ===================== @@ -128,10 +170,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/chbev_2stage.f b/lapack-netlib/SRC/chbev_2stage.f index 5ced8c977..179fb5a28 100644 --- a/lapack-netlib/SRC/chbev_2stage.f +++ b/lapack-netlib/SRC/chbev_2stage.f @@ -171,7 +171,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexOTHEReigen * @@ -213,10 +213,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -242,13 +242,13 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, CLANHB - EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR, - $ CHETRD_2STAGE + $ CHETRD_2STAGE, CHETRD_HB2ST * .. * .. Intrinsic Functions .. INTRINSIC REAL, SQRT @@ -281,9 +281,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/chbevd_2stage.f b/lapack-netlib/SRC/chbevd_2stage.f index f8296a443..400233976 100644 --- a/lapack-netlib/SRC/chbevd_2stage.f +++ b/lapack-netlib/SRC/chbevd_2stage.f @@ -219,7 +219,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexOTHEReigen * @@ -262,10 +262,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -296,9 +296,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, CLANHB - EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SSCAL, SSTERF, XERBLA, CGEMM, CLACPY, @@ -321,9 +321,9 @@ LRWMIN = 1 LIWMIN = 1 ELSE - IB = ILAENV( 18, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) IF( WANTZ ) THEN LWMIN = 2*N**2 LRWMIN = 1 + 5*N + 2*N**2 diff --git a/lapack-netlib/SRC/chbevx_2stage.f b/lapack-netlib/SRC/chbevx_2stage.f index e2cb8ca5e..ddc95e188 100644 --- a/lapack-netlib/SRC/chbevx_2stage.f +++ b/lapack-netlib/SRC/chbevx_2stage.f @@ -329,7 +329,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -369,9 +369,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, CLANHB - EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CCOPY, @@ -429,9 +429,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/checon_3.f b/lapack-netlib/SRC/checon_3.f index 8b18dacdb..6427dd594 100644 --- a/lapack-netlib/SRC/checon_3.f +++ b/lapack-netlib/SRC/checon_3.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexHEcomputational * @@ -157,7 +157,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -171,10 +171,10 @@ SUBROUTINE CHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, $ WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/cheequb.f b/lapack-netlib/SRC/cheequb.f index f324a08c4..3c6085ed2 100644 --- a/lapack-netlib/SRC/cheequb.f +++ b/lapack-netlib/SRC/cheequb.f @@ -132,7 +132,7 @@ * ===================================================================== SUBROUTINE CHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -168,7 +168,7 @@ EXTERNAL LSAME, SLAMCH * .. * .. External Subroutines .. - EXTERNAL CLASSQ + EXTERNAL CLASSQ, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, INT, LOG, MAX, MIN, REAL, SQRT diff --git a/lapack-netlib/SRC/cheev_2stage.f b/lapack-netlib/SRC/cheev_2stage.f index 55a0ed60a..a9d04ab81 100644 --- a/lapack-netlib/SRC/cheev_2stage.f +++ b/lapack-netlib/SRC/cheev_2stage.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEeigen * @@ -191,10 +191,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -222,9 +222,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, CLANHE - EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR, @@ -253,10 +253,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/cheevd_2stage.f b/lapack-netlib/SRC/cheevd_2stage.f index 56e2bac5c..6b31d4b73 100644 --- a/lapack-netlib/SRC/cheevd_2stage.f +++ b/lapack-netlib/SRC/cheevd_2stage.f @@ -202,7 +202,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEeigen * @@ -255,10 +255,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -291,9 +291,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, CLANHE - EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SSCAL, SSTERF, XERBLA, CLACPY, CLASCL, @@ -327,10 +327,14 @@ LRWMIN = 1 LIWMIN = 1 ELSE - KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) IF( WANTZ ) THEN LWMIN = 2*N + N*N LRWMIN = 1 + 5*N + 2*N**2 diff --git a/lapack-netlib/SRC/cheevr_2stage.f b/lapack-netlib/SRC/cheevr_2stage.f index 361addd1e..20a1cb3f3 100644 --- a/lapack-netlib/SRC/cheevr_2stage.f +++ b/lapack-netlib/SRC/cheevr_2stage.f @@ -408,7 +408,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -445,9 +445,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV, ILAENV2STAGE REAL SLAMCH, CLANSY - EXTERNAL LSAME, ILAENV, SLAMCH, CLANSY + EXTERNAL LSAME, SLAMCH, CLANSY, ILAENV, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, @@ -471,10 +471,10 @@ LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. $ ( LIWORK.EQ.-1 ) ) * - KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD LRWMIN = MAX( 1, 24*N ) LIWMIN = MAX( 1, 10*N ) diff --git a/lapack-netlib/SRC/cheevx_2stage.f b/lapack-netlib/SRC/cheevx_2stage.f index 002dddb45..59c2ae8dc 100644 --- a/lapack-netlib/SRC/cheevx_2stage.f +++ b/lapack-netlib/SRC/cheevx_2stage.f @@ -308,7 +308,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -345,9 +345,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, CLANHE - EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, @@ -402,10 +402,14 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN END IF diff --git a/lapack-netlib/SRC/chegv_2stage.f b/lapack-netlib/SRC/chegv_2stage.f index 11956def5..77bc7fc19 100644 --- a/lapack-netlib/SRC/chegv_2stage.f +++ b/lapack-netlib/SRC/chegv_2stage.f @@ -47,7 +47,7 @@ *> positive definite. *> This routine use the 2stage technique for the reduction to tridiagonal *> which showed higher performance on recent architecture and for large -* sizes N>2000. +*> sizes N>2000. *> \endverbatim * * Arguments: @@ -192,7 +192,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEeigen * @@ -234,10 +234,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -261,8 +261,8 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL XERBLA, CHEGST, CPOTRF, CTRMM, CTRSM, @@ -295,10 +295,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/chesv_aa.f b/lapack-netlib/SRC/chesv_aa.f index d08740559..0bf636b48 100644 --- a/lapack-netlib/SRC/chesv_aa.f +++ b/lapack-netlib/SRC/chesv_aa.f @@ -129,8 +129,6 @@ *> The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for best *> performance LWORK >= MAX(1,N*NB), where NB is the optimal *> blocksize for CHETRF. -*> for LWORK < N, TRS will be done with Level BLAS 2 -*> for LWORK >= N, TRS will be done with Level BLAS 3 *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -156,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEsolve * @@ -164,10 +162,10 @@ SUBROUTINE CHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, $ LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -190,7 +188,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA, CHETRF, CHETRS, CHETRS2 + EXTERNAL XERBLA, CHETRF_AA, CHETRS_AA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/chesv_aa_2stage.f b/lapack-netlib/SRC/chesv_aa_2stage.f new file mode 100644 index 000000000..057d9c57a --- /dev/null +++ b/lapack-netlib/SRC/chesv_aa_2stage.f @@ -0,0 +1,276 @@ +*> \brief CHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is Hermitian and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHETRF_AA_2STAGE, CHETRS_AA_2STAGE, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* +* End of CHESV_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/chetrd_2stage.f b/lapack-netlib/SRC/chetrd_2stage.f index 56d55f6e9..e7370a4dd 100644 --- a/lapack-netlib/SRC/chetrd_2stage.f +++ b/lapack-netlib/SRC/chetrd_2stage.f @@ -144,7 +144,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX array, dimension LWORK. +*> WORK is COMPLEX array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEcomputational * @@ -227,10 +227,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER VECT, UPLO @@ -253,8 +253,8 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. Executable Statements .. * @@ -267,10 +267,10 @@ * * Determine the block size, the workspace size and the hous size. * - KD = ILAENV( 17, 'CHETRD_2STAGE', VECT, N, -1, -1, -1 ) - IB = ILAENV( 18, 'CHETRD_2STAGE', VECT, N, KD, -1, -1 ) - LHMIN = ILAENV( 19, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) - LWMIN = ILAENV( 20, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) * WRITE(*,*),'CHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, * $ LHMIN, LWMIN * diff --git a/lapack-netlib/SRC/chetrd_hb2st.F b/lapack-netlib/SRC/chetrd_hb2st.F index 83c5c262a..b96e5db95 100644 --- a/lapack-netlib/SRC/chetrd_hb2st.F +++ b/lapack-netlib/SRC/chetrd_hb2st.F @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexOTHERcomputational * @@ -237,10 +237,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER STAGE1, UPLO, VECT @@ -273,7 +273,7 @@ COMPLEX TMP * .. * .. External Subroutines .. - EXTERNAL CHB2ST_KERNELS, CLACPY, CLASET + EXTERNAL CHB2ST_KERNELS, CLACPY, CLASET, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, CEILING, REAL diff --git a/lapack-netlib/SRC/chetrd_he2hb.f b/lapack-netlib/SRC/chetrd_he2hb.f index 677f18287..fd8c3fbe0 100644 --- a/lapack-netlib/SRC/chetrd_he2hb.f +++ b/lapack-netlib/SRC/chetrd_he2hb.f @@ -123,7 +123,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX array, dimension LWORK. +*> WORK is COMPLEX array, dimension (LWORK) *> On exit, if INFO = 0, or if LWORK=-1, *> WORK(1) returns the size of LWORK. *> \endverbatim @@ -132,7 +132,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK which should be calculated -* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns *> this value as the first entry of the WORK array, and no error @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEcomputational * @@ -222,7 +222,7 @@ *> *> where tau is a complex scalar, and v is a complex vector with *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in -* A(i+kd+2:n,i), and tau in TAU(i). +*> A(i+kd+2:n,i), and tau in TAU(i). *> *> The contents of A on exit are illustrated by the following examples *> with n = 5: @@ -245,10 +245,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -277,7 +277,7 @@ $ TPOS, WPOS, S2POS, S1POS * .. * .. External Subroutines .. - EXTERNAL XERBLA, CHER2K, CHEMM, CGEMM, + EXTERNAL XERBLA, CHER2K, CHEMM, CGEMM, CCOPY, $ CLARFT, CGELQF, CGEQRF, CLASET * .. * .. Intrinsic Functions .. diff --git a/lapack-netlib/SRC/chetrf_aa.f b/lapack-netlib/SRC/chetrf_aa.f index 153a089de..2c5564893 100644 --- a/lapack-netlib/SRC/chetrf_aa.f +++ b/lapack-netlib/SRC/chetrf_aa.f @@ -114,11 +114,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. +*> < 0: if INFO = -i, the i-th argument had an illegal value. *> \endverbatim * * Authors: @@ -129,17 +125,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEcomputational * * ===================================================================== SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +155,7 @@ * * .. Local Scalars .. LOGICAL LQUERY, UPPER - INTEGER J, LWKOPT, IINFO + INTEGER J, LWKOPT INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB COMPLEX ALPHA * .. @@ -169,7 +165,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL CLAHEF_AA, CGEMM, CCOPY, CSWAP, CSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC REAL, CONJG, MAX @@ -178,7 +174,7 @@ * * Determine the block size * - NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + NB = ILAENV( 1, 'CHETRF_AA', UPLO, N, -1, -1, -1 ) * * Test the input parameters. * @@ -215,13 +211,10 @@ IPIV( 1 ) = 1 IF ( N.EQ.1 ) THEN A( 1, 1 ) = REAL( A( 1, 1 ) ) - IF ( A( 1, 1 ).EQ.ZERO ) THEN - INFO = 1 - END IF RETURN END IF * -* Adjubst block size based on the workspace size +* Adjust block size based on the workspace size * IF( LWORK.LT.((1+NB)*N) ) THEN NB = ( LWORK-N ) / N @@ -261,11 +254,7 @@ * CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB, $ A( MAX(1, J), J+1 ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), - $ IINFO ) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * @@ -385,10 +374,7 @@ * CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB, $ A( J+1, MAX(1, J) ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * diff --git a/lapack-netlib/SRC/chetrf_aa_2stage.f b/lapack-netlib/SRC/chetrf_aa_2stage.f new file mode 100644 index 000000000..0fa2ae3a0 --- /dev/null +++ b/lapack-netlib/SRC/chetrf_aa_2stage.f @@ -0,0 +1,664 @@ +*> \brief \b CHETRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRF_AA_2STAGE computes the factorization of a real hermitian matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a hermitian band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV + +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CCOPY, CLACGV, CLACPY, + $ CLASET, CGBTRF, CGEMM, CGETRF, + $ CHEGST, CSWAP, CTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'CHETRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL CGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL CGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL CHEGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = CONJG( TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'Conjugate transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call CGETRF +* + DO K = 1, NB + CALL CCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB +* +* Copy only L-factor +* + CALL CCOPY( N-K-(J+1)*NB, + $ WORK( K+1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+K+1 ), LDA ) +* +* Transpose U-factor to be copied back into T(J+1, J) +* + CALL CLACGV( K, WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = CONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL CLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) + CALL CLACGV( I2-I1, A( I1, I1+1 ), LDA ) + CALL CLACGV( I2-I1-1, A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL CHEGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = CONJG( TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'L', 'C', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = CONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL CLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) + CALL CLACGV( I2-I1, A( I1+1, I1 ), 1 ) + CALL CLACGV( I2-I1-1, A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL CLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of CHETRF_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/chetri2.f b/lapack-netlib/SRC/chetri2.f index 684bacfc3..722d13008 100644 --- a/lapack-netlib/SRC/chetri2.f +++ b/lapack-netlib/SRC/chetri2.f @@ -120,17 +120,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEcomputational * * ===================================================================== SUBROUTINE CHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -153,7 +153,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL CHETRI2X + EXTERNAL CHETRI2X, CHETRI, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/chetri_3.f b/lapack-netlib/SRC/chetri_3.f index 8f1527dbb..0b31f533b 100644 --- a/lapack-netlib/SRC/chetri_3.f +++ b/lapack-netlib/SRC/chetri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEcomputational * @@ -160,7 +160,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -170,10 +170,10 @@ SUBROUTINE CHETRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -196,7 +196,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL CHETRI_3X + EXTERNAL CHETRI_3X, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/chetri_3x.f b/lapack-netlib/SRC/chetri_3x.f index c8fc3d9c7..d4cddc1c0 100644 --- a/lapack-netlib/SRC/chetri_3x.f +++ b/lapack-netlib/SRC/chetri_3x.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexHEcomputational * @@ -150,7 +150,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -159,10 +159,10 @@ * ===================================================================== SUBROUTINE CHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/chetrs_3.f b/lapack-netlib/SRC/chetrs_3.f index ade0a156b..c7c77b971 100644 --- a/lapack-netlib/SRC/chetrs_3.f +++ b/lapack-netlib/SRC/chetrs_3.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexHEcomputational * @@ -151,7 +151,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -165,10 +165,10 @@ SUBROUTINE CHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/chetrs_aa.f b/lapack-netlib/SRC/chetrs_aa.f index f6640c509..50e5692db 100644 --- a/lapack-netlib/SRC/chetrs_aa.f +++ b/lapack-netlib/SRC/chetrs_aa.f @@ -66,7 +66,7 @@ *> of the matrix B. NRHS >= 0. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is COMPLEX array, dimension (LDA,N) *> Details of factors computed by CHETRF_AA. @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexHEcomputational * @@ -129,10 +129,10 @@ SUBROUTINE CHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +159,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL CGTSV, CSWAP, CTRSM, XERBLA + EXTERNAL CLACPY, CLACGV, CGTSV, CSWAP, CTRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/chetrs_aa_2stage.f b/lapack-netlib/SRC/chetrs_aa_2stage.f new file mode 100644 index 000000000..3f8576673 --- /dev/null +++ b/lapack-netlib/SRC/chetrs_aa_2stage.f @@ -0,0 +1,283 @@ +*> \brief \b CHETRS_AA_2STAGE +* +* @generated from SRC/dsytrs_aa_2stage.f, fortran d -> c, Mon Oct 30 11:59:02 2017 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRS_AA_2STAGE solves a system of linear equations A*X = B with a real +*> hermitian matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by CHETRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Details of factors computed by CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> Details of factors computed by CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGBTRS, CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL CTRSM( 'L', 'U', 'C', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL CTRSM( 'L', 'L', 'C', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of CHETRS_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/cla_gbrfsx_extended.f b/lapack-netlib/SRC/cla_gbrfsx_extended.f index 441518022..888ecd4f7 100644 --- a/lapack-netlib/SRC/cla_gbrfsx_extended.f +++ b/lapack-netlib/SRC/cla_gbrfsx_extended.f @@ -208,8 +208,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -255,8 +254,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -399,7 +397,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGBcomputational * @@ -412,10 +410,10 @@ $ Y_TAIL, RCOND, ITHRESH, RTHRESH, $ DZ_UB, IGNORE_CWISE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, diff --git a/lapack-netlib/SRC/cla_heamv.f b/lapack-netlib/SRC/cla_heamv.f index 6a3eef1bf..38a949292 100644 --- a/lapack-netlib/SRC/cla_heamv.f +++ b/lapack-netlib/SRC/cla_heamv.f @@ -89,7 +89,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, n ). *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexHEcomputational * @@ -178,10 +178,10 @@ SUBROUTINE CLA_HEAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL ALPHA, BETA diff --git a/lapack-netlib/SRC/cla_herfsx_extended.f b/lapack-netlib/SRC/cla_herfsx_extended.f index 7af5441e2..c69589dfa 100644 --- a/lapack-netlib/SRC/cla_herfsx_extended.f +++ b/lapack-netlib/SRC/cla_herfsx_extended.f @@ -161,8 +161,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array, dimension -*> (LDY,NRHS) +*> Y is COMPLEX array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by CHETRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -194,8 +193,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -241,8 +239,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -385,7 +382,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexHEcomputational * @@ -398,10 +395,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/cla_porfsx_extended.f b/lapack-netlib/SRC/cla_porfsx_extended.f index 73184d761..3a3409c9e 100644 --- a/lapack-netlib/SRC/cla_porfsx_extended.f +++ b/lapack-netlib/SRC/cla_porfsx_extended.f @@ -153,8 +153,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array, dimension -*> (LDY,NRHS) +*> Y is COMPLEX array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by CPOTRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -186,8 +185,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -233,8 +231,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -377,7 +374,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexPOcomputational * @@ -390,10 +387,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/cla_syamv.f b/lapack-netlib/SRC/cla_syamv.f index 362d4559d..e1d3df960 100644 --- a/lapack-netlib/SRC/cla_syamv.f +++ b/lapack-netlib/SRC/cla_syamv.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX array of DIMENSION ( LDA, n ). +*> A is COMPLEX array, dimension ( LDA, n ). *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -155,7 +155,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexSYcomputational * @@ -179,10 +179,10 @@ SUBROUTINE CLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL ALPHA, BETA diff --git a/lapack-netlib/SRC/cla_syrfsx_extended.f b/lapack-netlib/SRC/cla_syrfsx_extended.f index f99801c42..5d2fa0cbb 100644 --- a/lapack-netlib/SRC/cla_syrfsx_extended.f +++ b/lapack-netlib/SRC/cla_syrfsx_extended.f @@ -161,8 +161,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX array, dimension -*> (LDY,NRHS) +*> Y is COMPLEX array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by CSYTRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -194,8 +193,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -241,8 +239,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -385,7 +382,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexSYcomputational * @@ -398,10 +395,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/clabrd.f b/lapack-netlib/SRC/clabrd.f index 87bcb1bcb..7830222b8 100644 --- a/lapack-netlib/SRC/clabrd.f +++ b/lapack-netlib/SRC/clabrd.f @@ -111,7 +111,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is COMPLEX array dimension (NB) +*> TAUQ is COMPLEX array, dimension (NB) *> The scalar factors of the elementary reflectors which *> represent the unitary matrix Q. See Further Details. *> \endverbatim @@ -157,7 +157,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexOTHERauxiliary * @@ -212,10 +212,10 @@ SUBROUTINE CLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, $ LDY ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, LDX, LDY, M, N, NB diff --git a/lapack-netlib/SRC/clahef_aa.f b/lapack-netlib/SRC/clahef_aa.f index f3a9add2a..88bc3d216 100644 --- a/lapack-netlib/SRC/clahef_aa.f +++ b/lapack-netlib/SRC/clahef_aa.f @@ -19,11 +19,11 @@ * =========== * * SUBROUTINE CLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, -* H, LDH, WORK, INFO ) +* H, LDH, WORK ) * * .. Scalar Arguments .. * CHARACTER UPLO -* INTEGER J1, M, NB, LDA, LDH, INFO +* INTEGER J1, M, NB, LDA, LDH * .. * .. Array Arguments .. * INTEGER IPIV( * ) @@ -127,16 +127,6 @@ *> WORK is COMPLEX workspace, dimension (M). *> \endverbatim *> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. -*> \endverbatim * * Authors: * ======== @@ -146,24 +136,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * * ===================================================================== SUBROUTINE CLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, - $ H, LDH, WORK, INFO ) + $ H, LDH, WORK ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * * .. Scalar Arguments .. CHARACTER UPLO - INTEGER M, NB, J1, LDA, LDH, INFO + INTEGER M, NB, J1, LDA, LDH * .. * .. Array Arguments .. INTEGER IPIV( * ) @@ -176,7 +166,7 @@ PARAMETER ( ZERO = (0.0E+0, 0.0E+0), ONE = (1.0E+0, 0.0E+0) ) * * .. Local Scalars .. - INTEGER J, K, K1, I1, I2 + INTEGER J, K, K1, I1, I2, MJ COMPLEX PIV, ALPHA * .. * .. External Functions .. @@ -185,14 +175,14 @@ EXTERNAL LSAME, ILAENV, ICAMAX * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL CLACGV, CGEMV, CSCAL, CAXPY, CCOPY, CSWAP, CLASET, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC REAL, CONJG, MAX * .. * .. Executable Statements .. * - INFO = 0 J = 1 * * K1 is the first column of the panel to be factorized @@ -216,6 +206,14 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * * H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), * where H(J:N, J) has been initialized to be A(J, J:N) @@ -229,7 +227,7 @@ * first column * CALL CLACGV( J-K1, A( 1, J ), 1 ) - CALL CGEMV( 'No transpose', M-J+1, J-K1, + CALL CGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( 1, J ), 1, $ ONE, H( J, J ), 1 ) @@ -238,7 +236,7 @@ * * Copy H(i:n, i) into WORK * - CALL CCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * @@ -246,7 +244,7 @@ * where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) * ALPHA = -CONJG( A( K-1, J ) ) - CALL CAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + CALL CAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -319,12 +317,6 @@ * Set A(J, J+1) = T(J, J+1) * A( K, J+1 ) = WORK( 2 ) - IF( (A( K, J ).EQ.ZERO ) .AND. - $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN - IF(INFO .EQ. 0) THEN - INFO = J - END IF - END IF * IF( J.LT.NB ) THEN * @@ -345,10 +337,6 @@ CALL CLASET( 'Full', 1, M-J-1, ZERO, ZERO, $ A( K, J+2 ), LDA) END IF - ELSE - IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 10 @@ -370,6 +358,14 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * * H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, * where H(J:N, J) has been initialized to be A(J:N, J) @@ -383,7 +379,7 @@ * first column * CALL CLACGV( J-K1, A( J, 1 ), LDA ) - CALL CGEMV( 'No transpose', M-J+1, J-K1, + CALL CGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( J, 1 ), LDA, $ ONE, H( J, J ), 1 ) @@ -392,7 +388,7 @@ * * Copy H(J:N, J) into WORK * - CALL CCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * @@ -400,7 +396,7 @@ * where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) * ALPHA = -CONJG( A( J, K-1 ) ) - CALL CAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + CALL CAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -473,11 +469,6 @@ * Set A(J+1, J) = T(J+1, J) * A( J+1, K ) = WORK( 2 ) - IF( (A( J, K ).EQ.ZERO) .AND. - $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN - IF (INFO .EQ. 0) - $ INFO = J - END IF * IF( J.LT.NB ) THEN * @@ -498,9 +489,6 @@ CALL CLASET( 'Full', M-J-1, 1, ZERO, ZERO, $ A( J+2, K ), LDA ) END IF - ELSE - IF( (A( J, K ).EQ.ZERO) .AND. (J.EQ.M) - $ .AND. (INFO.EQ.0) ) INFO = J END IF J = J + 1 GO TO 30 diff --git a/lapack-netlib/SRC/clalsa.f b/lapack-netlib/SRC/clalsa.f index 8a817924d..004d68fba 100644 --- a/lapack-netlib/SRC/clalsa.f +++ b/lapack-netlib/SRC/clalsa.f @@ -232,8 +232,7 @@ *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array. -*> The dimension must be at least 3 * N +*> IWORK is INTEGER array, dimension (3*N) *> \endverbatim *> *> \param[out] INFO @@ -251,7 +250,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexOTHERcomputational * @@ -268,10 +267,10 @@ $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, $ IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, diff --git a/lapack-netlib/SRC/clamswlq.f b/lapack-netlib/SRC/clamswlq.f index fd19f0af7..f2f9ab7f9 100644 --- a/lapack-netlib/SRC/clamswlq.f +++ b/lapack-netlib/SRC/clamswlq.f @@ -23,7 +23,7 @@ *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q * C C * Q -*> TRANS = 'T': Q**T * C C * Q**T +*> TRANS = 'T': Q**H * C C * Q**H *> where Q is a real orthogonal matrix defined as the product of blocked *> elementary reflectors computed by short wide LQ *> factorization (CLASWLQ) @@ -35,21 +35,21 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**T from the Left; -*> = 'R': apply Q or Q**T from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'T': Transpose, apply Q**T. +*> = 'C': Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER -*> The number of rows of the matrix A. M >=0. +*> The number of rows of the matrix C. M >=0. *> \endverbatim *> *> \param[in] N @@ -88,12 +88,14 @@ *> *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim -*> A is COMPLEX array, dimension (LDA,K) +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the blocked *> elementary reflector H(i), for i = 1,2,...,k, as returned by -*> DLASWLQ in the first k rows of its array argument A. +*> CLASWLQ in the first k rows of its array argument A. *> \endverbatim *> *> \param[in] LDA @@ -123,7 +125,7 @@ *> \verbatim *> C is COMPLEX array, dimension (LDC,N) *> On entry, the M-by-N matrix C. -*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. *> \endverbatim *> *> \param[in] LDC @@ -200,10 +202,10 @@ SUBROUTINE CLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -219,7 +221,7 @@ * .. * .. Local Scalars .. LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY - INTEGER I, II, KK, LW , CTR + INTEGER I, II, KK, LW, CTR * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/clamtsqr.f b/lapack-netlib/SRC/clamtsqr.f index a787caab6..77d09a573 100644 --- a/lapack-netlib/SRC/clamtsqr.f +++ b/lapack-netlib/SRC/clamtsqr.f @@ -23,7 +23,7 @@ *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q * C C * Q -*> TRANS = 'C': Q**C * C C * Q**C +*> TRANS = 'C': Q**H * C C * Q**H *> where Q is a real orthogonal matrix defined as the product *> of blocked elementary reflectors computed by tall skinny *> QR factorization (CLATSQR) @@ -35,15 +35,15 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**T from the Left; -*> = 'R': apply Q or Q**T from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'C': Conjugate Transpose, apply Q**C. +*> = 'C': Conjugate Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M @@ -81,7 +81,7 @@ *> N >= NB >= 1. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is COMPLEX array, dimension (LDA,K) *> The i-th column must contain the vector which defines the @@ -117,7 +117,7 @@ *> \verbatim *> C is COMPLEX array, dimension (LDC,N) *> On entry, the M-by-N matrix C. -*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. *> \endverbatim *> *> \param[in] LDC @@ -195,10 +195,10 @@ SUBROUTINE CLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS diff --git a/lapack-netlib/SRC/claqr1.f b/lapack-netlib/SRC/claqr1.f index d3141e8ad..b76bedf60 100644 --- a/lapack-netlib/SRC/claqr1.f +++ b/lapack-netlib/SRC/claqr1.f @@ -50,19 +50,19 @@ * *> \param[in] N *> \verbatim -*> N is integer +*> N is INTEGER *> Order of the matrix H. N must be either 2 or 3. *> \endverbatim *> *> \param[in] H *> \verbatim -*> H is COMPLEX array of dimension (LDH,N) +*> H is COMPLEX array, dimension (LDH,N) *> The 2-by-2 or 3-by-3 matrix H in (*). *> \endverbatim *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> The leading dimension of H as declared in *> the calling procedure. LDH.GE.N *> \endverbatim @@ -81,7 +81,7 @@ *> *> \param[out] V *> \verbatim -*> V is COMPLEX array of dimension N +*> V is COMPLEX array, dimension (N) *> A scalar multiple of the first column of the *> matrix K in (*). *> \endverbatim @@ -94,7 +94,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexOTHERauxiliary * @@ -107,10 +107,10 @@ * ===================================================================== SUBROUTINE CLAQR1( N, H, LDH, S1, S2, V ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. COMPLEX S1, S2 diff --git a/lapack-netlib/SRC/claqr2.f b/lapack-netlib/SRC/claqr2.f index aead5d661..03e9760cf 100644 --- a/lapack-netlib/SRC/claqr2.f +++ b/lapack-netlib/SRC/claqr2.f @@ -118,7 +118,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -146,14 +146,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -161,14 +161,14 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim *> *> \param[out] SH *> \verbatim -*> SH is COMPLEX array, dimension KBOT +*> SH is COMPLEX array, dimension (KBOT) *> On output, approximate eigenvalues that may *> be used for shifts are stored in SH(KBOT-ND-NS+1) *> through SR(KBOT-ND). Converged eigenvalues are @@ -183,14 +183,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -201,14 +201,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -220,21 +220,21 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX array, dimension LWORK. +*> WORK is COMPLEX array, dimension (LWORK) *> On exit, WORK(1) is set to an estimate of the optimal value *> of LWORK for the given values of N, NW, KTOP and KBOT. *> \endverbatim *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -254,7 +254,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexOTHERauxiliary * @@ -269,10 +269,10 @@ $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, diff --git a/lapack-netlib/SRC/claqr3.f b/lapack-netlib/SRC/claqr3.f index eff3f01e7..660a58376 100644 --- a/lapack-netlib/SRC/claqr3.f +++ b/lapack-netlib/SRC/claqr3.f @@ -115,7 +115,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -143,14 +143,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -158,14 +158,14 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim *> *> \param[out] SH *> \verbatim -*> SH is COMPLEX array, dimension KBOT +*> SH is COMPLEX array, dimension (KBOT) *> On output, approximate eigenvalues that may *> be used for shifts are stored in SH(KBOT-ND-NS+1) *> through SR(KBOT-ND). Converged eigenvalues are @@ -180,14 +180,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -198,14 +198,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -217,21 +217,21 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX array, dimension LWORK. +*> WORK is COMPLEX array, dimension (LWORK) *> On exit, WORK(1) is set to an estimate of the optimal value *> of LWORK for the given values of N, NW, KTOP and KBOT. *> \endverbatim *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -266,7 +266,7 @@ $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/claqr4.f b/lapack-netlib/SRC/claqr4.f index 573b1aab4..647fa6774 100644 --- a/lapack-netlib/SRC/claqr4.f +++ b/lapack-netlib/SRC/claqr4.f @@ -181,7 +181,6 @@ *> *> \param[out] INFO *> \verbatim -*> \verbatim *> INFO is INTEGER *> = 0: successful exit *> .GT. 0: if INFO = i, CLAQR4 failed to compute all of @@ -223,7 +222,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexOTHERauxiliary * @@ -249,10 +248,10 @@ SUBROUTINE CLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, $ IHIZ, Z, LDZ, WORK, LWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N diff --git a/lapack-netlib/SRC/claqr5.f b/lapack-netlib/SRC/claqr5.f index 94a5bdf2c..4c897895d 100644 --- a/lapack-netlib/SRC/claqr5.f +++ b/lapack-netlib/SRC/claqr5.f @@ -47,21 +47,21 @@ * *> \param[in] WANTT *> \verbatim -*> WANTT is logical scalar +*> WANTT is LOGICAL *> WANTT = .true. if the triangular Schur factor *> is being computed. WANTT is set to .false. otherwise. *> \endverbatim *> *> \param[in] WANTZ *> \verbatim -*> WANTZ is logical scalar +*> WANTZ is LOGICAL *> WANTZ = .true. if the unitary Schur factor is being *> computed. WANTZ is set to .false. otherwise. *> \endverbatim *> *> \param[in] KACC22 *> \verbatim -*> KACC22 is integer with value 0, 1, or 2. +*> KACC22 is INTEGER with value 0, 1, or 2. *> Specifies the computation mode of far-from-diagonal *> orthogonal updates. *> = 0: CLAQR5 does not accumulate reflections and does not @@ -77,19 +77,19 @@ *> *> \param[in] N *> \verbatim -*> N is integer scalar +*> N is INTEGER *> N is the order of the Hessenberg matrix H upon which this *> subroutine operates. *> \endverbatim *> *> \param[in] KTOP *> \verbatim -*> KTOP is integer scalar +*> KTOP is INTEGER *> \endverbatim *> *> \param[in] KBOT *> \verbatim -*> KBOT is integer scalar +*> KBOT is INTEGER *> These are the first and last rows and columns of an *> isolated diagonal block upon which the QR sweep is to be *> applied. It is assumed without a check that @@ -100,21 +100,21 @@ *> *> \param[in] NSHFTS *> \verbatim -*> NSHFTS is integer scalar +*> NSHFTS is INTEGER *> NSHFTS gives the number of simultaneous shifts. NSHFTS *> must be positive and even. *> \endverbatim *> *> \param[in,out] S *> \verbatim -*> S is COMPLEX array of size (NSHFTS) +*> S is COMPLEX array, dimension (NSHFTS) *> S contains the shifts of origin that define the multi- *> shift QR sweep. On output S may be reordered. *> \endverbatim *> *> \param[in,out] H *> \verbatim -*> H is COMPLEX array of size (LDH,N) +*> H is COMPLEX array, dimension (LDH,N) *> On input H contains a Hessenberg matrix. On output a *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied *> to the isolated diagonal block in rows and columns KTOP @@ -123,7 +123,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer scalar +*> LDH is INTEGER *> LDH is the leading dimension of H just as declared in the *> calling procedure. LDH.GE.MAX(1,N). *> \endverbatim @@ -142,7 +142,7 @@ *> *> \param[in,out] Z *> \verbatim -*> Z is COMPLEX array of size (LDZ,IHIZ) +*> Z is COMPLEX array, dimension (LDZ,IHIZ) *> If WANTZ = .TRUE., then the QR Sweep unitary *> similarity transformation is accumulated into *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. @@ -151,71 +151,69 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer scalar +*> LDZ is INTEGER *> LDA is the leading dimension of Z just as declared in *> the calling procedure. LDZ.GE.N. *> \endverbatim *> *> \param[out] V *> \verbatim -*> V is COMPLEX array of size (LDV,NSHFTS/2) +*> V is COMPLEX array, dimension (LDV,NSHFTS/2) *> \endverbatim *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> LDV is the leading dimension of V as declared in the *> calling procedure. LDV.GE.3. *> \endverbatim *> *> \param[out] U *> \verbatim -*> U is COMPLEX array of size -*> (LDU,3*NSHFTS-3) +*> U is COMPLEX array, dimension (LDU,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDU *> \verbatim -*> LDU is integer scalar +*> LDU is INTEGER *> LDU is the leading dimension of U just as declared in the *> in the calling subroutine. LDU.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> NH is the number of columns in array WH available for *> workspace. NH.GE.1. *> \endverbatim *> *> \param[out] WH *> \verbatim -*> WH is COMPLEX array of size (LDWH,NH) +*> WH is COMPLEX array, dimension (LDWH,NH) *> \endverbatim *> *> \param[in] LDWH *> \verbatim -*> LDWH is integer scalar +*> LDWH is INTEGER *> Leading dimension of WH just as declared in the *> calling procedure. LDWH.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer scalar +*> NV is INTEGER *> NV is the number of rows in WV agailable for workspace. *> NV.GE.1. *> \endverbatim *> *> \param[out] WV *> \verbatim -*> WV is COMPLEX array of size -*> (LDWV,3*NSHFTS-3) +*> WV is COMPLEX array, dimension (LDWV,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer scalar +*> LDWV is INTEGER *> LDWV is the leading dimension of WV as declared in the *> in the calling subroutine. LDWV.GE.NV. *> \endverbatim @@ -251,7 +249,7 @@ $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, $ WV, LDWV, NH, WH, LDWH ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/clarfg.f b/lapack-netlib/SRC/clarfg.f index 4c0c5f715..d54c227bc 100644 --- a/lapack-netlib/SRC/clarfg.f +++ b/lapack-netlib/SRC/clarfg.f @@ -99,17 +99,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexOTHERauxiliary * * ===================================================================== SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -175,7 +175,7 @@ BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN - IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000) + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/clarfgp.f b/lapack-netlib/SRC/clarfgp.f index 75cfd8cc2..19e48cccd 100644 --- a/lapack-netlib/SRC/clarfgp.f +++ b/lapack-netlib/SRC/clarfgp.f @@ -97,17 +97,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexOTHERauxiliary * * ===================================================================== SUBROUTINE CLARFGP( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -197,7 +197,7 @@ BETA = BETA*BIGNUM ALPHI = ALPHI*BIGNUM ALPHR = ALPHR*BIGNUM - IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000 ) + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM diff --git a/lapack-netlib/SRC/clarrv.f b/lapack-netlib/SRC/clarrv.f index 1e1a30997..72fe1f948 100644 --- a/lapack-netlib/SRC/clarrv.f +++ b/lapack-netlib/SRC/clarrv.f @@ -199,7 +199,7 @@ *> *> \param[out] Z *> \verbatim -*> Z is array, dimension (LDZ, max(1,M) ) +*> Z is COMPLEX array, dimension (LDZ, max(1,M) ) *> If INFO = 0, the first M columns of Z contain the *> orthonormal eigenvectors of the matrix T *> corresponding to the input eigenvalues, with the i-th @@ -286,7 +286,7 @@ $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -348,6 +348,13 @@ * .. INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * The first N entries of WORK are reserved for the eigenvalues INDLD = N+1 INDLLD= 2*N+1 diff --git a/lapack-netlib/SRC/claswlq.f b/lapack-netlib/SRC/claswlq.f index 8b77142df..5fa2276e8 100644 --- a/lapack-netlib/SRC/claswlq.f +++ b/lapack-netlib/SRC/claswlq.f @@ -55,7 +55,7 @@ *> \verbatim *> A is COMPLEX array, dimension (LDA,N) *> On entry, the M-by-N matrix A. -*> On exit, the elements on and bleow the diagonal +*> On exit, the elements on and below the diagonal *> of the array contain the N-by-N lower triangular matrix L; *> the elements above the diagonal represent Q by the rows *> of blocked V (see Further Details). @@ -150,10 +150,10 @@ SUBROUTINE CLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, $ INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT diff --git a/lapack-netlib/SRC/claswp.f b/lapack-netlib/SRC/claswp.f index 8b5632c85..9f8a2dc69 100644 --- a/lapack-netlib/SRC/claswp.f +++ b/lapack-netlib/SRC/claswp.f @@ -79,14 +79,15 @@ *> \verbatim *> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) *> The vector of pivot indices. Only the elements in positions -*> K1 through K1+(K2-K1)*INCX of IPIV are accessed. -*> IPIV(K) = L implies rows K and L are to be interchanged. +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. *> \endverbatim *> *> \param[in] INCX *> \verbatim *> INCX is INTEGER -*> The increment between successive values of IPIV. If IPIV +*> The increment between successive values of IPIV. If INCX *> is negative, the pivots are applied in reverse order. *> \endverbatim * @@ -98,7 +99,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexOTHERauxiliary * @@ -114,10 +115,10 @@ * ===================================================================== SUBROUTINE CLASWP( N, A, LDA, K1, K2, IPIV, INCX ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INCX, K1, K2, LDA, N @@ -135,7 +136,8 @@ * .. * .. Executable Statements .. * -* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. * IF( INCX.GT.0 ) THEN IX0 = K1 diff --git a/lapack-netlib/SRC/clasyf_aa.f b/lapack-netlib/SRC/clasyf_aa.f index 2c8cdc46a..1bc96ee1b 100644 --- a/lapack-netlib/SRC/clasyf_aa.f +++ b/lapack-netlib/SRC/clasyf_aa.f @@ -19,11 +19,11 @@ * =========== * * SUBROUTINE CLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, -* H, LDH, WORK, INFO ) +* H, LDH, WORK ) * * .. Scalar Arguments .. * CHARACTER UPLO -* INTEGER J1, M, NB, LDA, LDH, INFO +* INTEGER J1, M, NB, LDA, LDH * .. * .. Array Arguments .. * INTEGER IPIV( * ) @@ -99,12 +99,12 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[out] IPIV *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV is INTEGER array, dimension (M) *> Details of the row and column interchanges, *> the row and column k were interchanged with the row and *> column IPIV(k). @@ -127,16 +127,6 @@ *> WORK is REAL workspace, dimension (M). *> \endverbatim *> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. -*> \endverbatim * * Authors: * ======== @@ -146,24 +136,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * * ===================================================================== SUBROUTINE CLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, - $ H, LDH, WORK, INFO ) + $ H, LDH, WORK ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * * .. Scalar Arguments .. CHARACTER UPLO - INTEGER M, NB, J1, LDA, LDH, INFO + INTEGER M, NB, J1, LDA, LDH * .. * .. Array Arguments .. INTEGER IPIV( * ) @@ -176,7 +166,7 @@ PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * * .. Local Scalars .. - INTEGER J, K, K1, I1, I2 + INTEGER J, K, K1, I1, I2, MJ COMPLEX PIV, ALPHA * .. * .. External Functions .. @@ -185,14 +175,14 @@ EXTERNAL LSAME, ILAENV, ICAMAX * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL CAXPY, CGEMV, CSCAL, CCOPY, CSWAP, CLASET, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * - INFO = 0 J = 1 * * K1 is the first column of the panel to be factorized @@ -216,9 +206,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), -* where H(J:N, J) has been initialized to be A(J, J:N) +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) * IF( K.GT.2 ) THEN * @@ -228,23 +226,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL CGEMV( 'No transpose', M-J+1, J-K1, + CALL CGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( 1, J ), 1, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(i:n, i) into WORK +* Copy H(i:M, i) into WORK * - CALL CCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), -* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) * ALPHA = -A( K-1, J ) - CALL CAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + CALL CAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -253,8 +251,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) -* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) * IF( K.GT.1 ) THEN ALPHA = -A( K, J ) @@ -262,7 +260,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -277,14 +275,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) * I1 = I1+J-1 I2 = I2+J-1 CALL CSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, $ A( J1+I1, I2 ), 1 ) * -* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) * CALL CSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, $ A( J1+I2-1, I2+1 ), LDA ) @@ -315,23 +313,17 @@ * Set A(J, J+1) = T(J, J+1) * A( K, J+1 ) = WORK( 2 ) - IF( (A( K, J ).EQ.ZERO ) .AND. - $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN - IF(INFO .EQ. 0) THEN - INFO = J - ENDIF - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J:N, J), +* Copy A(J+1:M, J+1) into H(J:M, J), * CALL CCOPY( M-J, A( K+1, J+1 ), LDA, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( K, J+1 ).NE.ZERO ) THEN ALPHA = ONE / A( K, J+1 ) @@ -341,10 +333,6 @@ CALL CLASET( 'Full', 1, M-J-1, ZERO, ZERO, $ A( K, J+2 ), LDA) END IF - ELSE - IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 10 @@ -366,9 +354,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, -* where H(J:N, J) has been initialized to be A(J:N, J) +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) * IF( K.GT.2 ) THEN * @@ -378,23 +374,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL CGEMV( 'No transpose', M-J+1, J-K1, + CALL CGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( J, 1 ), LDA, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(J:N, J) into WORK +* Copy H(J:M, J) into WORK * - CALL CCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), * where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) * ALPHA = -A( J, K-1 ) - CALL CAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + CALL CAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -403,8 +399,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L((J+1):N, J) -* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) * IF( K.GT.1 ) THEN ALPHA = -A( J, K ) @@ -412,7 +408,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -427,14 +423,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) * I1 = I1+J-1 I2 = I2+J-1 CALL CSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, $ A( I2, J1+I1 ), LDA ) * -* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) * CALL CSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, $ A( I2+1, J1+I2-1 ), 1 ) @@ -465,22 +461,17 @@ * Set A(J+1, J) = T(J+1, J) * A( J+1, K ) = WORK( 2 ) - IF( (A( J, K ).EQ.ZERO) .AND. - $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN - IF (INFO .EQ. 0) - $ INFO = J - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J+1:N, J), +* Copy A(J+1:M, J+1) into H(J+1:M, J), * CALL CCOPY( M-J, A( J+1, K+1 ), 1, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( J+1, K ).NE.ZERO ) THEN ALPHA = ONE / A( J+1, K ) @@ -490,10 +481,6 @@ CALL CLASET( 'Full', M-J-1, 1, ZERO, ZERO, $ A( J+2, K ), LDA ) END IF - ELSE - IF( (A( J, K ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 30 diff --git a/lapack-netlib/SRC/cstegr.f b/lapack-netlib/SRC/cstegr.f index 3209f27ee..98b82c1b7 100644 --- a/lapack-netlib/SRC/cstegr.f +++ b/lapack-netlib/SRC/cstegr.f @@ -184,7 +184,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -265,7 +265,7 @@ $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, $ LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/cstemr.f b/lapack-netlib/SRC/cstemr.f index f7e0abbe0..22ac842c9 100644 --- a/lapack-netlib/SRC/cstemr.f +++ b/lapack-netlib/SRC/cstemr.f @@ -239,7 +239,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -338,7 +338,7 @@ $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, $ IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/csycon_3.f b/lapack-netlib/SRC/csycon_3.f index a1ff812b0..47d52dd15 100644 --- a/lapack-netlib/SRC/csycon_3.f +++ b/lapack-netlib/SRC/csycon_3.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexSYcomputational * @@ -157,7 +157,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -171,10 +171,10 @@ SUBROUTINE CSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, $ WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/csyconvf.f b/lapack-netlib/SRC/csyconvf.f index 0e843c3f3..77ecf46b5 100644 --- a/lapack-netlib/SRC/csyconvf.f +++ b/lapack-netlib/SRC/csyconvf.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE CSYCONVF( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE CSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -192,7 +192,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * @@ -201,7 +201,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -209,10 +209,10 @@ * ===================================================================== SUBROUTINE CSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/csyconvf_rook.f b/lapack-netlib/SRC/csyconvf_rook.f index 7a8ba601d..1146a97c5 100644 --- a/lapack-netlib/SRC/csyconvf_rook.f +++ b/lapack-netlib/SRC/csyconvf_rook.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE CSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE CSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * @@ -192,7 +192,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -200,10 +200,10 @@ * ===================================================================== SUBROUTINE CSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/csyequb.f b/lapack-netlib/SRC/csyequb.f index ff1f014aa..b1c5c6fc6 100644 --- a/lapack-netlib/SRC/csyequb.f +++ b/lapack-netlib/SRC/csyequb.f @@ -117,7 +117,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * @@ -132,10 +132,10 @@ * ===================================================================== SUBROUTINE CSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, N @@ -168,7 +168,7 @@ EXTERNAL LSAME, SLAMCH * .. * .. External Subroutines .. - EXTERNAL CLASSQ + EXTERNAL CLASSQ, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, INT, LOG, MAX, MIN, REAL, SQRT diff --git a/lapack-netlib/SRC/csysv_aa.f b/lapack-netlib/SRC/csysv_aa.f index 187a6aaf9..9cd669d33 100644 --- a/lapack-netlib/SRC/csysv_aa.f +++ b/lapack-netlib/SRC/csysv_aa.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYsolve * @@ -162,10 +162,10 @@ SUBROUTINE CSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, $ LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -188,7 +188,7 @@ EXTERNAL ILAENV, LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, CSYTRF, CSYTRS, CSYTRS2 + EXTERNAL XERBLA, CSYTRF_AA, CSYTRS_AA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/csysv_aa_2stage.f b/lapack-netlib/SRC/csysv_aa_2stage.f new file mode 100644 index 000000000..cba57fc3e --- /dev/null +++ b/lapack-netlib/SRC/csysv_aa_2stage.f @@ -0,0 +1,276 @@ +*> \brief CSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSYTRF_AA_2STAGE, + $ CSYTRS_AA_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* +* End of CSYSV_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/csytrf_aa.f b/lapack-netlib/SRC/csytrf_aa.f index 7fcbb3781..2f185b0c7 100644 --- a/lapack-netlib/SRC/csytrf_aa.f +++ b/lapack-netlib/SRC/csytrf_aa.f @@ -114,11 +114,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. +*> < 0: if INFO = -i, the i-th argument had an illegal value. *> \endverbatim * * Authors: @@ -129,17 +125,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * * ===================================================================== SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +155,7 @@ * * .. Local Scalars .. LOGICAL LQUERY, UPPER - INTEGER J, LWKOPT, IINFO + INTEGER J, LWKOPT INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB COMPLEX ALPHA * .. @@ -169,7 +165,8 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL CLASYF_AA, CGEMM, CGEMV, CSCAL, CSWAP, CCOPY, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -178,7 +175,7 @@ * * Determine the block size * - NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + NB = ILAENV( 1, 'CSYTRF_AA', UPLO, N, -1, -1, -1 ) * * Test the input parameters. * @@ -214,13 +211,10 @@ ENDIF IPIV( 1 ) = 1 IF ( N.EQ.1 ) THEN - IF ( A( 1, 1 ).EQ.ZERO ) THEN - INFO = 1 - END IF RETURN END IF * -* Adjubst block size based on the workspace size +* Adjust block size based on the workspace size * IF( LWORK.LT.((1+NB)*N) ) THEN NB = ( LWORK-N ) / N @@ -260,11 +254,7 @@ * CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( MAX(1, J), J+1 ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), - $ IINFO ) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * @@ -383,10 +373,7 @@ * CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( J+1, MAX(1, J) ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * diff --git a/lapack-netlib/SRC/csytrf_aa_2stage.f b/lapack-netlib/SRC/csytrf_aa_2stage.f new file mode 100644 index 000000000..0a6bfbe31 --- /dev/null +++ b/lapack-netlib/SRC/csytrf_aa_2stage.f @@ -0,0 +1,668 @@ +*> \brief \b CSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRF_AA_2STAGE computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGBTRF, CGEMM, CGETRF, CLACPY, + $ CLASET, CTRSM, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'CSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL CGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL CGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Upper', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + CALL CTRSM( 'L', 'U', 'T', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL CTRSM( 'R', 'U', 'N', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -CONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ CONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call CGETRF +* + DO K = 1, NB + CALL CCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL CCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'U', 'N', 'U', KB, NB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL CLASET( 'Lower', KB, NB, CZERO, CONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Lower', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + CALL CTRSM( 'L', 'L', 'N', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL CTRSM( 'R', 'L', 'T', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Symmetrize T(J,J) +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -CONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'L', 'T', 'U', KB, NB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) = + $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL CLASET( 'Upper', KB, NB, CZERO, CONE, + $ A( (J+1)*NB+1, J*NB+1 ), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL CLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of CSYTRF_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/csytri2.f b/lapack-netlib/SRC/csytri2.f index 34058cb03..4c6baaa3e 100644 --- a/lapack-netlib/SRC/csytri2.f +++ b/lapack-netlib/SRC/csytri2.f @@ -120,17 +120,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * * ===================================================================== SUBROUTINE CSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -153,7 +153,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL CSYTRI, CSYTRI2X + EXTERNAL CSYTRI, CSYTRI2X, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/csytri2x.f b/lapack-netlib/SRC/csytri2x.f index 046d61346..151f06331 100644 --- a/lapack-netlib/SRC/csytri2x.f +++ b/lapack-netlib/SRC/csytri2x.f @@ -87,7 +87,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX array, dimension (N+NNB+1,NNB+3) +*> WORK is COMPLEX array, dimension (N+NB+1,NB+3) *> \endverbatim *> *> \param[in] NB @@ -113,17 +113,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexSYcomputational * * ===================================================================== SUBROUTINE CSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/csytri_3.f b/lapack-netlib/SRC/csytri_3.f index 43abc6a74..d618c26ed 100644 --- a/lapack-netlib/SRC/csytri_3.f +++ b/lapack-netlib/SRC/csytri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * @@ -160,7 +160,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -170,10 +170,10 @@ SUBROUTINE CSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -196,7 +196,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL CSYTRI_3X + EXTERNAL CSYTRI_3X, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/csytri_3x.f b/lapack-netlib/SRC/csytri_3x.f index 2865839f8..5a81ee3a5 100644 --- a/lapack-netlib/SRC/csytri_3x.f +++ b/lapack-netlib/SRC/csytri_3x.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexSYcomputational * @@ -150,7 +150,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -159,10 +159,10 @@ * ===================================================================== SUBROUTINE CSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/csytrs_3.f b/lapack-netlib/SRC/csytrs_3.f index b0e868e1b..8008c5de4 100644 --- a/lapack-netlib/SRC/csytrs_3.f +++ b/lapack-netlib/SRC/csytrs_3.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexSYcomputational * @@ -151,7 +151,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -165,10 +165,10 @@ SUBROUTINE CSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/csytrs_aa.f b/lapack-netlib/SRC/csytrs_aa.f index 6fedf9120..7cf950492 100644 --- a/lapack-netlib/SRC/csytrs_aa.f +++ b/lapack-netlib/SRC/csytrs_aa.f @@ -66,7 +66,7 @@ *> of the matrix B. NRHS >= 0. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is REAL array, dimension (LDA,N) *> Details of factors computed by CSYTRF_AA. @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexSYcomputational * @@ -129,10 +129,10 @@ SUBROUTINE CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +159,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL CGTSV, CSWAP, CTRSM, XERBLA + EXTERNAL CLACPY, CGTSV, CSWAP, CTRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/csytrs_aa_2stage.f b/lapack-netlib/SRC/csytrs_aa_2stage.f new file mode 100644 index 000000000..03bccda82 --- /dev/null +++ b/lapack-netlib/SRC/csytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b CSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by CSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Details of factors computed by CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> Details of factors computed by CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGBTRS, CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL CTRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL CTRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of CSYTRS_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/ctgex2.f b/lapack-netlib/SRC/ctgex2.f index c487e8f14..7ac1784b6 100644 --- a/lapack-netlib/SRC/ctgex2.f +++ b/lapack-netlib/SRC/ctgex2.f @@ -76,7 +76,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX arrays, dimensions (LDA,N) +*> A is COMPLEX array, dimension (LDA,N) *> On entry, the matrix A in the pair (A, B). *> On exit, the updated matrix A. *> \endverbatim @@ -89,7 +89,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is COMPLEX arrays, dimensions (LDB,N) +*> B is COMPLEX array, dimension (LDB,N) *> On entry, the matrix B in the pair (A, B). *> On exit, the updated matrix B. *> \endverbatim @@ -102,7 +102,7 @@ *> *> \param[in,out] Q *> \verbatim -*> Q is COMPLEX array, dimension (LDZ,N) +*> Q is COMPLEX array, dimension (LDQ,N) *> If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit, *> the updated matrix Q. *> Not referenced if WANTQ = .FALSE.. @@ -153,7 +153,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGEauxiliary * @@ -190,10 +190,10 @@ SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, $ LDZ, J1, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL WANTQ, WANTZ diff --git a/lapack-netlib/SRC/ctgexc.f b/lapack-netlib/SRC/ctgexc.f index f6ccdcb9b..ba1c2814c 100644 --- a/lapack-netlib/SRC/ctgexc.f +++ b/lapack-netlib/SRC/ctgexc.f @@ -102,7 +102,7 @@ *> *> \param[in,out] Q *> \verbatim -*> Q is COMPLEX array, dimension (LDZ,N) +*> Q is COMPLEX array, dimension (LDQ,N) *> On entry, if WANTQ = .TRUE., the unitary matrix Q. *> On exit, the updated matrix Q. *> If WANTQ = .FALSE., Q is not referenced. @@ -163,7 +163,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexGEcomputational * @@ -200,10 +200,10 @@ SUBROUTINE CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, $ LDZ, IFST, ILST, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL WANTQ, WANTZ diff --git a/lapack-netlib/SRC/ctgsen.f b/lapack-netlib/SRC/ctgsen.f index 8f02d0e86..8198d4554 100644 --- a/lapack-netlib/SRC/ctgsen.f +++ b/lapack-netlib/SRC/ctgsen.f @@ -72,7 +72,7 @@ * *> \param[in] IJOB *> \verbatim -*> IJOB is integer +*> IJOB is INTEGER *> Specifies whether condition numbers are required for the *> cluster of eigenvalues (PL and PR) or the deflating subspaces *> (Difu and Difl): @@ -433,7 +433,7 @@ $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, $ WORK, LWORK, IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/ctplqt.f b/lapack-netlib/SRC/ctplqt.f index 322b6dc5c..cb4d419b9 100644 --- a/lapack-netlib/SRC/ctplqt.f +++ b/lapack-netlib/SRC/ctplqt.f @@ -56,8 +56,8 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX array, dimension (LDA,N) -*> On entry, the lower triangular N-by-N matrix A. +*> A is COMPLEX array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. *> \endverbatim @@ -65,7 +65,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -115,7 +115,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -129,26 +129,26 @@ *> C = [ A ] [ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L *> upper trapezoidal matrix B2: *> [ B ] = [ B1 ] [ B2 ] *> [ B1 ] <- M-by-(N-L) rectangular -*> [ B2 ] <- M-by-L upper trapezoidal. +*> [ B2 ] <- M-by-L lower trapezoidal. *> *> The lower trapezoidal matrix B2 consists of the first L columns of a -*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, *> B is rectangular M-by-N; if M=L=N, B is lower triangular. *> *> The matrix W stores the elementary reflectors H(i) in the i-th row *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> [ C ] = [ A ] [ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> [ W ] = [ I ] [ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -172,10 +172,10 @@ SUBROUTINE CTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L, MB diff --git a/lapack-netlib/SRC/ctplqt2.f b/lapack-netlib/SRC/ctplqt2.f index 1c9b128e9..b16d6149a 100644 --- a/lapack-netlib/SRC/ctplqt2.f +++ b/lapack-netlib/SRC/ctplqt2.f @@ -48,7 +48,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX array, dimension (LDA,N) +*> A is COMPLEX array, dimension (LDA,M) *> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. @@ -57,7 +57,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -102,7 +102,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -116,7 +116,7 @@ *> C = [ A ][ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L *> upper trapezoidal matrix B2: *> @@ -132,13 +132,13 @@ *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> *> C = [ A ][ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> *> W = [ I ][ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -160,10 +160,10 @@ * ===================================================================== SUBROUTINE CTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L diff --git a/lapack-netlib/SRC/ctpmlqt.f b/lapack-netlib/SRC/ctpmlqt.f index b326d6a2e..cb5f033ca 100644 --- a/lapack-netlib/SRC/ctpmlqt.f +++ b/lapack-netlib/SRC/ctpmlqt.f @@ -19,9 +19,9 @@ *> *> \verbatim *> -*> CTPMQRT applies a complex orthogonal matrix Q obtained from a -*> "triangular-pentagonal" real block reflector H to a general -*> real matrix C, which consists of two blocks A and B. +*> CTPMLQT applies a complex orthogonal matrix Q obtained from a +*> "triangular-pentagonal" complex block reflector H to a general +*> complex matrix C, which consists of two blocks A and B. *> \endverbatim * * Arguments: @@ -30,15 +30,15 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**C from the Left; -*> = 'R': apply Q or Q**C from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'C': Transpose, apply Q**C. +*> = 'C': Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M @@ -111,7 +111,7 @@ *> (LDA,K) if SIDE = 'R' *> On entry, the K-by-N or M-by-K matrix A. *> On exit, A is overwritten by the corresponding block of -*> Q*C or Q**C*C or C*Q or C*Q**C. See Further Details. +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. *> \endverbatim *> *> \param[in] LDA @@ -127,7 +127,7 @@ *> B is COMPLEX array, dimension (LDB,N) *> On entry, the M-by-N matrix B. *> On exit, B is overwritten by the corresponding block of -*> Q*C or Q**C*C or C*Q or C*Q**C. See Further Details. +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. *> \endverbatim *> *> \param[in] LDB @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -188,21 +188,21 @@ *> *> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. *> -*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**C * C. +*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**H * C. *> *> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. *> -*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**C. +*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**H. *> \endverbatim *> * ===================================================================== SUBROUTINE CTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS diff --git a/lapack-netlib/SRC/ctpmqrt.f b/lapack-netlib/SRC/ctpmqrt.f index 025de8295..fd3d1b109 100644 --- a/lapack-netlib/SRC/ctpmqrt.f +++ b/lapack-netlib/SRC/ctpmqrt.f @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complexOTHERcomputational * @@ -216,10 +216,10 @@ SUBROUTINE CTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -242,7 +242,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL CTPRFB, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/SRC/ctrevc3.f b/lapack-netlib/SRC/ctrevc3.f index bb4c32ef2..c06b40477 100644 --- a/lapack-netlib/SRC/ctrevc3.f +++ b/lapack-netlib/SRC/ctrevc3.f @@ -222,7 +222,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @generated from ztrevc3.f, fortran z -> c, Tue Apr 19 01:47:44 2016 * @@ -247,10 +247,10 @@ $ LDVR, MM, M, WORK, LWORK, RWORK, LRWORK, INFO) IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE @@ -288,7 +288,7 @@ * .. * .. External Subroutines .. EXTERNAL XERBLA, CCOPY, CLASET, CSSCAL, CGEMM, CGEMV, - $ CLATRS, SLABAD + $ CLATRS, CLACPY, SLABAD * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CMPLX, CONJG, AIMAG, MAX diff --git a/lapack-netlib/SRC/cunbdb1.f b/lapack-netlib/SRC/cunbdb1.f index fdcc686d2..bfd2147df 100644 --- a/lapack-netlib/SRC/cunbdb1.f +++ b/lapack-netlib/SRC/cunbdb1.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -202,7 +202,7 @@ SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/cunbdb2.f b/lapack-netlib/SRC/cunbdb2.f index d95276e59..ae73699b6 100644 --- a/lapack-netlib/SRC/cunbdb2.f +++ b/lapack-netlib/SRC/cunbdb2.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -202,7 +202,7 @@ SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 @@ -230,7 +230,8 @@ LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, XERBLA + EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, CLACGV, + $ XERBLA * .. * .. External Functions .. REAL SCNRM2 diff --git a/lapack-netlib/SRC/cunbdb3.f b/lapack-netlib/SRC/cunbdb3.f index a9d05c20e..c0abde1eb 100644 --- a/lapack-netlib/SRC/cunbdb3.f +++ b/lapack-netlib/SRC/cunbdb3.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -202,7 +202,7 @@ SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 @@ -229,7 +229,7 @@ LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, XERBLA + EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CLACGV, XERBLA * .. * .. External Functions .. REAL SCNRM2 diff --git a/lapack-netlib/SRC/cunbdb4.f b/lapack-netlib/SRC/cunbdb4.f index 8388e4e8c..803b85145 100644 --- a/lapack-netlib/SRC/cunbdb4.f +++ b/lapack-netlib/SRC/cunbdb4.f @@ -33,7 +33,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -213,7 +213,7 @@ $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 @@ -241,7 +241,8 @@ LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, XERBLA + EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, CLACGV, + $ XERBLA * .. * .. External Functions .. REAL SCNRM2 diff --git a/lapack-netlib/SRC/cunbdb5.f b/lapack-netlib/SRC/cunbdb5.f index 006522d25..95997450a 100644 --- a/lapack-netlib/SRC/cunbdb5.f +++ b/lapack-netlib/SRC/cunbdb5.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -156,7 +156,7 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/cunbdb6.f b/lapack-netlib/SRC/cunbdb6.f index ab7fe8a48..05a13896d 100644 --- a/lapack-netlib/SRC/cunbdb6.f +++ b/lapack-netlib/SRC/cunbdb6.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -154,7 +154,7 @@ SUBROUTINE CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/cuncsd.f b/lapack-netlib/SRC/cuncsd.f index 829c4bcf9..f30529130 100644 --- a/lapack-netlib/SRC/cuncsd.f +++ b/lapack-netlib/SRC/cuncsd.f @@ -188,7 +188,7 @@ *> *> \param[out] U1 *> \verbatim -*> U1 is COMPLEX array, dimension (P) +*> U1 is COMPLEX array, dimension (LDU1,P) *> If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1. *> \endverbatim *> @@ -201,7 +201,7 @@ *> *> \param[out] U2 *> \verbatim -*> U2 is COMPLEX array, dimension (M-P) +*> U2 is COMPLEX array, dimension (LDU2,M-P) *> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary *> matrix U2. *> \endverbatim @@ -215,7 +215,7 @@ *> *> \param[out] V1T *> \verbatim -*> V1T is COMPLEX array, dimension (Q) +*> V1T is COMPLEX array, dimension (LDV1T,Q) *> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary *> matrix V1**H. *> \endverbatim @@ -229,7 +229,7 @@ *> *> \param[out] V2T *> \verbatim -*> V2T is COMPLEX array, dimension (M-Q) +*> V2T is COMPLEX array, dimension (LDV2T,M-Q) *> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) unitary *> matrix V2**H. *> \endverbatim @@ -320,7 +320,7 @@ $ LDV2T, WORK, LWORK, RWORK, LRWORK, $ IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/cuncsd2by1.f b/lapack-netlib/SRC/cuncsd2by1.f index 64070ca9a..1d49885f2 100644 --- a/lapack-netlib/SRC/cuncsd2by1.f +++ b/lapack-netlib/SRC/cuncsd2by1.f @@ -39,7 +39,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -255,7 +255,7 @@ $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/cunm22.f b/lapack-netlib/SRC/cunm22.f index 85c2269ad..3c6b4c125 100644 --- a/lapack-netlib/SRC/cunm22.f +++ b/lapack-netlib/SRC/cunm22.f @@ -52,8 +52,8 @@ *> N2-by-N2 upper triangular matrix. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] SIDE *> \verbatim @@ -162,7 +162,7 @@ SUBROUTINE CUNM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 diff --git a/lapack-netlib/SRC/dbbcsd.f b/lapack-netlib/SRC/dbbcsd.f index d7c7d14a7..f1df567c1 100644 --- a/lapack-netlib/SRC/dbbcsd.f +++ b/lapack-netlib/SRC/dbbcsd.f @@ -190,7 +190,7 @@ *> *> \param[in,out] V2T *> \verbatim -*> V2T is DOUBLE PRECISION array, dimenison (LDV2T,M-Q) +*> V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q) *> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is *> premultiplied by the transpose of the right *> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and @@ -332,7 +332,7 @@ $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, $ B22D, B22E, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dbdsdc.f b/lapack-netlib/SRC/dbdsdc.f index e349b0cc0..4c8b95f85 100644 --- a/lapack-netlib/SRC/dbdsdc.f +++ b/lapack-netlib/SRC/dbdsdc.f @@ -205,7 +205,7 @@ SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, $ WORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -311,12 +311,12 @@ WSTART = 1 QSTART = 3 IF( ICOMPQ.EQ.1 ) THEN - CALL DCOPY( N, D, 1, Q( 1 ), 1 ) + CALL DCOPY( N, D, 1, Q( 1 ), 1 ) CALL DCOPY( N-1, E, 1, Q( N+1 ), 1 ) END IF IF( IUPLO.EQ.2 ) THEN QSTART = 5 - WSTART = 2*N - 1 + IF( ICOMPQ .EQ. 2 ) WSTART = 2*N - 1 DO 10 I = 1, N - 1 CALL DLARTG( D( I ), E( I ), CS, SN, R ) D( I ) = R diff --git a/lapack-netlib/SRC/dbdsqr.f b/lapack-netlib/SRC/dbdsqr.f index c4cfbb3f7..93db95e7a 100644 --- a/lapack-netlib/SRC/dbdsqr.f +++ b/lapack-netlib/SRC/dbdsqr.f @@ -212,6 +212,7 @@ *> algorithm through its inner loop. The algorithms stops *> (and so fails to converge) if the number of passes *> through the inner loop exceeds MAXITR*N**2. +*> *> \endverbatim * *> \par Note: @@ -232,7 +233,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup auxOTHERcomputational * @@ -240,10 +241,10 @@ SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, $ LDU, C, LDC, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -310,7 +311,7 @@ ELSE IF( NRU.LT.0 ) THEN INFO = -4 ELSE IF( NCC.LT.0 ) THEN - INFO = -5 + INFO = -5 ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN INFO = -9 @@ -440,12 +441,12 @@ * IF( M.LE.1 ) $ GO TO 160 -* +* IF( ITER.GE.N ) THEN ITER = ITER - N ITERDIVN = ITERDIVN + 1 - IF (ITERDIVN.GE.MAXITDIVN ) - $ GO TO 200 + IF( ITERDIVN.GE.MAXITDIVN ) + $ GO TO 200 END IF * * Find diagonal block of matrix to work on diff --git a/lapack-netlib/SRC/dbdsvdx.f b/lapack-netlib/SRC/dbdsvdx.f index 94f52b4e6..96fdb3d61 100644 --- a/lapack-netlib/SRC/dbdsvdx.f +++ b/lapack-netlib/SRC/dbdsvdx.f @@ -226,10 +226,10 @@ SUBROUTINE DBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, $ NS, S, Z, LDZ, WORK, IWORK, INFO) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO @@ -269,7 +269,7 @@ EXTERNAL IDAMAX, LSAME, DAXPY, DDOT, DLAMCH, DNRM2 * .. * .. External Subroutines .. - EXTERNAL DCOPY, DLASET, DSCAL, DSWAP + EXTERNAL DSTEVX, DCOPY, DLASET, DSCAL, DSWAP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, SIGN, SQRT diff --git a/lapack-netlib/SRC/dgebal.f b/lapack-netlib/SRC/dgebal.f index 93efd2892..95876aced 100644 --- a/lapack-netlib/SRC/dgebal.f +++ b/lapack-netlib/SRC/dgebal.f @@ -67,7 +67,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is DOUBLE array, dimension (LDA,N) +*> A is DOUBLE PRECISION array, dimension (LDA,N) *> On entry, the input matrix A. *> On exit, A is overwritten by the balanced matrix. *> If JOB = 'N', A is not referenced. @@ -94,7 +94,7 @@ *> *> \param[out] SCALE *> \verbatim -*> SCALE is DOUBLE array, dimension (N) +*> SCALE is DOUBLE PRECISION array, dimension (N) *> Details of the permutations and scaling factors applied to *> A. If P(j) is the index of the row and column interchanged *> with row and column j and D(j) is the scaling factor @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -160,10 +160,10 @@ * ===================================================================== SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOB diff --git a/lapack-netlib/SRC/dgebd2.f b/lapack-netlib/SRC/dgebd2.f index bb4035dbb..2bec4e29c 100644 --- a/lapack-netlib/SRC/dgebd2.f +++ b/lapack-netlib/SRC/dgebd2.f @@ -100,7 +100,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is DOUBLE PRECISION array dimension (min(M,N)) +*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the orthogonal matrix Q. See Further Details. *> \endverbatim @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -189,10 +189,10 @@ * ===================================================================== SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N diff --git a/lapack-netlib/SRC/dgebrd.f b/lapack-netlib/SRC/dgebrd.f index 885ad9bb4..56a7abef0 100644 --- a/lapack-netlib/SRC/dgebrd.f +++ b/lapack-netlib/SRC/dgebrd.f @@ -101,7 +101,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is DOUBLE PRECISION array dimension (min(M,N)) +*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the orthogonal matrix Q. See Further Details. *> \endverbatim @@ -147,7 +147,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -205,10 +205,10 @@ SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LWORK, M, N @@ -227,8 +227,7 @@ * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, - $ NBMIN, NX - DOUBLE PRECISION WS + $ NBMIN, NX, WS * .. * .. External Subroutines .. EXTERNAL DGEBD2, DGEMM, DLABRD, XERBLA diff --git a/lapack-netlib/SRC/dgejsv.f b/lapack-netlib/SRC/dgejsv.f index 81c0a21ae..25ed248d0 100644 --- a/lapack-netlib/SRC/dgejsv.f +++ b/lapack-netlib/SRC/dgejsv.f @@ -271,7 +271,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension at least LWORK. +*> WORK is DOUBLE PRECISION array, dimension (LWORK) *> On exit, if N.GT.0 .AND. M.GT.0 (else not referenced), *> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such *> that SCALE*SVA(1:N) are the computed singular values @@ -362,7 +362,7 @@ *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array, dimension M+3*N. +*> IWORK is INTEGER array, dimension (M+3*N). *> On exit, *> IWORK(1) = the numerical rank determined after the initial *> QR factorization with pivoting. See the descriptions @@ -476,7 +476,7 @@ $ M, N, A, LDA, SVA, U, LDU, V, LDV, $ WORK, LWORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dgelqt.f b/lapack-netlib/SRC/dgelqt.f index b11e9d6ee..2124f3dc3 100644 --- a/lapack-netlib/SRC/dgelqt.f +++ b/lapack-netlib/SRC/dgelqt.f @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -117,8 +117,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -127,11 +127,11 @@ *> *> where the vi's represent the vectors which define H(i), which are returned *> in the matrix A. The 1's along the diagonal of V are not stored in A. -*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each -*> block is of order NB except for the last block, which is of order -*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block -*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -139,10 +139,10 @@ * ===================================================================== SUBROUTINE DGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, MB @@ -158,7 +158,7 @@ INTEGER I, IB, IINFO, K * .. * .. External Subroutines .. - EXTERNAL DGEQRT2, DGEQRT3, DLARFB, XERBLA + EXTERNAL DGEQRT2, DGELQT3, DGEQRT3, DLARFB, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/dgelqt3.f b/lapack-netlib/SRC/dgelqt3.f index b0bb242a6..f19a91ca2 100644 --- a/lapack-netlib/SRC/dgelqt3.f +++ b/lapack-netlib/SRC/dgelqt3.f @@ -100,7 +100,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -109,8 +109,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -131,10 +131,10 @@ * ===================================================================== RECURSIVE SUBROUTINE DGELQT3( M, N, A, LDA, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, LDT @@ -150,7 +150,7 @@ PARAMETER ( ONE = 1.0D+00 ) * .. * .. Local Scalars .. - INTEGER I, I1, J, J1, M1, M2, N1, N2, IINFO + INTEGER I, I1, J, J1, M1, M2, IINFO * .. * .. External Subroutines .. EXTERNAL DLARFG, DTRMM, DGEMM, XERBLA diff --git a/lapack-netlib/SRC/dgelsd.f b/lapack-netlib/SRC/dgelsd.f index d24b2559a..f2cfd6337 100644 --- a/lapack-netlib/SRC/dgelsd.f +++ b/lapack-netlib/SRC/dgelsd.f @@ -89,7 +89,7 @@ *> of the matrices B and X. NRHS >= 0. *> \endverbatim *> -*> \param[in] A +*> \param[in,out] A *> \verbatim *> A is DOUBLE PRECISION array, dimension (LDA,N) *> On entry, the M-by-N matrix A. @@ -194,7 +194,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEsolve * @@ -209,10 +209,10 @@ SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, $ WORK, LWORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK diff --git a/lapack-netlib/SRC/dgemlqt.f b/lapack-netlib/SRC/dgemlqt.f index 41a517a2d..8cc59b8ee 100644 --- a/lapack-netlib/SRC/dgemlqt.f +++ b/lapack-netlib/SRC/dgemlqt.f @@ -6,7 +6,7 @@ * http://www.netlib.org/lapack/explore-html/ * *> \htmlonly -*> Download DGEMQRT + dependencies +*> Download DGEMLQT + dependencies *> *> [TGZ] *> @@ -35,7 +35,7 @@ *> *> \verbatim *> -*> DGEMQRT overwrites the general real M-by-N matrix C with +*> DGEMLQT overwrites the general real M-by-N matrix C with *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q C C Q @@ -99,7 +99,9 @@ *> *> \param[in] V *> \verbatim -*> V is DOUBLE PRECISION array, dimension (LDV,K) +*> V is DOUBLE PRECISION array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DGELQT in the first K rows of its array argument A. @@ -108,16 +110,14 @@ *> \param[in] LDV *> \verbatim *> LDV is INTEGER -*> The leading dimension of the array V. -*> If SIDE = 'L', LDA >= max(1,M); -*> if SIDE = 'R', LDA >= max(1,N). +*> The leading dimension of the array V. LDV >= max(1,K). *> \endverbatim *> *> \param[in] T *> \verbatim *> T is DOUBLE PRECISION array, dimension (LDT,K) *> The upper triangular factors of the block reflectors -*> as returned by DGELQT, stored as a MB-by-M matrix. +*> as returned by DGELQT, stored as a MB-by-K matrix. *> \endverbatim *> *> \param[in] LDT @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -168,10 +168,10 @@ SUBROUTINE DGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, $ C, LDC, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -186,7 +186,7 @@ * .. * .. Local Scalars .. LOGICAL LEFT, RIGHT, TRAN, NOTRAN - INTEGER I, IB, LDWORK, KF, Q + INTEGER I, IB, LDWORK, KF * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/dgeqrt.f b/lapack-netlib/SRC/dgeqrt.f index 6856bac07..c7d932069 100644 --- a/lapack-netlib/SRC/dgeqrt.f +++ b/lapack-netlib/SRC/dgeqrt.f @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -133,7 +133,7 @@ *> block is of order NB except for the last block, which is of order *> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block *> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> for the last block) T's are stored in the NB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -141,10 +141,10 @@ * ===================================================================== SUBROUTINE DGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, NB diff --git a/lapack-netlib/SRC/dgesc2.f b/lapack-netlib/SRC/dgesc2.f index db684bae4..2f01a762f 100644 --- a/lapack-netlib/SRC/dgesc2.f +++ b/lapack-netlib/SRC/dgesc2.f @@ -101,7 +101,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEauxiliary * @@ -114,10 +114,10 @@ * ===================================================================== SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER LDA, N @@ -139,7 +139,7 @@ DOUBLE PRECISION BIGNUM, EPS, SMLNUM, TEMP * .. * .. External Subroutines .. - EXTERNAL DLASWP, DSCAL + EXTERNAL DLASWP, DSCAL, DLABAD * .. * .. External Functions .. INTEGER IDAMAX diff --git a/lapack-netlib/SRC/dgesvdx.f b/lapack-netlib/SRC/dgesvdx.f index 7da3d099c..3937c13bd 100644 --- a/lapack-netlib/SRC/dgesvdx.f +++ b/lapack-netlib/SRC/dgesvdx.f @@ -263,7 +263,7 @@ $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, $ LWORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -299,7 +299,7 @@ * .. External Subroutines .. EXTERNAL DBDSVDX, DGEBRD, DGELQF, DGEQRF, DLACPY, $ DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, - $ XERBLA + $ DCOPY, XERBLA * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/dgesvj.f b/lapack-netlib/SRC/dgesvj.f index 2b2599420..2cbc5ce0e 100644 --- a/lapack-netlib/SRC/dgesvj.f +++ b/lapack-netlib/SRC/dgesvj.f @@ -54,7 +54,7 @@ * *> \param[in] JOBA *> \verbatim -*> JOBA is CHARACTER* 1 +*> JOBA is CHARACTER*1 *> Specifies the structure of A. *> = 'L': The input matrix A is lower triangular; *> = 'U': The input matrix A is upper triangular; @@ -208,7 +208,7 @@ *> *> \param[in,out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension MAX(6,M+N). +*> WORK is DOUBLE PRECISION array, dimension (LWORK) *> On entry : *> If JOBU .EQ. 'C' : *> WORK(1) = CTOL, where CTOL defines the threshold for convergence. @@ -260,7 +260,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -337,10 +337,10 @@ SUBROUTINE DGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, $ LDV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDV, LWORK, M, MV, N diff --git a/lapack-netlib/SRC/dgetc2.f b/lapack-netlib/SRC/dgetc2.f index d850bc628..0896a7013 100644 --- a/lapack-netlib/SRC/dgetc2.f +++ b/lapack-netlib/SRC/dgetc2.f @@ -111,7 +111,7 @@ * ===================================================================== SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -135,7 +135,7 @@ DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX * .. * .. External Subroutines .. - EXTERNAL DGER, DSWAP + EXTERNAL DGER, DSWAP, DLABAD * .. * .. External Functions .. DOUBLE PRECISION DLAMCH diff --git a/lapack-netlib/SRC/dgetsls.f b/lapack-netlib/SRC/dgetsls.f index ca0ef777b..3b44a40ab 100644 --- a/lapack-netlib/SRC/dgetsls.f +++ b/lapack-netlib/SRC/dgetsls.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEsolve * @@ -160,10 +160,10 @@ SUBROUTINE DGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -185,7 +185,7 @@ INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, $ WSIZEO, WSIZEM, INFO2 - DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM, TQ( 5 ), WORKQ + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM, TQ( 5 ), WORKQ( 1 ) * .. * .. External Functions .. LOGICAL LSAME @@ -233,31 +233,31 @@ IF( M.GE.N ) THEN CALL DGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL DGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL DGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL DGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZM, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM ELSE CALL DGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL DGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL DGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL DGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM END IF diff --git a/lapack-netlib/SRC/dggesx.f b/lapack-netlib/SRC/dggesx.f index f316c7fc2..47022fbdf 100644 --- a/lapack-netlib/SRC/dggesx.f +++ b/lapack-netlib/SRC/dggesx.f @@ -111,7 +111,7 @@ *> *> \param[in] SELCTG *> \verbatim -*> SELCTG is procedure) LOGICAL FUNCTION of three DOUBLE PRECISION arguments +*> SELCTG is a LOGICAL FUNCTION of three DOUBLE PRECISION arguments *> SELCTG must be declared EXTERNAL in the calling subroutine. *> If SORT = 'N', SELCTG is not referenced. *> If SORT = 'S', SELCTG is used to select eigenvalues to sort @@ -337,7 +337,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEeigen * @@ -365,10 +365,10 @@ $ VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, $ LIWORK, BWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBVSL, JOBVSR, SENSE, SORT diff --git a/lapack-netlib/SRC/dgghd3.f b/lapack-netlib/SRC/dgghd3.f index 034e94389..283d914de 100644 --- a/lapack-netlib/SRC/dgghd3.f +++ b/lapack-netlib/SRC/dgghd3.f @@ -230,7 +230,7 @@ SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, $ LDQ, Z, LDZ, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.1) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 @@ -266,7 +266,8 @@ EXTERNAL ILAENV, LSAME * .. * .. External Subroutines .. - EXTERNAL DGGHRD, DLARTG, DLASET, DORM22, DROT, XERBLA + EXTERNAL DGGHRD, DLARTG, DLASET, DORM22, DROT, DGEMM, + $ DGEMV, DTRMV, DLACPY, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX diff --git a/lapack-netlib/SRC/dgsvj0.f b/lapack-netlib/SRC/dgsvj0.f index 7d242806b..4fd38d37e 100644 --- a/lapack-netlib/SRC/dgsvj0.f +++ b/lapack-netlib/SRC/dgsvj0.f @@ -193,7 +193,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERcomputational * @@ -218,10 +218,10 @@ SUBROUTINE DGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP @@ -262,7 +262,8 @@ EXTERNAL IDAMAX, LSAME, DDOT, DNRM2 * .. * .. External Subroutines .. - EXTERNAL DAXPY, DCOPY, DLASCL, DLASSQ, DROTM, DSWAP + EXTERNAL DAXPY, DCOPY, DLASCL, DLASSQ, DROTM, DSWAP, + $ XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/dgsvj1.f b/lapack-netlib/SRC/dgsvj1.f index 9acab16ba..376682c7f 100644 --- a/lapack-netlib/SRC/dgsvj1.f +++ b/lapack-netlib/SRC/dgsvj1.f @@ -236,7 +236,7 @@ SUBROUTINE DGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -280,7 +280,8 @@ EXTERNAL IDAMAX, LSAME, DDOT, DNRM2 * .. * .. External Subroutines .. - EXTERNAL DAXPY, DCOPY, DLASCL, DLASSQ, DROTM, DSWAP + EXTERNAL DAXPY, DCOPY, DLASCL, DLASSQ, DROTM, DSWAP, + $ XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/disnan.f b/lapack-netlib/SRC/disnan.f index da89158fb..a565ed36d 100644 --- a/lapack-netlib/SRC/disnan.f +++ b/lapack-netlib/SRC/disnan.f @@ -21,7 +21,7 @@ * LOGICAL FUNCTION DISNAN( DIN ) * * .. Scalar Arguments .. -* DOUBLE PRECISION DIN +* DOUBLE PRECISION, INTENT(IN) :: DIN * .. * * @@ -52,20 +52,20 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * * ===================================================================== LOGICAL FUNCTION DISNAN( DIN ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. - DOUBLE PRECISION DIN + DOUBLE PRECISION, INTENT(IN) :: DIN * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/dla_gbamv.f b/lapack-netlib/SRC/dla_gbamv.f index 577866cf7..350284cfb 100644 --- a/lapack-netlib/SRC/dla_gbamv.f +++ b/lapack-netlib/SRC/dla_gbamv.f @@ -106,7 +106,7 @@ *> *> \param[in] AB *> \verbatim -*> AB is DOUBLE PRECISION array of DIMENSION ( LDAB, n ) +*> AB is DOUBLE PRECISION array, dimension ( LDAB, n ) *> Before entry, the leading m by n part of the array AB must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -177,7 +177,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGBcomputational * @@ -185,10 +185,10 @@ SUBROUTINE DLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, $ INCX, BETA, Y, INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/dla_gbrfsx_extended.f b/lapack-netlib/SRC/dla_gbrfsx_extended.f index bab9bbceb..12b2a32a4 100644 --- a/lapack-netlib/SRC/dla_gbrfsx_extended.f +++ b/lapack-netlib/SRC/dla_gbrfsx_extended.f @@ -177,8 +177,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array, dimension -*> (LDY,NRHS) +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by DGBTRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -210,8 +209,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -257,8 +255,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -401,7 +398,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGBcomputational * @@ -414,10 +411,10 @@ $ Y_TAIL, RCOND, ITHRESH, RTHRESH, $ DZ_UB, IGNORE_CWISE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, diff --git a/lapack-netlib/SRC/dla_geamv.f b/lapack-netlib/SRC/dla_geamv.f index 9a91f6ffc..0a83e4b64 100644 --- a/lapack-netlib/SRC/dla_geamv.f +++ b/lapack-netlib/SRC/dla_geamv.f @@ -94,7 +94,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ) +*> A is DOUBLE PRECISION array, dimension ( LDA, n ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -138,8 +138,8 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION -*> Array of DIMENSION at least +*> Y is DOUBLE PRECISION array, +*> dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. @@ -166,7 +166,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -174,10 +174,10 @@ SUBROUTINE DLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, $ Y, INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/dla_gerfsx_extended.f b/lapack-netlib/SRC/dla_gerfsx_extended.f index d6af49025..082f810f0 100644 --- a/lapack-netlib/SRC/dla_gerfsx_extended.f +++ b/lapack-netlib/SRC/dla_gerfsx_extended.f @@ -163,8 +163,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array, dimension -*> (LDY,NRHS) +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by DGETRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -196,8 +195,7 @@ *> *> \param[in,out] ERRS_N *> \verbatim -*> ERRS_N is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERRS_N is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -243,8 +241,7 @@ *> *> \param[in,out] ERRS_C *> \verbatim -*> ERRS_C is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERRS_C is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -387,7 +384,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -399,10 +396,10 @@ $ Y_TAIL, RCOND, ITHRESH, RTHRESH, $ DZ_UB, IGNORE_CWISE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/dla_porfsx_extended.f b/lapack-netlib/SRC/dla_porfsx_extended.f index 0e21f0b13..8c0d6bebd 100644 --- a/lapack-netlib/SRC/dla_porfsx_extended.f +++ b/lapack-netlib/SRC/dla_porfsx_extended.f @@ -153,8 +153,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array, dimension -*> (LDY,NRHS) +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by DPOTRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -186,8 +185,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -233,8 +231,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -377,7 +374,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doublePOcomputational * @@ -390,10 +387,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/dla_syamv.f b/lapack-netlib/SRC/dla_syamv.f index 1f948a2d7..29566a6e9 100644 --- a/lapack-netlib/SRC/dla_syamv.f +++ b/lapack-netlib/SRC/dla_syamv.f @@ -88,7 +88,7 @@ *> *> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array of DIMENSION ( LDA, n ). +*> A is DOUBLE PRECISION array, dimension ( LDA, n ). *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -153,7 +153,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleSYcomputational * @@ -177,10 +177,10 @@ SUBROUTINE DLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/dla_syrfsx_extended.f b/lapack-netlib/SRC/dla_syrfsx_extended.f index 66661f7e2..f54d15194 100644 --- a/lapack-netlib/SRC/dla_syrfsx_extended.f +++ b/lapack-netlib/SRC/dla_syrfsx_extended.f @@ -162,8 +162,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is DOUBLE PRECISION array, dimension -*> (LDY,NRHS) +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by DSYTRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -195,8 +194,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -242,8 +240,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -386,7 +383,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleSYcomputational * @@ -399,10 +396,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/dlabrd.f b/lapack-netlib/SRC/dlabrd.f index 36c2e85bc..b5e734dc7 100644 --- a/lapack-netlib/SRC/dlabrd.f +++ b/lapack-netlib/SRC/dlabrd.f @@ -110,7 +110,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is DOUBLE PRECISION array dimension (NB) +*> TAUQ is DOUBLE PRECISION array, dimension (NB) *> The scalar factors of the elementary reflectors which *> represent the orthogonal matrix Q. See Further Details. *> \endverbatim @@ -156,7 +156,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERauxiliary * @@ -210,10 +210,10 @@ SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, $ LDY ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, LDX, LDY, M, N, NB diff --git a/lapack-netlib/SRC/dlaed3.f b/lapack-netlib/SRC/dlaed3.f index 4e62b3143..d200fc0a2 100644 --- a/lapack-netlib/SRC/dlaed3.f +++ b/lapack-netlib/SRC/dlaed3.f @@ -116,7 +116,7 @@ *> *> \param[in] Q2 *> \verbatim -*> Q2 is DOUBLE PRECISION array, dimension (LDQ2, N) +*> Q2 is DOUBLE PRECISION array, dimension (LDQ2*N) *> The first K columns of this matrix contain the non-deflated *> eigenvectors for the split problem. *> \endverbatim @@ -170,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup auxOTHERcomputational * @@ -185,10 +185,10 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, $ CTOT, W, S, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, K, LDQ, N, N1 diff --git a/lapack-netlib/SRC/dlaisnan.f b/lapack-netlib/SRC/dlaisnan.f index 4b5ebb4f5..c2e87d88a 100644 --- a/lapack-netlib/SRC/dlaisnan.f +++ b/lapack-netlib/SRC/dlaisnan.f @@ -21,7 +21,7 @@ * LOGICAL FUNCTION DLAISNAN( DIN1, DIN2 ) * * .. Scalar Arguments .. -* DOUBLE PRECISION DIN1, DIN2 +* DOUBLE PRECISION, INTENT(IN) :: DIN1, DIN2 * .. * * @@ -67,20 +67,20 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * * ===================================================================== LOGICAL FUNCTION DLAISNAN( DIN1, DIN2 ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. - DOUBLE PRECISION DIN1, DIN2 + DOUBLE PRECISION, INTENT(IN) :: DIN1, DIN2 * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/dlalsa.f b/lapack-netlib/SRC/dlalsa.f index 4aef66c95..b643f11c0 100644 --- a/lapack-netlib/SRC/dlalsa.f +++ b/lapack-netlib/SRC/dlalsa.f @@ -227,14 +227,12 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array. -*> The dimension must be at least N. +*> WORK is DOUBLE PRECISION array, dimension (N) *> \endverbatim *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array. -*> The dimension must be at least 3 * N +*> IWORK is INTEGER array, dimension (3*N) *> \endverbatim *> *> \param[out] INFO @@ -252,7 +250,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -269,10 +267,10 @@ $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, $ IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, diff --git a/lapack-netlib/SRC/dlamswlq.f b/lapack-netlib/SRC/dlamswlq.f index 8dc6df8a5..19e32f888 100644 --- a/lapack-netlib/SRC/dlamswlq.f +++ b/lapack-netlib/SRC/dlamswlq.f @@ -49,7 +49,7 @@ *> \param[in] M *> \verbatim *> M is INTEGER -*> The number of rows of the matrix A. M >=0. +*> The number of rows of the matrix C. M >=0. *> \endverbatim *> *> \param[in] N @@ -88,9 +88,11 @@ *> *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim -*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the blocked *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DLASWLQ in the first k rows of its array argument A. @@ -200,10 +202,10 @@ SUBROUTINE DLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS diff --git a/lapack-netlib/SRC/dlamtsqr.f b/lapack-netlib/SRC/dlamtsqr.f index 9ba45901b..6af89d28e 100644 --- a/lapack-netlib/SRC/dlamtsqr.f +++ b/lapack-netlib/SRC/dlamtsqr.f @@ -81,7 +81,7 @@ *> N >= NB >= 1. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is DOUBLE PRECISION array, dimension (LDA,K) *> The i-th column must contain the vector which defines the @@ -195,10 +195,10 @@ SUBROUTINE DLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS diff --git a/lapack-netlib/SRC/dlapy2.f b/lapack-netlib/SRC/dlapy2.f index 3861b1d0a..bc01829a2 100644 --- a/lapack-netlib/SRC/dlapy2.f +++ b/lapack-netlib/SRC/dlapy2.f @@ -56,17 +56,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * * ===================================================================== DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION X, Y @@ -82,20 +82,32 @@ * .. * .. Local Scalars .. DOUBLE PRECISION W, XABS, YABS, Z + LOGICAL X_IS_NAN, Y_IS_NAN +* .. +* .. External Functions .. + LOGICAL DISNAN + EXTERNAL DISNAN * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, SQRT * .. * .. Executable Statements .. * - XABS = ABS( X ) - YABS = ABS( Y ) - W = MAX( XABS, YABS ) - Z = MIN( XABS, YABS ) - IF( Z.EQ.ZERO ) THEN - DLAPY2 = W - ELSE - DLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + X_IS_NAN = DISNAN( X ) + Y_IS_NAN = DISNAN( Y ) + IF ( X_IS_NAN ) DLAPY2 = X + IF ( Y_IS_NAN ) DLAPY2 = Y +* + IF ( .NOT.( X_IS_NAN.OR.Y_IS_NAN ) ) THEN + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + DLAPY2 = W + ELSE + DLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF END IF RETURN * diff --git a/lapack-netlib/SRC/dlaqr1.f b/lapack-netlib/SRC/dlaqr1.f index acaefdeba..81a462fb3 100644 --- a/lapack-netlib/SRC/dlaqr1.f +++ b/lapack-netlib/SRC/dlaqr1.f @@ -55,19 +55,19 @@ * *> \param[in] N *> \verbatim -*> N is integer +*> N is INTEGER *> Order of the matrix H. N must be either 2 or 3. *> \endverbatim *> *> \param[in] H *> \verbatim -*> H is DOUBLE PRECISION array of dimension (LDH,N) +*> H is DOUBLE PRECISION array, dimension (LDH,N) *> The 2-by-2 or 3-by-3 matrix H in (*). *> \endverbatim *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> The leading dimension of H as declared in *> the calling procedure. LDH.GE.N *> \endverbatim @@ -95,7 +95,7 @@ *> *> \param[out] V *> \verbatim -*> V is DOUBLE PRECISION array of dimension N +*> V is DOUBLE PRECISION array, dimension (N) *> A scalar multiple of the first column of the *> matrix K in (*). *> \endverbatim @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERauxiliary * @@ -121,10 +121,10 @@ * ===================================================================== SUBROUTINE DLAQR1( N, H, LDH, SR1, SI1, SR2, SI2, V ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION SI1, SI2, SR1, SR2 diff --git a/lapack-netlib/SRC/dlaqr2.f b/lapack-netlib/SRC/dlaqr2.f index 910fdda68..431b3f123 100644 --- a/lapack-netlib/SRC/dlaqr2.f +++ b/lapack-netlib/SRC/dlaqr2.f @@ -119,7 +119,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -147,14 +147,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -162,7 +162,7 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim @@ -192,14 +192,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -210,14 +210,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -229,7 +229,7 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim @@ -243,7 +243,7 @@ *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -263,7 +263,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERauxiliary * @@ -278,10 +278,10 @@ $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, diff --git a/lapack-netlib/SRC/dlaqr3.f b/lapack-netlib/SRC/dlaqr3.f index 8a668bc65..aa23617c3 100644 --- a/lapack-netlib/SRC/dlaqr3.f +++ b/lapack-netlib/SRC/dlaqr3.f @@ -116,7 +116,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -144,14 +144,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -159,7 +159,7 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim @@ -189,14 +189,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -207,14 +207,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -226,7 +226,7 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim @@ -240,7 +240,7 @@ *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -275,7 +275,7 @@ $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dlaqr5.f b/lapack-netlib/SRC/dlaqr5.f index 8b536c08c..5cc4eda1a 100644 --- a/lapack-netlib/SRC/dlaqr5.f +++ b/lapack-netlib/SRC/dlaqr5.f @@ -48,21 +48,21 @@ * *> \param[in] WANTT *> \verbatim -*> WANTT is logical scalar +*> WANTT is LOGICAL *> WANTT = .true. if the quasi-triangular Schur factor *> is being computed. WANTT is set to .false. otherwise. *> \endverbatim *> *> \param[in] WANTZ *> \verbatim -*> WANTZ is logical scalar +*> WANTZ is LOGICAL *> WANTZ = .true. if the orthogonal Schur factor is being *> computed. WANTZ is set to .false. otherwise. *> \endverbatim *> *> \param[in] KACC22 *> \verbatim -*> KACC22 is integer with value 0, 1, or 2. +*> KACC22 is INTEGER with value 0, 1, or 2. *> Specifies the computation mode of far-from-diagonal *> orthogonal updates. *> = 0: DLAQR5 does not accumulate reflections and does not @@ -78,19 +78,19 @@ *> *> \param[in] N *> \verbatim -*> N is integer scalar +*> N is INTEGER *> N is the order of the Hessenberg matrix H upon which this *> subroutine operates. *> \endverbatim *> *> \param[in] KTOP *> \verbatim -*> KTOP is integer scalar +*> KTOP is INTEGER *> \endverbatim *> *> \param[in] KBOT *> \verbatim -*> KBOT is integer scalar +*> KBOT is INTEGER *> These are the first and last rows and columns of an *> isolated diagonal block upon which the QR sweep is to be *> applied. It is assumed without a check that @@ -101,19 +101,19 @@ *> *> \param[in] NSHFTS *> \verbatim -*> NSHFTS is integer scalar +*> NSHFTS is INTEGER *> NSHFTS gives the number of simultaneous shifts. NSHFTS *> must be positive and even. *> \endverbatim *> *> \param[in,out] SR *> \verbatim -*> SR is DOUBLE PRECISION array of size (NSHFTS) +*> SR is DOUBLE PRECISION array, dimension (NSHFTS) *> \endverbatim *> *> \param[in,out] SI *> \verbatim -*> SI is DOUBLE PRECISION array of size (NSHFTS) +*> SI is DOUBLE PRECISION array, dimension (NSHFTS) *> SR contains the real parts and SI contains the imaginary *> parts of the NSHFTS shifts of origin that define the *> multi-shift QR sweep. On output SR and SI may be @@ -122,7 +122,7 @@ *> *> \param[in,out] H *> \verbatim -*> H is DOUBLE PRECISION array of size (LDH,N) +*> H is DOUBLE PRECISION array, dimension (LDH,N) *> On input H contains a Hessenberg matrix. On output a *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied *> to the isolated diagonal block in rows and columns KTOP @@ -131,7 +131,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer scalar +*> LDH is INTEGER *> LDH is the leading dimension of H just as declared in the *> calling procedure. LDH.GE.MAX(1,N). *> \endverbatim @@ -150,7 +150,7 @@ *> *> \param[in,out] Z *> \verbatim -*> Z is DOUBLE PRECISION array of size (LDZ,IHIZ) +*> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ) *> If WANTZ = .TRUE., then the QR Sweep orthogonal *> similarity transformation is accumulated into *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. @@ -159,71 +159,69 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer scalar +*> LDZ is INTEGER *> LDA is the leading dimension of Z just as declared in *> the calling procedure. LDZ.GE.N. *> \endverbatim *> *> \param[out] V *> \verbatim -*> V is DOUBLE PRECISION array of size (LDV,NSHFTS/2) +*> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2) *> \endverbatim *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> LDV is the leading dimension of V as declared in the *> calling procedure. LDV.GE.3. *> \endverbatim *> *> \param[out] U *> \verbatim -*> U is DOUBLE PRECISION array of size -*> (LDU,3*NSHFTS-3) +*> U is DOUBLE PRECISION array, dimension (LDU,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDU *> \verbatim -*> LDU is integer scalar +*> LDU is INTEGER *> LDU is the leading dimension of U just as declared in the *> in the calling subroutine. LDU.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> NH is the number of columns in array WH available for *> workspace. NH.GE.1. *> \endverbatim *> *> \param[out] WH *> \verbatim -*> WH is DOUBLE PRECISION array of size (LDWH,NH) +*> WH is DOUBLE PRECISION array, dimension (LDWH,NH) *> \endverbatim *> *> \param[in] LDWH *> \verbatim -*> LDWH is integer scalar +*> LDWH is INTEGER *> Leading dimension of WH just as declared in the *> calling procedure. LDWH.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer scalar +*> NV is INTEGER *> NV is the number of rows in WV agailable for workspace. *> NV.GE.1. *> \endverbatim *> *> \param[out] WV *> \verbatim -*> WV is DOUBLE PRECISION array of size -*> (LDWV,3*NSHFTS-3) +*> WV is DOUBLE PRECISION array, dimension (LDWV,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer scalar +*> LDWV is INTEGER *> LDWV is the leading dimension of WV as declared in the *> in the calling subroutine. LDWV.GE.NV. *> \endverbatim @@ -259,7 +257,7 @@ $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, $ LDU, NV, WV, LDWV, NH, WH, LDWH ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dlarfg.f b/lapack-netlib/SRC/dlarfg.f index aa5fabc57..be33f932c 100644 --- a/lapack-netlib/SRC/dlarfg.f +++ b/lapack-netlib/SRC/dlarfg.f @@ -99,17 +99,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERauxiliary * * ===================================================================== SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -170,7 +170,7 @@ CALL DSCAL( N-1, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN - IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000 ) + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/dlarfgp.f b/lapack-netlib/SRC/dlarfgp.f index 70efabbb8..d040a8c7b 100644 --- a/lapack-netlib/SRC/dlarfgp.f +++ b/lapack-netlib/SRC/dlarfgp.f @@ -97,17 +97,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERauxiliary * * ===================================================================== SUBROUTINE DLARFGP( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -181,7 +181,7 @@ CALL DSCAL( N-1, BIGNUM, X, INCX ) BETA = BETA*BIGNUM ALPHA = ALPHA*BIGNUM - IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000) + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM diff --git a/lapack-netlib/SRC/dlarra.f b/lapack-netlib/SRC/dlarra.f index 31a0bfbbc..7406a8f98 100644 --- a/lapack-netlib/SRC/dlarra.f +++ b/lapack-netlib/SRC/dlarra.f @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -136,10 +136,10 @@ SUBROUTINE DLARRA( N, D, E, E2, SPLTOL, TNRM, $ NSPLIT, ISPLIT, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, N, NSPLIT @@ -167,7 +167,13 @@ * .. Executable Statements .. * INFO = 0 - +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * Compute splitting points NSPLIT = 1 IF(SPLTOL.LT.ZERO) THEN diff --git a/lapack-netlib/SRC/dlarrb.f b/lapack-netlib/SRC/dlarrb.f index 2733922f0..2b6389e25 100644 --- a/lapack-netlib/SRC/dlarrb.f +++ b/lapack-netlib/SRC/dlarrb.f @@ -178,7 +178,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -196,10 +196,10 @@ $ RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK, $ PIVMIN, SPDIAM, TWIST, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IFIRST, ILAST, INFO, N, OFFSET, TWIST @@ -236,6 +236,12 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF * MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / $ LOG( TWO ) ) + 2 diff --git a/lapack-netlib/SRC/dlarrc.f b/lapack-netlib/SRC/dlarrc.f index 9635e4122..093bfa02c 100644 --- a/lapack-netlib/SRC/dlarrc.f +++ b/lapack-netlib/SRC/dlarrc.f @@ -137,7 +137,7 @@ SUBROUTINE DLARRC( JOBT, N, VL, VU, D, E, PIVMIN, $ EIGCNT, LCNT, RCNT, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -170,6 +170,13 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* LCNT = 0 RCNT = 0 EIGCNT = 0 diff --git a/lapack-netlib/SRC/dlarrd.f b/lapack-netlib/SRC/dlarrd.f index 57abf7436..5cc105261 100644 --- a/lapack-netlib/SRC/dlarrd.f +++ b/lapack-netlib/SRC/dlarrd.f @@ -329,7 +329,7 @@ $ M, W, WERR, WL, WU, IBLOCK, INDEXW, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -385,6 +385,12 @@ * INFO = 0 * +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * Decode RANGE * IF( LSAME( RANGE, 'A' ) ) THEN diff --git a/lapack-netlib/SRC/dlarre.f b/lapack-netlib/SRC/dlarre.f index f01b25f16..0613efbc3 100644 --- a/lapack-netlib/SRC/dlarre.f +++ b/lapack-netlib/SRC/dlarre.f @@ -305,7 +305,7 @@ $ W, WERR, WGAP, IBLOCK, INDEXW, GERS, PIVMIN, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -360,7 +360,7 @@ * .. * .. External Subroutines .. EXTERNAL DCOPY, DLARNV, DLARRA, DLARRB, DLARRC, DLARRD, - $ DLASQ2 + $ DLASQ2, DLARRK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN @@ -370,7 +370,12 @@ * INFO = 0 - +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF * * Decode RANGE * diff --git a/lapack-netlib/SRC/dlarrf.f b/lapack-netlib/SRC/dlarrf.f index 5ad4337ad..f814ee1b4 100644 --- a/lapack-netlib/SRC/dlarrf.f +++ b/lapack-netlib/SRC/dlarrf.f @@ -193,7 +193,7 @@ $ SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, $ DPLUS, LPLUS, WORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -239,6 +239,13 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* FACT = DBLE(2**KTRYMAX) EPS = DLAMCH( 'Precision' ) SHIFT = 0 diff --git a/lapack-netlib/SRC/dlarrj.f b/lapack-netlib/SRC/dlarrj.f index ecd136f42..097ba9f77 100644 --- a/lapack-netlib/SRC/dlarrj.f +++ b/lapack-netlib/SRC/dlarrj.f @@ -150,7 +150,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -168,10 +168,10 @@ $ RTOL, OFFSET, W, WERR, WORK, IWORK, $ PIVMIN, SPDIAM, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IFIRST, ILAST, INFO, N, OFFSET @@ -203,6 +203,12 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF * MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / $ LOG( TWO ) ) + 2 diff --git a/lapack-netlib/SRC/dlarrk.f b/lapack-netlib/SRC/dlarrk.f index 8b307a493..e92fe727e 100644 --- a/lapack-netlib/SRC/dlarrk.f +++ b/lapack-netlib/SRC/dlarrk.f @@ -137,7 +137,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -145,10 +145,10 @@ SUBROUTINE DLARRK( N, IW, GL, GU, $ D, E2, PIVMIN, RELTOL, W, WERR, INFO) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, IW, N @@ -179,6 +179,13 @@ * .. * .. Executable Statements .. * +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* * Get machine constants EPS = DLAMCH( 'P' ) diff --git a/lapack-netlib/SRC/dlarrr.f b/lapack-netlib/SRC/dlarrr.f index c12b60585..7aa22476b 100644 --- a/lapack-netlib/SRC/dlarrr.f +++ b/lapack-netlib/SRC/dlarrr.f @@ -78,7 +78,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -94,10 +94,10 @@ * ===================================================================== SUBROUTINE DLARRR( N, D, E, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER N, INFO @@ -130,6 +130,13 @@ * .. * .. Executable Statements .. * +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* * As a default, do NOT go for relative-accuracy preserving computations. INFO = 1 diff --git a/lapack-netlib/SRC/dlarrv.f b/lapack-netlib/SRC/dlarrv.f index edda67d7d..cace17c0e 100644 --- a/lapack-netlib/SRC/dlarrv.f +++ b/lapack-netlib/SRC/dlarrv.f @@ -68,8 +68,14 @@ *> \verbatim *> VU is DOUBLE PRECISION *> Upper bound of the interval that contains the desired -*> eigenvalues. VL < VU. Needed to compute gaps on the left or right -*> end of the extremal eigenvalues in the desired RANGE. +*> eigenvalues. VL < VU. +*> Note: VU is currently not used by this implementation of DLARRV, VU is +*> passed to DLARRV because it could be used compute gaps on the right end +*> of the extremal eigenvalues. However, with not much initial accuracy in +*> LAMBDA and VU, the formula can lead to an overestimation of the right gap +*> and thus to inadequately early RQI 'convergence'. This is currently +*> prevented this by forcing a small right gap. And so it turns out that VU +*> is currently not used by this implementation of DLARRV. *> \endverbatim *> *> \param[in,out] D @@ -286,7 +292,7 @@ $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -344,6 +350,13 @@ * .. INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * The first N entries of WORK are reserved for the eigenvalues INDLD = N+1 INDLLD= 2*N+1 diff --git a/lapack-netlib/SRC/dlartgs.f b/lapack-netlib/SRC/dlartgs.f index a83e74d37..29a9eb07d 100644 --- a/lapack-netlib/SRC/dlartgs.f +++ b/lapack-netlib/SRC/dlartgs.f @@ -83,17 +83,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup auxOTHERcomputational * * ===================================================================== SUBROUTINE DLARTGS( X, Y, SIGMA, CS, SN ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. DOUBLE PRECISION CS, SIGMA, SN, X, Y @@ -108,6 +108,9 @@ * .. Local Scalars .. DOUBLE PRECISION R, S, THRESH, W, Z * .. +* .. External Subroutines .. + EXTERNAL DLARTGP +* .. * .. External Functions .. DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH diff --git a/lapack-netlib/SRC/dlasd0.f b/lapack-netlib/SRC/dlasd0.f index ca0b3b98c..522ca9adc 100644 --- a/lapack-netlib/SRC/dlasd0.f +++ b/lapack-netlib/SRC/dlasd0.f @@ -81,7 +81,7 @@ *> *> \param[out] U *> \verbatim -*> U is DOUBLE PRECISION array, dimension at least (LDQ, N) +*> U is DOUBLE PRECISION array, dimension (LDU, N) *> On exit, U contains the left singular vectors. *> \endverbatim *> @@ -93,7 +93,7 @@ *> *> \param[out] VT *> \verbatim -*> VT is DOUBLE PRECISION array, dimension at least (LDVT, M) +*> VT is DOUBLE PRECISION array, dimension (LDVT, M) *> On exit, VT**T contains the right singular vectors. *> \endverbatim *> @@ -112,14 +112,12 @@ *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER work array. -*> Dimension must be at least (8 * N) +*> IWORK is INTEGER array, dimension (8*N) *> \endverbatim *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION work array. -*> Dimension must be at least (3 * M**2 + 2 * M) +*> WORK is DOUBLE PRECISION array, dimension (3*M**2+2*M) *> \endverbatim *> *> \param[out] INFO @@ -138,7 +136,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -152,10 +150,10 @@ SUBROUTINE DLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, $ WORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE diff --git a/lapack-netlib/SRC/dlasd2.f b/lapack-netlib/SRC/dlasd2.f index a7ced418f..87a154342 100644 --- a/lapack-netlib/SRC/dlasd2.f +++ b/lapack-netlib/SRC/dlasd2.f @@ -190,7 +190,7 @@ *> *> \param[out] IDXP *> \verbatim -*> IDXP is INTEGER array dimension(N) +*> IDXP is INTEGER array, dimension(N) *> This will contain the permutation used to place deflated *> values of D at the end of the array. On output IDXP(2:K) *> points to the nondeflated D-values and IDXP(K+1:N) @@ -199,14 +199,14 @@ *> *> \param[out] IDX *> \verbatim -*> IDX is INTEGER array dimension(N) +*> IDX is INTEGER array, dimension(N) *> This will contain the permutation used to sort the contents of *> D into ascending order. *> \endverbatim *> *> \param[out] IDXC *> \verbatim -*> IDXC is INTEGER array dimension(N) +*> IDXC is INTEGER array, dimension(N) *> This will contain the permutation used to arrange the columns *> of the deflated U matrix into three groups: the first group *> contains non-zero entries only at and above NL, the second @@ -216,7 +216,7 @@ *> *> \param[in,out] IDXQ *> \verbatim -*> IDXQ is INTEGER array dimension(N) +*> IDXQ is INTEGER array, dimension(N) *> This contains the permutation which separately sorts the two *> sub-problems in D into ascending order. Note that entries in *> the first hlaf of this permutation must first be moved one @@ -226,7 +226,7 @@ *> *> \param[out] COLTYP *> \verbatim -*> COLTYP is INTEGER array dimension(N) +*> COLTYP is INTEGER array, dimension(N) *> As workspace, this will contain a label which will indicate *> which of the following types a column in the U2 matrix or a *> row in the VT2 matrix is: @@ -254,7 +254,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -269,10 +269,10 @@ $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, $ IDXC, IDXQ, COLTYP, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE diff --git a/lapack-netlib/SRC/dlasd3.f b/lapack-netlib/SRC/dlasd3.f index 57d0abd4c..7662be3e7 100644 --- a/lapack-netlib/SRC/dlasd3.f +++ b/lapack-netlib/SRC/dlasd3.f @@ -94,8 +94,7 @@ *> *> \param[out] Q *> \verbatim -*> Q is DOUBLE PRECISION array, -*> dimension at least (LDQ,K). +*> Q is DOUBLE PRECISION array, dimension (LDQ,K) *> \endverbatim *> *> \param[in] LDQ @@ -104,7 +103,7 @@ *> The leading dimension of the array Q. LDQ >= K. *> \endverbatim *> -*> \param[in] DSIGMA +*> \param[in,out] DSIGMA *> \verbatim *> DSIGMA is DOUBLE PRECISION array, dimension(K) *> The first K elements of this array contain the old roots @@ -125,7 +124,7 @@ *> The leading dimension of the array U. LDU >= N. *> \endverbatim *> -*> \param[in,out] U2 +*> \param[in] U2 *> \verbatim *> U2 is DOUBLE PRECISION array, dimension (LDU2, N) *> The first K columns of this matrix contain the non-deflated @@ -187,7 +186,7 @@ *> type is any column which has been deflated. *> \endverbatim *> -*> \param[in] Z +*> \param[in,out] Z *> \verbatim *> Z is DOUBLE PRECISION array, dimension (K) *> The first K elements of this array contain the components @@ -210,7 +209,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -225,10 +224,10 @@ $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, $ INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, diff --git a/lapack-netlib/SRC/dlasd8.f b/lapack-netlib/SRC/dlasd8.f index 245e814a1..fc5c48c52 100644 --- a/lapack-netlib/SRC/dlasd8.f +++ b/lapack-netlib/SRC/dlasd8.f @@ -133,7 +133,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension at least 3 * K +*> WORK is DOUBLE PRECISION array, dimension (3*K) *> \endverbatim *> *> \param[out] INFO @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -166,10 +166,10 @@ SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, $ DSIGMA, WORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER ICOMPQ, INFO, K, LDDIFR diff --git a/lapack-netlib/SRC/dlasda.f b/lapack-netlib/SRC/dlasda.f index 20ceedd0b..f41a108b8 100644 --- a/lapack-netlib/SRC/dlasda.f +++ b/lapack-netlib/SRC/dlasda.f @@ -239,8 +239,7 @@ *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array. -*> Dimension must be at least (7 * N). +*> IWORK is INTEGER array, dimension (7*N) *> \endverbatim *> *> \param[out] INFO @@ -259,7 +258,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -274,10 +273,10 @@ $ DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, $ PERM, GIVNUM, C, S, WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE diff --git a/lapack-netlib/SRC/dlasq4.f b/lapack-netlib/SRC/dlasq4.f index cb7a714cc..d4ddbbc7b 100644 --- a/lapack-netlib/SRC/dlasq4.f +++ b/lapack-netlib/SRC/dlasq4.f @@ -151,7 +151,7 @@ SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, $ DN1, DN2, TAU, TTYPE, G ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -240,7 +240,6 @@ NP = NN - 9 ELSE NP = NN - 2*PP - B2 = Z( NP-2 ) GAM = DN1 IF( Z( NP-4 ) .GT. Z( NP-2 ) ) $ RETURN diff --git a/lapack-netlib/SRC/dlasq5.f b/lapack-netlib/SRC/dlasq5.f index 99d4f678e..3812c879f 100644 --- a/lapack-netlib/SRC/dlasq5.f +++ b/lapack-netlib/SRC/dlasq5.f @@ -121,7 +121,7 @@ *> IEEE is LOGICAL *> Flag for IEEE or non IEEE arithmetic. *> \endverbatim -* +*> *> \param[in] EPS *> \verbatim *> EPS is DOUBLE PRECISION @@ -136,7 +136,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup auxOTHERcomputational * @@ -144,10 +144,10 @@ SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, $ DN, DNM1, DNM2, IEEE, EPS ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL IEEE diff --git a/lapack-netlib/SRC/dlaswlq.f b/lapack-netlib/SRC/dlaswlq.f index 2830711a6..6e4ca20fd 100644 --- a/lapack-netlib/SRC/dlaswlq.f +++ b/lapack-netlib/SRC/dlaswlq.f @@ -55,7 +55,7 @@ *> \verbatim *> A is DOUBLE PRECISION array, dimension (LDA,N) *> On entry, the M-by-N matrix A. -*> On exit, the elements on and bleow the diagonal +*> On exit, the elements on and below the diagonal *> of the array contain the N-by-N lower triangular matrix L; *> the elements above the diagonal represent Q by the rows *> of blocked V (see Further Details). @@ -150,10 +150,10 @@ SUBROUTINE DLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, $ INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT diff --git a/lapack-netlib/SRC/dlaswp.f b/lapack-netlib/SRC/dlaswp.f index 2c526ffad..202fd8df1 100644 --- a/lapack-netlib/SRC/dlaswp.f +++ b/lapack-netlib/SRC/dlaswp.f @@ -79,14 +79,15 @@ *> \verbatim *> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) *> The vector of pivot indices. Only the elements in positions -*> K1 through K1+(K2-K1)*INCX of IPIV are accessed. -*> IPIV(K) = L implies rows K and L are to be interchanged. +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. *> \endverbatim *> *> \param[in] INCX *> \verbatim *> INCX is INTEGER -*> The increment between successive values of IPIV. If IPIV +*> The increment between successive values of IPIV. If INCX *> is negative, the pivots are applied in reverse order. *> \endverbatim * @@ -98,7 +99,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERauxiliary * @@ -114,10 +115,10 @@ * ===================================================================== SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INCX, K1, K2, LDA, N @@ -135,7 +136,8 @@ * .. * .. Executable Statements .. * -* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. * IF( INCX.GT.0 ) THEN IX0 = K1 diff --git a/lapack-netlib/SRC/dlasyf_aa.f b/lapack-netlib/SRC/dlasyf_aa.f index 0bd2d6def..6b75e46e0 100644 --- a/lapack-netlib/SRC/dlasyf_aa.f +++ b/lapack-netlib/SRC/dlasyf_aa.f @@ -19,11 +19,11 @@ * =========== * * SUBROUTINE DLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, -* H, LDH, WORK, INFO ) +* H, LDH, WORK ) * * .. Scalar Arguments .. * CHARACTER UPLO -* INTEGER J1, M, NB, LDA, LDH, INFO +* INTEGER J1, M, NB, LDA, LDH * .. * .. Array Arguments .. * INTEGER IPIV( * ) @@ -99,12 +99,12 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[out] IPIV *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV is INTEGER array, dimension (M) *> Details of the row and column interchanges, *> the row and column k were interchanged with the row and *> column IPIV(k). @@ -127,16 +127,6 @@ *> WORK is DOUBLE PRECISION workspace, dimension (M). *> \endverbatim *> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. -*> \endverbatim * * Authors: * ======== @@ -146,24 +136,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * * ===================================================================== SUBROUTINE DLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, - $ H, LDH, WORK, INFO ) + $ H, LDH, WORK ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * * .. Scalar Arguments .. CHARACTER UPLO - INTEGER M, NB, J1, LDA, LDH, INFO + INTEGER M, NB, J1, LDA, LDH * .. * .. Array Arguments .. INTEGER IPIV( * ) @@ -176,7 +166,7 @@ PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * * .. Local Scalars .. - INTEGER J, K, K1, I1, I2 + INTEGER J, K, K1, I1, I2, MJ DOUBLE PRECISION PIV, ALPHA * .. * .. External Functions .. @@ -185,14 +175,14 @@ EXTERNAL LSAME, ILAENV, IDAMAX * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL DGEMV, DAXPY, DCOPY, DSWAP, DSCAL, DLASET, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * - INFO = 0 J = 1 * * K1 is the first column of the panel to be factorized @@ -216,9 +206,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), -* where H(J:N, J) has been initialized to be A(J, J:N) +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) * IF( K.GT.2 ) THEN * @@ -228,23 +226,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL DGEMV( 'No transpose', M-J+1, J-K1, + CALL DGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( 1, J ), 1, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(i:n, i) into WORK +* Copy H(i:M, i) into WORK * - CALL DCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL DCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), -* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) * ALPHA = -A( K-1, J ) - CALL DAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + CALL DAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -253,8 +251,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) -* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) * IF( K.GT.1 ) THEN ALPHA = -A( K, J ) @@ -262,7 +260,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = IDAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -277,14 +275,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) * I1 = I1+J-1 I2 = I2+J-1 CALL DSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, $ A( J1+I1, I2 ), 1 ) * -* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) * CALL DSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, $ A( J1+I2-1, I2+1 ), LDA ) @@ -315,23 +313,17 @@ * Set A(J, J+1) = T(J, J+1) * A( K, J+1 ) = WORK( 2 ) - IF( (A( K, J ).EQ.ZERO ) .AND. - $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN - IF(INFO .EQ. 0) THEN - INFO = J - ENDIF - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J:N, J), +* Copy A(J+1:M, J+1) into H(J:M, J), * CALL DCOPY( M-J, A( K+1, J+1 ), LDA, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( K, J+1 ).NE.ZERO ) THEN ALPHA = ONE / A( K, J+1 ) @@ -341,10 +333,6 @@ CALL DLASET( 'Full', 1, M-J-1, ZERO, ZERO, $ A( K, J+2 ), LDA) END IF - ELSE - IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 10 @@ -366,9 +354,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, -* where H(J:N, J) has been initialized to be A(J:N, J) +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) * IF( K.GT.2 ) THEN * @@ -378,23 +374,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL DGEMV( 'No transpose', M-J+1, J-K1, + CALL DGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( J, 1 ), LDA, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(J:N, J) into WORK +* Copy H(J:M, J) into WORK * - CALL DCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL DCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), * where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) * ALPHA = -A( J, K-1 ) - CALL DAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + CALL DAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -403,8 +399,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L((J+1):N, J) -* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) * IF( K.GT.1 ) THEN ALPHA = -A( J, K ) @@ -412,7 +408,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = IDAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -427,14 +423,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) * I1 = I1+J-1 I2 = I2+J-1 CALL DSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, $ A( I2, J1+I1 ), LDA ) * -* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) * CALL DSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, $ A( I2+1, J1+I2-1 ), 1 ) @@ -465,22 +461,17 @@ * Set A(J+1, J) = T(J+1, J) * A( J+1, K ) = WORK( 2 ) - IF( (A( J, K ).EQ.ZERO) .AND. - $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN - IF (INFO .EQ. 0) - $ INFO = J - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J+1:N, J), +* Copy A(J+1:M, J+1) into H(J+1:M, J), * CALL DCOPY( M-J, A( J+1, K+1 ), 1, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( J+1, K ).NE.ZERO ) THEN ALPHA = ONE / A( J+1, K ) @@ -490,10 +481,6 @@ CALL DLASET( 'Full', M-J-1, 1, ZERO, ZERO, $ A( J+2, K ), LDA ) END IF - ELSE - IF( (A( J, K ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 30 diff --git a/lapack-netlib/SRC/dorbdb1.f b/lapack-netlib/SRC/dorbdb1.f index db3b14db2..675028acd 100644 --- a/lapack-netlib/SRC/dorbdb1.f +++ b/lapack-netlib/SRC/dorbdb1.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -203,7 +203,7 @@ SUBROUTINE DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/dorbdb2.f b/lapack-netlib/SRC/dorbdb2.f index cec60da75..ea43d4fcb 100644 --- a/lapack-netlib/SRC/dorbdb2.f +++ b/lapack-netlib/SRC/dorbdb2.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -202,7 +202,7 @@ SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/dorbdb3.f b/lapack-netlib/SRC/dorbdb3.f index 7149796ca..f5667b43b 100644 --- a/lapack-netlib/SRC/dorbdb3.f +++ b/lapack-netlib/SRC/dorbdb3.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -201,7 +201,7 @@ SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/dorbdb4.f b/lapack-netlib/SRC/dorbdb4.f index 606d7083b..ff141b99f 100644 --- a/lapack-netlib/SRC/dorbdb4.f +++ b/lapack-netlib/SRC/dorbdb4.f @@ -33,7 +33,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -213,7 +213,7 @@ $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/dorbdb5.f b/lapack-netlib/SRC/dorbdb5.f index de01f5a44..313c6d5ed 100644 --- a/lapack-netlib/SRC/dorbdb5.f +++ b/lapack-netlib/SRC/dorbdb5.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -156,7 +156,7 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/dorbdb6.f b/lapack-netlib/SRC/dorbdb6.f index 6056d0301..c92c9fede 100644 --- a/lapack-netlib/SRC/dorbdb6.f +++ b/lapack-netlib/SRC/dorbdb6.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -154,7 +154,7 @@ SUBROUTINE DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/dorcsd.f b/lapack-netlib/SRC/dorcsd.f index 340e16a5d..f0284ce6e 100644 --- a/lapack-netlib/SRC/dorcsd.f +++ b/lapack-netlib/SRC/dorcsd.f @@ -186,7 +186,7 @@ *> *> \param[out] U1 *> \verbatim -*> U1 is DOUBLE PRECISION array, dimension (P) +*> U1 is DOUBLE PRECISION array, dimension (LDU1,P) *> If JOBU1 = 'Y', U1 contains the P-by-P orthogonal matrix U1. *> \endverbatim *> @@ -199,7 +199,7 @@ *> *> \param[out] U2 *> \verbatim -*> U2 is DOUBLE PRECISION array, dimension (M-P) +*> U2 is DOUBLE PRECISION array, dimension (LDU2,M-P) *> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) orthogonal *> matrix U2. *> \endverbatim @@ -213,7 +213,7 @@ *> *> \param[out] V1T *> \verbatim -*> V1T is DOUBLE PRECISION array, dimension (Q) +*> V1T is DOUBLE PRECISION array, dimension (LDV1T,Q) *> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix orthogonal *> matrix V1**T. *> \endverbatim @@ -227,7 +227,7 @@ *> *> \param[out] V2T *> \verbatim -*> V2T is DOUBLE PRECISION array, dimension (M-Q) +*> V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q) *> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) orthogonal *> matrix V2**T. *> \endverbatim @@ -289,7 +289,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -300,10 +300,10 @@ $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, $ LDV2T, WORK, LWORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS diff --git a/lapack-netlib/SRC/dorcsd2by1.f b/lapack-netlib/SRC/dorcsd2by1.f index 8542a2ed3..4f9880017 100644 --- a/lapack-netlib/SRC/dorcsd2by1.f +++ b/lapack-netlib/SRC/dorcsd2by1.f @@ -36,7 +36,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> diff --git a/lapack-netlib/SRC/dorm22.f b/lapack-netlib/SRC/dorm22.f index ac79e1e76..aac47109d 100644 --- a/lapack-netlib/SRC/dorm22.f +++ b/lapack-netlib/SRC/dorm22.f @@ -53,8 +53,8 @@ *> N2-by-N2 upper triangular matrix. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] SIDE *> \verbatim @@ -163,7 +163,7 @@ SUBROUTINE DORM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 diff --git a/lapack-netlib/SRC/dppsvx.f b/lapack-netlib/SRC/dppsvx.f index df949896e..4fc84ead8 100644 --- a/lapack-netlib/SRC/dppsvx.f +++ b/lapack-netlib/SRC/dppsvx.f @@ -147,8 +147,7 @@ *> *> \param[in,out] AFP *> \verbatim -*> AFP is DOUBLE PRECISION array, dimension -*> (N*(N+1)/2) +*> AFP is DOUBLE PRECISION array, dimension (N*(N+1)/2) *> If FACT = 'F', then AFP is an input argument and on entry *> contains the triangular factor U or L from the Cholesky *> factorization A = U**T*U or A = L*L**T, in the same storage @@ -312,7 +311,7 @@ SUBROUTINE DPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, $ X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 diff --git a/lapack-netlib/SRC/drscl.f b/lapack-netlib/SRC/drscl.f index 925114368..cbd66dd27 100644 --- a/lapack-netlib/SRC/drscl.f +++ b/lapack-netlib/SRC/drscl.f @@ -77,17 +77,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERauxiliary * * ===================================================================== SUBROUTINE DRSCL( N, SA, SX, INCX ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -112,7 +112,7 @@ EXTERNAL DLAMCH * .. * .. External Subroutines .. - EXTERNAL DSCAL + EXTERNAL DSCAL, DLABAD * .. * .. Intrinsic Functions .. INTRINSIC ABS diff --git a/lapack-netlib/SRC/dsb2st_kernels.f b/lapack-netlib/SRC/dsb2st_kernels.f index afed5265f..3bf126d5b 100644 --- a/lapack-netlib/SRC/dsb2st_kernels.f +++ b/lapack-netlib/SRC/dsb2st_kernels.f @@ -47,45 +47,87 @@ * Arguments: * ========== * -*> @param[in] n -*> The order of the matrix A. -*> -*> @param[in] nb -*> The size of the band. -*> -*> @param[in, out] A -*> A pointer to the matrix A. -*> -*> @param[in] lda -*> The leading dimension of the matrix A. +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim *> -*> @param[out] V -*> DOUBLE PRECISION array, dimension 2*n if eigenvalues only are -*> requested or to be queried for vectors. +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim *> -*> @param[out] TAU -*> DOUBLE PRECISION array, dimension (2*n). -*> The scalar factors of the Householder reflectors are stored -*> in this array. +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim *> -*> @param[in] st +*> \param[in] ST +*> \verbatim +*> ST is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] ed +*> \param[in] ED +*> \verbatim +*> ED is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] sweep +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] Vblksiz -*> internal parameter for indices. +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim *> -*> @param[in] wantz -*> logical which indicate if Eigenvalue are requested or both -*> Eigenvalue/Eigenvectors. +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is DOUBLE PRECISION array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim *> -*> @param[in] work -*> Workspace of size nb. +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array. Workspace of size nb. +*> \endverbatim *> *> \par Further Details: * ===================== @@ -128,10 +170,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/dsbev_2stage.f b/lapack-netlib/SRC/dsbev_2stage.f index c66b40491..79991b649 100644 --- a/lapack-netlib/SRC/dsbev_2stage.f +++ b/lapack-netlib/SRC/dsbev_2stage.f @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHEReigen * @@ -206,10 +206,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -234,9 +234,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, DLANSB - EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DLASCL, DSCAL, DSTEQR, DSTERF, XERBLA, @@ -273,9 +273,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/dsbevd_2stage.f b/lapack-netlib/SRC/dsbevd_2stage.f index 1968f2b78..859f87c0e 100644 --- a/lapack-netlib/SRC/dsbevd_2stage.f +++ b/lapack-netlib/SRC/dsbevd_2stage.f @@ -194,7 +194,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHEReigen * @@ -236,10 +236,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -266,9 +266,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, DLANSB - EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DGEMM, DLACPY, DLASCL, DSCAL, DSTEDC, @@ -290,9 +290,9 @@ LIWMIN = 1 LWMIN = 1 ELSE - IB = ILAENV( 18, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) IF( WANTZ ) THEN LIWMIN = 3 + 5*N LWMIN = 1 + 5*N + 2*N**2 diff --git a/lapack-netlib/SRC/dsbevx_2stage.f b/lapack-netlib/SRC/dsbevx_2stage.f index 9e120e5e5..93a07f160 100644 --- a/lapack-netlib/SRC/dsbevx_2stage.f +++ b/lapack-netlib/SRC/dsbevx_2stage.f @@ -324,7 +324,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -359,9 +359,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, DLANSB - EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DCOPY, DGEMV, DLACPY, DLASCL, DSCAL, @@ -419,9 +419,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = 2*N + LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/dsgesv.f b/lapack-netlib/SRC/dsgesv.f index e867b974d..f47327d00 100644 --- a/lapack-netlib/SRC/dsgesv.f +++ b/lapack-netlib/SRC/dsgesv.f @@ -195,7 +195,7 @@ SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK, $ SWORK, ITER, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -230,8 +230,8 @@ DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM * * .. External Subroutines .. - EXTERNAL DAXPY, DGEMM, DLACPY, DLAG2S, SLAG2D, SGETRF, - $ SGETRS, XERBLA + EXTERNAL DAXPY, DGEMM, DLACPY, DLAG2S, DGETRF, DGETRS, + $ SGETRF, SGETRS, SLAG2D, XERBLA * .. * .. External Functions .. INTEGER IDAMAX diff --git a/lapack-netlib/SRC/dspevd.f b/lapack-netlib/SRC/dspevd.f index 234d03fed..5b99d7558 100644 --- a/lapack-netlib/SRC/dspevd.f +++ b/lapack-netlib/SRC/dspevd.f @@ -112,8 +112,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, -*> dimension (LWORK) +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) *> On exit, if INFO = 0, WORK(1) returns the required LWORK. *> \endverbatim *> @@ -171,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHEReigen * @@ -179,10 +178,10 @@ SUBROUTINE DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO diff --git a/lapack-netlib/SRC/dspgv.f b/lapack-netlib/SRC/dspgv.f index 085e96fe1..f7313ffec 100644 --- a/lapack-netlib/SRC/dspgv.f +++ b/lapack-netlib/SRC/dspgv.f @@ -77,8 +77,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is DOUBLE PRECISION array, dimension -*> (N*(N+1)/2) +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) *> On entry, the upper or lower triangle of the symmetric matrix *> A, packed columnwise in a linear array. The j-th column of A *> is stored in the array AP as follows: @@ -153,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHEReigen * @@ -161,10 +160,10 @@ SUBROUTINE DSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, $ INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO diff --git a/lapack-netlib/SRC/dsposv.f b/lapack-netlib/SRC/dsposv.f index 0f9eff8b0..4a8575241 100644 --- a/lapack-netlib/SRC/dsposv.f +++ b/lapack-netlib/SRC/dsposv.f @@ -199,7 +199,7 @@ SUBROUTINE DSPOSV( UPLO, N, NRHS, A, LDA, B, LDB, X, LDX, WORK, $ SWORK, ITER, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -235,7 +235,7 @@ * * .. External Subroutines .. EXTERNAL DAXPY, DSYMM, DLACPY, DLAT2S, DLAG2S, SLAG2D, - $ SPOTRF, SPOTRS, XERBLA + $ SPOTRF, SPOTRS, DPOTRF, DPOTRS, XERBLA * .. * .. External Functions .. INTEGER IDAMAX diff --git a/lapack-netlib/SRC/dspsvx.f b/lapack-netlib/SRC/dspsvx.f index b95c610ba..62df170c0 100644 --- a/lapack-netlib/SRC/dspsvx.f +++ b/lapack-netlib/SRC/dspsvx.f @@ -123,8 +123,7 @@ *> *> \param[in,out] AFP *> \verbatim -*> AFP is DOUBLE PRECISION array, dimension -*> (N*(N+1)/2) +*> AFP is DOUBLE PRECISION array, dimension (N*(N+1)/2) *> If FACT = 'F', then AFP is an input argument and on entry *> contains the block diagonal matrix D and the multipliers used *> to obtain the factor U or L from the factorization @@ -277,7 +276,7 @@ SUBROUTINE DSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, $ LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 diff --git a/lapack-netlib/SRC/dstedc.f b/lapack-netlib/SRC/dstedc.f index d7f953729..61b44bc06 100644 --- a/lapack-netlib/SRC/dstedc.f +++ b/lapack-netlib/SRC/dstedc.f @@ -105,8 +105,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, -*> dimension (LWORK) +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. *> \endverbatim *> @@ -174,7 +173,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup auxOTHERcomputational * @@ -189,10 +188,10 @@ SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, $ LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER COMPZ diff --git a/lapack-netlib/SRC/dstegr.f b/lapack-netlib/SRC/dstegr.f index f32860322..6a8c43faa 100644 --- a/lapack-netlib/SRC/dstegr.f +++ b/lapack-netlib/SRC/dstegr.f @@ -184,7 +184,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -265,7 +265,7 @@ $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, $ LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dstemr.f b/lapack-netlib/SRC/dstemr.f index 924d738d0..a1a8e3433 100644 --- a/lapack-netlib/SRC/dstemr.f +++ b/lapack-netlib/SRC/dstemr.f @@ -222,7 +222,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -321,7 +321,7 @@ $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, $ IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dsycon_3.f b/lapack-netlib/SRC/dsycon_3.f index 5802aa83f..7245ebadc 100644 --- a/lapack-netlib/SRC/dsycon_3.f +++ b/lapack-netlib/SRC/dsycon_3.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleSYcomputational * @@ -157,7 +157,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -171,10 +171,10 @@ SUBROUTINE DSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, $ WORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/dsyconvf.f b/lapack-netlib/SRC/dsyconvf.f index 673360fdc..37c8157ba 100644 --- a/lapack-netlib/SRC/dsyconvf.f +++ b/lapack-netlib/SRC/dsyconvf.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE DSYCONVF( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE DSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -189,7 +189,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * @@ -198,7 +198,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -206,10 +206,10 @@ * ===================================================================== SUBROUTINE DSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/dsyconvf_rook.f b/lapack-netlib/SRC/dsyconvf_rook.f index 2d163703a..5c774906e 100644 --- a/lapack-netlib/SRC/dsyconvf_rook.f +++ b/lapack-netlib/SRC/dsyconvf_rook.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE DSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE DSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -180,7 +180,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * @@ -189,7 +189,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -197,10 +197,10 @@ * ===================================================================== SUBROUTINE DSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/dsyequb.f b/lapack-netlib/SRC/dsyequb.f index dd1dc80bb..de6b71a34 100644 --- a/lapack-netlib/SRC/dsyequb.f +++ b/lapack-netlib/SRC/dsyequb.f @@ -116,7 +116,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * @@ -131,10 +131,10 @@ * ===================================================================== SUBROUTINE DSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, N @@ -165,7 +165,7 @@ EXTERNAL DLAMCH, LSAME * .. * .. External Subroutines .. - EXTERNAL DLASSQ + EXTERNAL DLASSQ, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT diff --git a/lapack-netlib/SRC/dsyev_2stage.f b/lapack-netlib/SRC/dsyev_2stage.f index af622fa2e..fff0dedbc 100644 --- a/lapack-netlib/SRC/dsyev_2stage.f +++ b/lapack-netlib/SRC/dsyev_2stage.f @@ -143,7 +143,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYeigen * @@ -185,10 +185,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -213,9 +213,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, DLANSY - EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DLASCL, DORGTR, DSCAL, DSTEQR, DSTERF, @@ -244,10 +244,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = 2*N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/dsyevd_2stage.f b/lapack-netlib/SRC/dsyevd_2stage.f index d9d080cb1..75a6da436 100644 --- a/lapack-netlib/SRC/dsyevd_2stage.f +++ b/lapack-netlib/SRC/dsyevd_2stage.f @@ -180,7 +180,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYeigen * @@ -229,10 +229,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -260,9 +260,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, DLANSY - EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF, @@ -295,10 +295,14 @@ LIWMIN = 1 LWMIN = 1 ELSE - KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) IF( WANTZ ) THEN LIWMIN = 3 + 5*N LWMIN = 1 + 6*N + 2*N**2 diff --git a/lapack-netlib/SRC/dsyevr.f b/lapack-netlib/SRC/dsyevr.f index 42f6081cf..d140426a4 100644 --- a/lapack-netlib/SRC/dsyevr.f +++ b/lapack-netlib/SRC/dsyevr.f @@ -88,7 +88,7 @@ *> *> Note 1 : DSYEVR calls DSTEMR when the full spectrum is requested *> on machines which conform to the ieee-754 floating point standard. -*> DSYEVR calls DSTEBZ and SSTEIN on non-ieee machines and +*> DSYEVR calls DSTEBZ and DSTEIN on non-ieee machines and *> when partial spectrum requests are made. *> *> Normal execution of DSTEMR may create NaNs and infinities and @@ -334,7 +334,7 @@ $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dsyevr_2stage.f b/lapack-netlib/SRC/dsyevr_2stage.f index ae6258236..847acce42 100644 --- a/lapack-netlib/SRC/dsyevr_2stage.f +++ b/lapack-netlib/SRC/dsyevr_2stage.f @@ -383,7 +383,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -418,9 +418,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV, ILAENV2STAGE DOUBLE PRECISION DLAMCH, DLANSY - EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DCOPY, DORMTR, DSCAL, DSTEBZ, DSTEMR, DSTEIN, @@ -443,10 +443,10 @@ * LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) * - KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = MAX( 26*N, 5*N + LHTRD + LWTRD ) LIWMIN = MAX( 1, 10*N ) * diff --git a/lapack-netlib/SRC/dsyevx_2stage.f b/lapack-netlib/SRC/dsyevx_2stage.f index 97ca806fd..7a93ac020 100644 --- a/lapack-netlib/SRC/dsyevx_2stage.f +++ b/lapack-netlib/SRC/dsyevx_2stage.f @@ -302,7 +302,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -336,9 +336,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, DLANSY - EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DCOPY, DLACPY, DORGTR, DORMTR, DSCAL, DSTEBZ, @@ -393,10 +393,14 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) LWMIN = MAX( 8*N, 3*N + LHTRD + LWTRD ) WORK( 1 ) = LWMIN END IF diff --git a/lapack-netlib/SRC/dsygv_2stage.f b/lapack-netlib/SRC/dsygv_2stage.f index b7da00f51..5b1a1766c 100644 --- a/lapack-netlib/SRC/dsygv_2stage.f +++ b/lapack-netlib/SRC/dsygv_2stage.f @@ -46,7 +46,7 @@ *> positive definite. *> This routine use the 2stage technique for the reduction to tridiagonal *> which showed higher performance on recent architecture and for large -* sizes N>2000. +*> sizes N>2000. *> \endverbatim * * Arguments: @@ -186,7 +186,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYeigen * @@ -228,10 +228,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -254,8 +254,8 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DPOTRF, DSYGST, DTRMM, DTRSM, XERBLA, @@ -288,10 +288,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = 2*N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/dsysv_aa.f b/lapack-netlib/SRC/dsysv_aa.f index e458f12bb..cbccd5e65 100644 --- a/lapack-netlib/SRC/dsysv_aa.f +++ b/lapack-netlib/SRC/dsysv_aa.f @@ -154,9 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 -* -* @precisions fortran d -> z c +*> \date November 2017 * *> \ingroup doubleSYsolve * @@ -164,10 +162,10 @@ SUBROUTINE DSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, $ LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -190,7 +188,7 @@ EXTERNAL ILAENV, LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, DSYTRF, DSYTRS, DSYTRS2 + EXTERNAL XERBLA, DSYTRF_AA, DSYTRS_AA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/dsysv_aa_2stage.f b/lapack-netlib/SRC/dsysv_aa_2stage.f new file mode 100644 index 000000000..ac3c77d76 --- /dev/null +++ b/lapack-netlib/SRC/dsysv_aa_2stage.f @@ -0,0 +1,280 @@ +*> \brief DSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* @generated from SRC/chesv_aa_2stage.f, fortran c -> d, Tue Oct 31 11:22:31 2017 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* DOUBLE PRECISION A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSV_AA_2STAGE computes the solution to a real system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is DOUBLE PRECISION array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYsolve +* +* ===================================================================== + SUBROUTINE DSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSYTRF_AA_2STAGE, DSYTRS_AA_2STAGE, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSV_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/dsytrd_2stage.f b/lapack-netlib/SRC/dsytrd_2stage.f index 9997ecd25..522602bb2 100644 --- a/lapack-netlib/SRC/dsytrd_2stage.f +++ b/lapack-netlib/SRC/dsytrd_2stage.f @@ -144,7 +144,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension LWORK. +*> WORK is DOUBLE PRECISION array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * @@ -227,10 +227,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER VECT, UPLO @@ -253,8 +253,8 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. Executable Statements .. * @@ -267,10 +267,10 @@ * * Determine the block size, the workspace size and the hous size. * - KD = ILAENV( 17, 'DSYTRD_2STAGE', VECT, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', VECT, N, KD, -1, -1 ) - LHMIN = ILAENV( 19, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) - LWMIN = ILAENV( 20, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) * WRITE(*,*),'DSYTRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, * $ LHMIN, LWMIN * diff --git a/lapack-netlib/SRC/dsytrd_sb2st.F b/lapack-netlib/SRC/dsytrd_sb2st.F index 59ef01381..1e860004e 100644 --- a/lapack-netlib/SRC/dsytrd_sb2st.F +++ b/lapack-netlib/SRC/dsytrd_sb2st.F @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup real16OTHERcomputational * @@ -236,10 +236,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER STAGE1, UPLO, VECT @@ -270,7 +270,7 @@ $ SIDEV, SIZETAU, LDV, LHMIN, LWMIN * .. * .. External Subroutines .. - EXTERNAL DSB2ST_KERNELS, DLACPY, DLASET + EXTERNAL DSB2ST_KERNELS, DLACPY, DLASET, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, CEILING, REAL diff --git a/lapack-netlib/SRC/dsytrd_sy2sb.f b/lapack-netlib/SRC/dsytrd_sy2sb.f index a0e028a30..85337f792 100644 --- a/lapack-netlib/SRC/dsytrd_sy2sb.f +++ b/lapack-netlib/SRC/dsytrd_sy2sb.f @@ -123,7 +123,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension LWORK. +*> WORK is DOUBLE PRECISION array, dimension (LWORK) *> On exit, if INFO = 0, or if LWORK=-1, *> WORK(1) returns the size of LWORK. *> \endverbatim @@ -132,7 +132,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK which should be calculated -* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns *> this value as the first entry of the WORK array, and no error @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * @@ -222,7 +222,7 @@ *> *> where tau is a real scalar, and v is a real vector with *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in -* A(i+kd+2:n,i), and tau in TAU(i). +*> A(i+kd+2:n,i), and tau in TAU(i). *> *> The contents of A on exit are illustrated by the following examples *> with n = 5: @@ -245,10 +245,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -277,7 +277,7 @@ $ TPOS, WPOS, S2POS, S1POS * .. * .. External Subroutines .. - EXTERNAL XERBLA, DSYR2K, DSYMM, DGEMM, + EXTERNAL XERBLA, DSYR2K, DSYMM, DGEMM, DCOPY, $ DLARFT, DGELQF, DGEQRF, DLASET * .. * .. Intrinsic Functions .. diff --git a/lapack-netlib/SRC/dsytrf_aa.f b/lapack-netlib/SRC/dsytrf_aa.f index c3d598b28..24b3f393b 100644 --- a/lapack-netlib/SRC/dsytrf_aa.f +++ b/lapack-netlib/SRC/dsytrf_aa.f @@ -114,11 +114,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. +*> < 0: if INFO = -i, the i-th argument had an illegal value. *> \endverbatim * * Authors: @@ -129,17 +125,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * * ===================================================================== SUBROUTINE DSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +155,7 @@ * * .. Local Scalars .. LOGICAL LQUERY, UPPER - INTEGER J, LWKOPT, IINFO + INTEGER J, LWKOPT INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB DOUBLE PRECISION ALPHA * .. @@ -169,7 +165,8 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL DLASYF_AA, DGEMM, DGEMV, DSCAL, DCOPY, DSWAP, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -178,7 +175,7 @@ * * Determine the block size * - NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + NB = ILAENV( 1, 'DSYTRF_AA', UPLO, N, -1, -1, -1 ) * * Test the input parameters. * @@ -214,13 +211,10 @@ ENDIF IPIV( 1 ) = 1 IF ( N.EQ.1 ) THEN - IF ( A( 1, 1 ).EQ.ZERO ) THEN - INFO = 1 - END IF RETURN END IF * -* Adjubst block size based on the workspace size +* Adjust block size based on the workspace size * IF( LWORK.LT.((1+NB)*N) ) THEN NB = ( LWORK-N ) / N @@ -260,11 +254,7 @@ * CALL DLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( MAX(1, J), J+1 ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), - $ IINFO ) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * @@ -383,10 +373,7 @@ * CALL DLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( J+1, MAX(1, J) ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * diff --git a/lapack-netlib/SRC/dsytrf_aa_2stage.f b/lapack-netlib/SRC/dsytrf_aa_2stage.f new file mode 100644 index 000000000..299130564 --- /dev/null +++ b/lapack-netlib/SRC/dsytrf_aa_2stage.f @@ -0,0 +1,647 @@ +*> \brief \b DSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* DOUBLE PRECISION A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRF_AA_2STAGE computes the factorization of a real symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is DOUBLE PRECISION array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + DOUBLE PRECISION A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + DOUBLE PRECISION PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DCOPY, DLACGV, DLACPY, + $ DLASET, DGBTRF, DGEMM, DGETRF, + $ DSYGST, DSWAP, DTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'DSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I .EQ. 1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL DLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL DGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL DGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL DSYGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL DGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call DGETRF +* + DO K = 1, NB + CALL DCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL DGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL DCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL DLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL DLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL DTRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL DLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL DSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL DSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL DSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL DSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. J-1) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL DGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL DGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL DLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL DGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL DSYGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL DGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL DGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL DGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL DLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL DLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL DTRSM( 'R', 'L', 'T', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL DLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL DSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL DSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL DSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL DSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL DLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL DGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of DSYTRF_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/dsytri2.f b/lapack-netlib/SRC/dsytri2.f index 0d5b029d6..9aa21a854 100644 --- a/lapack-netlib/SRC/dsytri2.f +++ b/lapack-netlib/SRC/dsytri2.f @@ -120,17 +120,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * * ===================================================================== SUBROUTINE DSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -153,7 +153,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DSYTRI, DSYTRI2X + EXTERNAL DSYTRI, DSYTRI2X, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/dsytri2x.f b/lapack-netlib/SRC/dsytri2x.f index bcd5c9424..ae29f1209 100644 --- a/lapack-netlib/SRC/dsytri2x.f +++ b/lapack-netlib/SRC/dsytri2x.f @@ -87,7 +87,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension (N+NNB+1,NNB+3) +*> WORK is DOUBLE PRECISION array, dimension (N+NB+1,NB+3) *> \endverbatim *> *> \param[in] NB @@ -113,17 +113,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleSYcomputational * * ===================================================================== SUBROUTINE DSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/dsytri_3.f b/lapack-netlib/SRC/dsytri_3.f index 3437e1414..d1980f8c4 100644 --- a/lapack-netlib/SRC/dsytri_3.f +++ b/lapack-netlib/SRC/dsytri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * @@ -160,7 +160,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -170,10 +170,10 @@ SUBROUTINE DSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -196,7 +196,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DSYTRI_3X + EXTERNAL DSYTRI_3X, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/dsytri_3x.f b/lapack-netlib/SRC/dsytri_3x.f index fecde38f3..d95e6274b 100644 --- a/lapack-netlib/SRC/dsytri_3x.f +++ b/lapack-netlib/SRC/dsytri_3x.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleSYcomputational * @@ -150,7 +150,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -159,10 +159,10 @@ * ===================================================================== SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/dsytrs_3.f b/lapack-netlib/SRC/dsytrs_3.f index 85c09e01b..2d61f6b71 100644 --- a/lapack-netlib/SRC/dsytrs_3.f +++ b/lapack-netlib/SRC/dsytrs_3.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleSYcomputational * @@ -151,7 +151,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -165,10 +165,10 @@ SUBROUTINE DSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/dsytrs_aa.f b/lapack-netlib/SRC/dsytrs_aa.f index b572581e5..05ef31ff3 100644 --- a/lapack-netlib/SRC/dsytrs_aa.f +++ b/lapack-netlib/SRC/dsytrs_aa.f @@ -66,7 +66,7 @@ *> of the matrix B. NRHS >= 0. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is DOUBLE PRECISION array, dimension (LDA,N) *> Details of factors computed by DSYTRF_AA. @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleSYcomputational * @@ -129,10 +129,10 @@ SUBROUTINE DSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +159,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL DGTSV, DSWAP, DTRSM, XERBLA + EXTERNAL DLACPY, DGTSV, DSWAP, DTRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/dsytrs_aa_2stage.f b/lapack-netlib/SRC/dsytrs_aa_2stage.f new file mode 100644 index 000000000..caff5d4ad --- /dev/null +++ b/lapack-netlib/SRC/dsytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b DSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* DOUBLE PRECISION A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by DSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> Details of factors computed by DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is DOUBLE PRECISION array, dimension (LTB) +*> Details of factors computed by DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + DOUBLE PRECISION A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGBTRS, DLASWP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL DTRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL DGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL DTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL DTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL DGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL DTRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of DSYTRS_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/dtgsen.f b/lapack-netlib/SRC/dtgsen.f index 9f4923934..22323cf8f 100644 --- a/lapack-netlib/SRC/dtgsen.f +++ b/lapack-netlib/SRC/dtgsen.f @@ -222,7 +222,7 @@ *> \verbatim *> PL is DOUBLE PRECISION *> \endverbatim - +*> *> \param[out] PR *> \verbatim *> PR is DOUBLE PRECISION @@ -248,8 +248,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, -*> dimension (MAX(1,LWORK)) +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. *> \endverbatim *> @@ -452,7 +451,7 @@ $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, $ PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/dtplqt.f b/lapack-netlib/SRC/dtplqt.f index b312c501f..4712950f9 100644 --- a/lapack-netlib/SRC/dtplqt.f +++ b/lapack-netlib/SRC/dtplqt.f @@ -73,8 +73,8 @@ *> *> \param[in,out] A *> \verbatim -*> A is DOUBLE PRECISION array, dimension (LDA,N) -*> On entry, the lower triangular N-by-N matrix A. +*> A is DOUBLE PRECISION array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. *> \endverbatim @@ -82,7 +82,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -146,26 +146,26 @@ *> C = [ A ] [ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L *> upper trapezoidal matrix B2: *> [ B ] = [ B1 ] [ B2 ] *> [ B1 ] <- M-by-(N-L) rectangular -*> [ B2 ] <- M-by-L upper trapezoidal. +*> [ B2 ] <- M-by-L lower trapezoidal. *> *> The lower trapezoidal matrix B2 consists of the first L columns of a -*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, *> B is rectangular M-by-N; if M=L=N, B is lower triangular. *> *> The matrix W stores the elementary reflectors H(i) in the i-th row *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> [ C ] = [ A ] [ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> [ W ] = [ I ] [ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -189,10 +189,10 @@ SUBROUTINE DTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L, MB diff --git a/lapack-netlib/SRC/dtplqt2.f b/lapack-netlib/SRC/dtplqt2.f index 7e87e6c5b..e2fefb68d 100644 --- a/lapack-netlib/SRC/dtplqt2.f +++ b/lapack-netlib/SRC/dtplqt2.f @@ -65,7 +65,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> A is DOUBLE PRECISION array, dimension (LDA,M) *> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. @@ -74,7 +74,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -133,7 +133,7 @@ *> C = [ A ][ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L *> upper trapezoidal matrix B2: *> @@ -149,13 +149,13 @@ *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> *> C = [ A ][ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> *> W = [ I ][ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -177,10 +177,10 @@ * ===================================================================== SUBROUTINE DTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L diff --git a/lapack-netlib/SRC/dtpmlqt.f b/lapack-netlib/SRC/dtpmlqt.f index fd31bed57..3782d0c71 100644 --- a/lapack-netlib/SRC/dtpmlqt.f +++ b/lapack-netlib/SRC/dtpmlqt.f @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERcomputational * @@ -216,10 +216,10 @@ SUBROUTINE DTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -242,7 +242,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, DLARFB + EXTERNAL XERBLA, DLARFB, DTPRFB * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/SRC/dtpmqrt.f b/lapack-netlib/SRC/dtpmqrt.f index ba9fdf858..44985a80d 100644 --- a/lapack-netlib/SRC/dtpmqrt.f +++ b/lapack-netlib/SRC/dtpmqrt.f @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERcomputational * @@ -216,10 +216,10 @@ SUBROUTINE DTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -242,7 +242,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL DTPRFB, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/SRC/dtrevc.f b/lapack-netlib/SRC/dtrevc.f index 921f5143a..2ed3efaca 100644 --- a/lapack-netlib/SRC/dtrevc.f +++ b/lapack-netlib/SRC/dtrevc.f @@ -200,7 +200,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERcomputational * @@ -222,10 +222,10 @@ SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, $ LDVR, MM, M, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE @@ -257,7 +257,8 @@ EXTERNAL LSAME, IDAMAX, DDOT, DLAMCH * .. * .. External Subroutines .. - EXTERNAL DAXPY, DCOPY, DGEMV, DLALN2, DSCAL, XERBLA + EXTERNAL DLABAD, DAXPY, DCOPY, DGEMV, DLALN2, DSCAL, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT diff --git a/lapack-netlib/SRC/dtrevc3.f b/lapack-netlib/SRC/dtrevc3.f index e6c0f2ffb..745f636d0 100644 --- a/lapack-netlib/SRC/dtrevc3.f +++ b/lapack-netlib/SRC/dtrevc3.f @@ -215,7 +215,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @precisions fortran d -> s * @@ -240,10 +240,10 @@ $ VR, LDVR, MM, M, WORK, LWORK, INFO ) IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE @@ -280,7 +280,7 @@ * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DGEMV, DLALN2, DSCAL, XERBLA, - $ DGEMM, DLASET, DLABAD + $ DGEMM, DLASET, DLABAD, DLACPY * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT diff --git a/lapack-netlib/SRC/dtrsna.f b/lapack-netlib/SRC/dtrsna.f index 2966e5fb5..dd0ad2f05 100644 --- a/lapack-netlib/SRC/dtrsna.f +++ b/lapack-netlib/SRC/dtrsna.f @@ -213,7 +213,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERcomputational * @@ -265,10 +265,10 @@ $ LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, JOB @@ -303,7 +303,7 @@ EXTERNAL LSAME, DDOT, DLAMCH, DLAPY2, DNRM2 * .. * .. External Subroutines .. - EXTERNAL DLACN2, DLACPY, DLAQTR, DTREXC, XERBLA + EXTERNAL DLABAD, DLACN2, DLACPY, DLAQTR, DTREXC, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT diff --git a/lapack-netlib/SRC/dtrttp.f b/lapack-netlib/SRC/dtrttp.f index ac10ef4be..5ee3dd5fa 100644 --- a/lapack-netlib/SRC/dtrttp.f +++ b/lapack-netlib/SRC/dtrttp.f @@ -74,7 +74,7 @@ *> *> \param[out] AP *> \verbatim -*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2 +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) *> On exit, the upper or lower triangular matrix A, packed *> columnwise in a linear array. The j-th column of A is stored *> in the array AP as follows: @@ -97,17 +97,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * * ===================================================================== SUBROUTINE DTRTTP( UPLO, N, A, LDA, AP, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/ilaclr.f b/lapack-netlib/SRC/ilaclr.f index c2e0584bb..3aad730b4 100644 --- a/lapack-netlib/SRC/ilaclr.f +++ b/lapack-netlib/SRC/ilaclr.f @@ -53,7 +53,7 @@ *> *> \param[in] A *> \verbatim -*> A is array, dimension (LDA,N) +*> A is COMPLEX array, dimension (LDA,N) *> The m by n matrix A. *> \endverbatim *> @@ -71,17 +71,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complexOTHERauxiliary * * ===================================================================== INTEGER FUNCTION ILACLR( M, N, A, LDA ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER M, N, LDA diff --git a/lapack-netlib/SRC/ilaenv.f b/lapack-netlib/SRC/ilaenv.f index 2be058151..a438ada38 100644 --- a/lapack-netlib/SRC/ilaenv.f +++ b/lapack-netlib/SRC/ilaenv.f @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup OTHERauxiliary * @@ -162,10 +162,10 @@ * ===================================================================== INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*( * ) NAME, OPTS @@ -176,8 +176,8 @@ * * .. Local Scalars .. INTEGER I, IC, IZ, NB, NBMIN, NX - LOGICAL CNAME, SNAME - CHARACTER C1*1, C2*2, C4*2, C3*3, SUBNAM*6 + LOGICAL CNAME, SNAME, TWOSTAGE + CHARACTER C1*1, C2*2, C4*2, C3*3, SUBNAM*16 * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, INT, MIN, REAL @@ -189,8 +189,7 @@ * .. Executable Statements .. * GO TO ( 10, 10, 10, 80, 90, 100, 110, 120, - $ 130, 140, 150, 160, 160, 160, 160, 160, - $ 170, 170, 170, 170, 170 )ISPEC + $ 130, 140, 150, 160, 160, 160, 160, 160)ISPEC * * Invalid value for ISPEC * @@ -257,6 +256,8 @@ C2 = SUBNAM( 2: 3 ) C3 = SUBNAM( 4: 6 ) C4 = C3( 2: 3 ) + TWOSTAGE = LEN( SUBNAM ).GE.11 + $ .AND. SUBNAM( 11: 11 ).EQ.'2' * GO TO ( 50, 60, 70 )ISPEC * @@ -360,9 +361,17 @@ ELSE IF( C2.EQ.'SY' ) THEN IF( C3.EQ.'TRF' ) THEN IF( SNAME ) THEN - NB = 64 + IF( TWOSTAGE ) THEN + NB = 192 + ELSE + NB = 64 + END IF ELSE - NB = 64 + IF( TWOSTAGE ) THEN + NB = 192 + ELSE + NB = 64 + END IF END IF ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN NB = 32 @@ -371,7 +380,11 @@ END IF ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN IF( C3.EQ.'TRF' ) THEN - NB = 64 + IF( TWOSTAGE ) THEN + NB = 192 + ELSE + NB = 64 + END IF ELSE IF( C3.EQ.'TRD' ) THEN NB = 32 ELSE IF( C3.EQ.'GST' ) THEN @@ -690,13 +703,6 @@ * ILAENV = IPARMQ( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) RETURN -* - 170 CONTINUE -* -* 17 <= ISPEC <= 21: 2stage eigenvalues and SVD or related subroutines. -* - ILAENV = IPARAM2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) - RETURN * * End of ILAENV * diff --git a/lapack-netlib/SRC/ilaenv2stage.f b/lapack-netlib/SRC/ilaenv2stage.f new file mode 100644 index 000000000..3c0d34a12 --- /dev/null +++ b/lapack-netlib/SRC/ilaenv2stage.f @@ -0,0 +1,191 @@ +*> \brief \b ILAENV2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAENV2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAENV2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) +* +* .. Scalar Arguments .. +* CHARACTER*( * ) NAME, OPTS +* INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAENV2STAGE is called from the LAPACK routines to choose problem-dependent +*> parameters for the local environment. See ISPEC for a description of +*> the parameters. +*> It sets problem and machine dependent parameters useful for *_2STAGE and +*> related subroutines. +*> +*> ILAENV2STAGE returns an INTEGER +*> if ILAENV2STAGE >= 0: ILAENV2STAGE returns the value of the parameter +* specified by ISPEC +*> if ILAENV2STAGE < 0: if ILAENV2STAGE = -k, the k-th argument had an +* illegal value. +*> +*> This version provides a set of parameters which should give good, +*> but not optimal, performance on many of the currently available +*> computers for the 2-stage solvers. Users are encouraged to modify this +*> subroutine to set the tuning parameters for their particular machine using +*> the option and problem size information in the arguments. +*> +*> This routine will not function correctly if it is converted to all +*> lower case. Converting it to all upper case is allowed. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is INTEGER +*> Specifies the parameter to be returned as the value of +*> ILAENV2STAGE. +*> = 1: the optimal blocksize nb for the reduction to BAND +*> +*> = 2: the optimal blocksize ib for the eigenvectors +*> singular vectors update routine +*> +*> = 3: The length of the array that store the Housholder +*> representation for the second stage +*> Band to Tridiagonal or Bidiagonal +*> +*> = 4: The workspace needed for the routine in input. +*> +*> = 5: For future release. +*> \endverbatim +*> +*> \param[in] NAME +*> \verbatim +*> NAME is CHARACTER*(*) +*> The name of the calling subroutine, in either upper case or +*> lower case. +*> \endverbatim +*> +*> \param[in] OPTS +*> \verbatim +*> OPTS is CHARACTER*(*) +*> The character options to the subroutine NAME, concatenated +*> into a single character string. For example, UPLO = 'U', +*> TRANS = 'T', and DIAG = 'N' for a triangular routine would +*> be specified as OPTS = 'UTN'. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> \endverbatim +*> +*> \param[in] N3 +*> \verbatim +*> N3 is INTEGER +*> \endverbatim +*> +*> \param[in] N4 +*> \verbatim +*> N4 is INTEGER +*> Problem dimensions for the subroutine NAME; these may not all +*> be required. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +*> \author Nick R. Papior +* +*> \date July 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The following conventions have been used when calling ILAENV2STAGE +*> from the LAPACK routines: +*> 1) OPTS is a concatenation of all of the character options to +*> subroutine NAME, in the same order that they appear in the +*> argument list for NAME, even if they are not used in determining +*> the value of the parameter specified by ISPEC. +*> 2) The problem dimensions N1, N2, N3, N4 are specified in the order +*> that they appear in the argument list for NAME. N1 is used +*> first, N2 second, and so on, and unused problem dimensions are +*> passed a value of -1. +*> 3) The parameter value returned by ILAENV2STAGE is checked for validity in +*> the calling subroutine. +*> +*> \endverbatim +*> +* ===================================================================== + INTEGER FUNCTION ILAENV2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2017 +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + INTEGER IISPEC +* .. +* .. External Functions .. + INTEGER IPARAM2STAGE + EXTERNAL IPARAM2STAGE +* .. +* .. Executable Statements .. +* + GO TO ( 10, 10, 10, 10, 10 )ISPEC +* +* Invalid value for ISPEC +* + ILAENV2STAGE = -1 + RETURN +* + 10 CONTINUE +* +* 2stage eigenvalues and SVD or related subroutines. +* + IISPEC = 16 + ISPEC + ILAENV2STAGE = IPARAM2STAGE( IISPEC, NAME, OPTS, + $ N1, N2, N3, N4 ) + RETURN +* +* End of ILAENV2STAGE +* + END diff --git a/lapack-netlib/SRC/ilaslc.f b/lapack-netlib/SRC/ilaslc.f index d7770fd4b..e59c57517 100644 --- a/lapack-netlib/SRC/ilaslc.f +++ b/lapack-netlib/SRC/ilaslc.f @@ -71,17 +71,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERauxiliary * * ===================================================================== INTEGER FUNCTION ILASLC( M, N, A, LDA ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER M, N, LDA @@ -94,7 +94,7 @@ * * .. Parameters .. REAL ZERO - PARAMETER ( ZERO = 0.0D+0 ) + PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I diff --git a/lapack-netlib/SRC/ilaver.f b/lapack-netlib/SRC/ilaver.f deleted file mode 100644 index a99f727d5..000000000 --- a/lapack-netlib/SRC/ilaver.f +++ /dev/null @@ -1,72 +0,0 @@ -*> \brief \b ILAVER returns the LAPACK version. -** -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) -* -* INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> This subroutine returns the LAPACK version. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[out] VERS_MAJOR -*> \verbatim -*> return the lapack major version -*> \endverbatim -*> -*> \param[out] VERS_MINOR -*> \verbatim -*> return the lapack minor version from the major version -*> \endverbatim -*> -*> \param[out] VERS_PATCH -*> \verbatim -*> return the lapack patch version from the minor version -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date December 2016 -* -*> \ingroup OTHERauxiliary -* -* ===================================================================== - SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) -* -* -- LAPACK computational routine (version 3.7.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* June 2016 -* -* ===================================================================== -* - INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH -* ===================================================================== - VERS_MAJOR = 3 - VERS_MINOR = 7 - VERS_PATCH = 0 -* ===================================================================== -* - RETURN - END diff --git a/lapack-netlib/SRC/iparam2stage.F b/lapack-netlib/SRC/iparam2stage.F index 0fc179514..836e20eed 100644 --- a/lapack-netlib/SRC/iparam2stage.F +++ b/lapack-netlib/SRC/iparam2stage.F @@ -38,7 +38,9 @@ *> useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST, *> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD *> and related subroutines for eigenvalue problems. -*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21 +*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21. +*> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5 +*> with a direct conversion ISPEC + 16. *> \endverbatim * * Arguments: @@ -157,7 +159,7 @@ #endif IMPLICIT NONE * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -172,7 +174,7 @@ INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS, $ FACTOPTNB, QROPTNB, LQOPTNB LOGICAL RPREC, CPREC - CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*3 + CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1 * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX diff --git a/lapack-netlib/SRC/iparmq.f b/lapack-netlib/SRC/iparmq.f index c0dedc9ec..e576e0db0 100644 --- a/lapack-netlib/SRC/iparmq.f +++ b/lapack-netlib/SRC/iparmq.f @@ -41,7 +41,7 @@ * *> \param[in] ISPEC *> \verbatim -*> ISPEC is integer scalar +*> ISPEC is INTEGER *> ISPEC specifies which tunable parameter IPARMQ should *> return. *> @@ -117,7 +117,7 @@ *> *> \param[in] N *> \verbatim -*> N is integer scalar +*> N is INTEGER *> N is the order of the Hessenberg matrix H. *> \endverbatim *> @@ -135,7 +135,7 @@ *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer scalar +*> LWORK is INTEGER *> The amount of workspace available. *> \endverbatim * @@ -147,7 +147,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -222,10 +222,10 @@ * ===================================================================== INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IHI, ILO, ISPEC, LWORK, N diff --git a/lapack-netlib/SRC/sbbcsd.f b/lapack-netlib/SRC/sbbcsd.f index 6d1d833da..e1f83976f 100644 --- a/lapack-netlib/SRC/sbbcsd.f +++ b/lapack-netlib/SRC/sbbcsd.f @@ -190,7 +190,7 @@ *> *> \param[in,out] V2T *> \verbatim -*> V2T is REAL array, dimenison (LDV2T,M-Q) +*> V2T is REAL array, dimension (LDV2T,M-Q) *> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is *> premultiplied by the transpose of the right *> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and @@ -332,7 +332,7 @@ $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, $ B22D, B22E, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/sbdsdc.f b/lapack-netlib/SRC/sbdsdc.f index 21c0b640a..1c559cec7 100644 --- a/lapack-netlib/SRC/sbdsdc.f +++ b/lapack-netlib/SRC/sbdsdc.f @@ -205,7 +205,7 @@ SUBROUTINE SBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, $ WORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -311,12 +311,12 @@ WSTART = 1 QSTART = 3 IF( ICOMPQ.EQ.1 ) THEN - CALL SCOPY( N, D, 1, Q( 1 ), 1 ) + CALL SCOPY( N, D, 1, Q( 1 ), 1 ) CALL SCOPY( N-1, E, 1, Q( N+1 ), 1 ) END IF IF( IUPLO.EQ.2 ) THEN QSTART = 5 - WSTART = 2*N - 1 + IF( ICOMPQ .EQ. 2 ) WSTART = 2*N - 1 DO 10 I = 1, N - 1 CALL SLARTG( D( I ), E( I ), CS, SN, R ) D( I ) = R diff --git a/lapack-netlib/SRC/sbdsqr.f b/lapack-netlib/SRC/sbdsqr.f index e80ac4ea9..b0067f679 100644 --- a/lapack-netlib/SRC/sbdsqr.f +++ b/lapack-netlib/SRC/sbdsqr.f @@ -232,7 +232,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup auxOTHERcomputational * @@ -240,10 +240,10 @@ SUBROUTINE SBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, $ LDU, C, LDC, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -276,7 +276,7 @@ * .. * .. Local Scalars .. LOGICAL LOWER, ROTATE - INTEGER I, IDIR, ISUB, ITER, ITERDIVN J, LL, LLL, M, + INTEGER I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M, $ MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, @@ -440,12 +440,12 @@ * IF( M.LE.1 ) $ GO TO 160 -* +* IF( ITER.GE.N ) THEN - ITER = ITER - N + ITER = ITER - N ITERDIVN = ITERDIVN + 1 IF( ITERDIVN.GE.MAXITDIVN ) - $ GO TO 200 + $ GO TO 200 END IF * * Find diagonal block of matrix to work on diff --git a/lapack-netlib/SRC/sbdsvdx.f b/lapack-netlib/SRC/sbdsvdx.f index 4fa16ec85..a4b1887b2 100644 --- a/lapack-netlib/SRC/sbdsvdx.f +++ b/lapack-netlib/SRC/sbdsvdx.f @@ -226,10 +226,10 @@ SUBROUTINE SBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, $ NS, S, Z, LDZ, WORK, IWORK, INFO) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO @@ -269,7 +269,7 @@ EXTERNAL ISAMAX, LSAME, SAXPY, SDOT, SLAMCH, SNRM2 * .. * .. External Subroutines .. - EXTERNAL SCOPY, SLASET, SSCAL, SSWAP + EXTERNAL SCOPY, SLASET, SSCAL, SSWAP, SSTEVX, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, SIGN, SQRT diff --git a/lapack-netlib/SRC/sgebd2.f b/lapack-netlib/SRC/sgebd2.f index eb125fba3..2123ce25e 100644 --- a/lapack-netlib/SRC/sgebd2.f +++ b/lapack-netlib/SRC/sgebd2.f @@ -100,7 +100,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is REAL array dimension (min(M,N)) +*> TAUQ is REAL array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the orthogonal matrix Q. See Further Details. *> \endverbatim @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEcomputational * @@ -189,10 +189,10 @@ * ===================================================================== SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N diff --git a/lapack-netlib/SRC/sgebrd.f b/lapack-netlib/SRC/sgebrd.f index 0f38156dc..e825b4fc9 100644 --- a/lapack-netlib/SRC/sgebrd.f +++ b/lapack-netlib/SRC/sgebrd.f @@ -101,7 +101,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is REAL array dimension (min(M,N)) +*> TAUQ is REAL array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the orthogonal matrix Q. See Further Details. *> \endverbatim @@ -147,7 +147,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realGEcomputational * @@ -205,10 +205,10 @@ SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LWORK, M, N @@ -227,8 +227,7 @@ * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, - $ NBMIN, NX - REAL WS + $ NBMIN, NX, WS * .. * .. External Subroutines .. EXTERNAL SGEBD2, SGEMM, SLABRD, XERBLA diff --git a/lapack-netlib/SRC/sgees.f b/lapack-netlib/SRC/sgees.f index 06319bf73..06c451322 100644 --- a/lapack-netlib/SRC/sgees.f +++ b/lapack-netlib/SRC/sgees.f @@ -80,7 +80,7 @@ *> *> \param[in] SELECT *> \verbatim -*> SELECT is LOGICAL FUNCTION of two REAL arguments +*> SELECT is a LOGICAL FUNCTION of two REAL arguments *> SELECT must be declared EXTERNAL in the calling subroutine. *> If SORT = 'S', SELECT is used to select eigenvalues to sort *> to the top left of the Schur form. @@ -208,7 +208,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEeigen * @@ -216,10 +216,10 @@ SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, $ VS, LDVS, WORK, LWORK, BWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBVS, SORT diff --git a/lapack-netlib/SRC/sgeevx.f b/lapack-netlib/SRC/sgeevx.f index 164cc6f9a..b11f50041 100644 --- a/lapack-netlib/SRC/sgeevx.f +++ b/lapack-netlib/SRC/sgeevx.f @@ -25,11 +25,11 @@ * .. Scalar Arguments .. * CHARACTER BALANC, JOBVL, JOBVR, SENSE * INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N -* REAL ABNRM +* REAL ABNRM * .. * .. Array Arguments .. * INTEGER IWORK( * ) -* REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), +* REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), * $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), * $ WI( * ), WORK( * ), WR( * ) * .. @@ -306,7 +306,7 @@ $ RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) implicit none * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -314,11 +314,11 @@ * .. Scalar Arguments .. CHARACTER BALANC, JOBVL, JOBVR, SENSE INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N - REAL ABNRM + REAL ABNRM * .. * .. Array Arguments .. INTEGER IWORK( * ) - REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), + REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), $ WI( * ), WORK( * ), WR( * ) * .. @@ -335,12 +335,12 @@ CHARACTER JOB, SIDE INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, $ LWORK_TREVC, MAXWRK, MINWRK, NOUT - REAL ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + REAL ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, $ SN * .. * .. Local Arrays .. LOGICAL SELECT( 1 ) - REAL DUM( 1 ) + REAL DUM( 1 ) * .. * .. External Subroutines .. EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, @@ -350,7 +350,7 @@ * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX, ILAENV - REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + REAL SLAMCH, SLANGE, SLAPY2, SNRM2 EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SLANGE, SLAPY2, $ SNRM2 * .. diff --git a/lapack-netlib/SRC/sgejsv.f b/lapack-netlib/SRC/sgejsv.f index 589cf07f6..e4cbe8d0e 100644 --- a/lapack-netlib/SRC/sgejsv.f +++ b/lapack-netlib/SRC/sgejsv.f @@ -271,7 +271,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension at least LWORK. +*> WORK is REAL array, dimension (LWORK) *> On exit, *> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such *> that SCALE*SVA(1:N) are the computed singular values @@ -362,7 +362,7 @@ *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array, dimension M+3*N. +*> IWORK is INTEGER array, dimension (M+3*N). *> On exit, *> IWORK(1) = the numerical rank determined after the initial *> QR factorization with pivoting. See the descriptions @@ -476,7 +476,7 @@ $ M, N, A, LDA, SVA, U, LDU, V, LDV, $ WORK, LWORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/sgelqt.f b/lapack-netlib/SRC/sgelqt.f index 786255d12..9a93af332 100644 --- a/lapack-netlib/SRC/sgelqt.f +++ b/lapack-netlib/SRC/sgelqt.f @@ -91,7 +91,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -100,8 +100,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -110,11 +110,11 @@ *> *> where the vi's represent the vectors which define H(i), which are returned *> in the matrix A. The 1's along the diagonal of V are not stored in A. -*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each -*> block is of order NB except for the last block, which is of order -*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block -*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -122,10 +122,10 @@ * ===================================================================== SUBROUTINE SGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, MB @@ -141,7 +141,7 @@ INTEGER I, IB, IINFO, K * .. * .. External Subroutines .. - EXTERNAL SGEQRT2, SGEQRT3, SLARFB, XERBLA + EXTERNAL SGEQRT2, SGEQRT3, SGELQT3, SLARFB, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/sgelqt3.f b/lapack-netlib/SRC/sgelqt3.f index b94fc278e..292ae88a3 100644 --- a/lapack-netlib/SRC/sgelqt3.f +++ b/lapack-netlib/SRC/sgelqt3.f @@ -83,7 +83,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -92,8 +92,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -114,10 +114,10 @@ * ===================================================================== RECURSIVE SUBROUTINE SGELQT3( M, N, A, LDA, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, LDT @@ -130,13 +130,13 @@ * * .. Parameters .. REAL ONE - PARAMETER ( ONE = 1.0D+00 ) + PARAMETER ( ONE = 1.0E+00 ) * .. * .. Local Scalars .. - INTEGER I, I1, J, J1, M1, M2, N1, N2, IINFO + INTEGER I, I1, J, J1, M1, M2, IINFO * .. * .. External Subroutines .. - EXTERNAL DLARFG, DTRMM, DGEMM, XERBLA + EXTERNAL SLARFG, STRMM, SGEMM, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/sgelsd.f b/lapack-netlib/SRC/sgelsd.f index 91656a9b3..9a18961d6 100644 --- a/lapack-netlib/SRC/sgelsd.f +++ b/lapack-netlib/SRC/sgelsd.f @@ -89,7 +89,7 @@ *> of the matrices B and X. NRHS >= 0. *> \endverbatim *> -*> \param[in] A +*> \param[in,out] A *> \verbatim *> A is REAL array, dimension (LDA,N) *> On entry, the M-by-N matrix A. @@ -195,7 +195,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEsolve * @@ -210,10 +210,10 @@ SUBROUTINE SGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, $ RANK, WORK, LWORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK diff --git a/lapack-netlib/SRC/sgemlqt.f b/lapack-netlib/SRC/sgemlqt.f index 470743429..a8f022bdc 100644 --- a/lapack-netlib/SRC/sgemlqt.f +++ b/lapack-netlib/SRC/sgemlqt.f @@ -18,7 +18,7 @@ *> *> \verbatim *> -*> DGEMQRT overwrites the general real M-by-N matrix C with +*> DGEMLQT overwrites the general real M-by-N matrix C with *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q C C Q @@ -82,7 +82,9 @@ *> *> \param[in] V *> \verbatim -*> V is REAL array, dimension (LDV,K) +*> V is REAL array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DGELQT in the first K rows of its array argument A. @@ -91,16 +93,14 @@ *> \param[in] LDV *> \verbatim *> LDV is INTEGER -*> The leading dimension of the array V. -*> If SIDE = 'L', LDA >= max(1,M); -*> if SIDE = 'R', LDA >= max(1,N). +*> The leading dimension of the array V. LDV >= max(1,K). *> \endverbatim *> *> \param[in] T *> \verbatim *> T is REAL array, dimension (LDT,K) *> The upper triangular factors of the block reflectors -*> as returned by DGELQT, stored as a MB-by-M matrix. +*> as returned by DGELQT, stored as a MB-by-K matrix. *> \endverbatim *> *> \param[in] LDT @@ -143,7 +143,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -151,10 +151,10 @@ SUBROUTINE SGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, $ C, LDC, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -169,14 +169,14 @@ * .. * .. Local Scalars .. LOGICAL LEFT, RIGHT, TRAN, NOTRAN - INTEGER I, IB, LDWORK, KF, Q + INTEGER I, IB, LDWORK, KF * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, DLARFB + EXTERNAL XERBLA, SLARFB * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/SRC/sgeqrt.f b/lapack-netlib/SRC/sgeqrt.f index d8b9fade5..f7c58172c 100644 --- a/lapack-netlib/SRC/sgeqrt.f +++ b/lapack-netlib/SRC/sgeqrt.f @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEcomputational * @@ -133,7 +133,7 @@ *> block is of order NB except for the last block, which is of order *> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block *> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> for the last block) T's are stored in the NB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -141,10 +141,10 @@ * ===================================================================== SUBROUTINE SGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, NB diff --git a/lapack-netlib/SRC/sgesvdx.f b/lapack-netlib/SRC/sgesvdx.f index 24422fdae..d74e98f10 100644 --- a/lapack-netlib/SRC/sgesvdx.f +++ b/lapack-netlib/SRC/sgesvdx.f @@ -263,7 +263,7 @@ $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, $ LWORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -299,7 +299,7 @@ * .. External Subroutines .. EXTERNAL SBDSVDX, SGEBRD, SGELQF, SGEQRF, SLACPY, $ SLASCL, SLASET, SORMBR, SORMLQ, SORMQR, - $ XERBLA + $ SCOPY, XERBLA * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/sgesvj.f b/lapack-netlib/SRC/sgesvj.f index 5e53cea10..7a7901135 100644 --- a/lapack-netlib/SRC/sgesvj.f +++ b/lapack-netlib/SRC/sgesvj.f @@ -54,7 +54,7 @@ * *> \param[in] JOBA *> \verbatim -*> JOBA is CHARACTER* 1 +*> JOBA is CHARACTER*1 *> Specifies the structure of A. *> = 'L': The input matrix A is lower triangular; *> = 'U': The input matrix A is upper triangular; @@ -208,7 +208,7 @@ *> *> \param[in,out] WORK *> \verbatim -*> WORK is REAL array, dimension MAX(6,M+N). +*> WORK is REAL array, dimension (LWORK) *> On entry, *> If JOBU .EQ. 'C' : *> WORK(1) = CTOL, where CTOL defines the threshold for convergence. @@ -260,7 +260,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEcomputational * @@ -323,10 +323,10 @@ SUBROUTINE SGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, $ LDV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDV, LWORK, M, MV, N diff --git a/lapack-netlib/SRC/sgetrf2.f b/lapack-netlib/SRC/sgetrf2.f index a7e778b9f..aa826f6e6 100644 --- a/lapack-netlib/SRC/sgetrf2.f +++ b/lapack-netlib/SRC/sgetrf2.f @@ -38,7 +38,7 @@ *> *> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 *> A = [ -----|----- ] with n1 = min(m,n)/2 -* [ A21 | A22 ] n2 = n-n1 +*> [ A21 | A22 ] n2 = n-n1 *> *> [ A11 ] *> The subroutine calls itself to factor [ --- ], @@ -113,7 +113,7 @@ * ===================================================================== RECURSIVE SUBROUTINE SGETRF2( M, N, A, LDA, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/sgetsls.f b/lapack-netlib/SRC/sgetsls.f index b2312d642..35af66c19 100644 --- a/lapack-netlib/SRC/sgetsls.f +++ b/lapack-netlib/SRC/sgetsls.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEsolve * @@ -160,10 +160,10 @@ SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -185,7 +185,7 @@ INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, $ WSIZEO, WSIZEM, INFO2 - REAL ANRM, BIGNUM, BNRM, SMLNUM, TQ( 5 ), WORKQ + REAL ANRM, BIGNUM, BNRM, SMLNUM, TQ( 5 ), WORKQ( 1 ) * .. * .. External Functions .. LOGICAL LSAME @@ -233,31 +233,31 @@ IF( M.GE.N ) THEN CALL SGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL SGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL SGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL SGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZM, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM ELSE CALL SGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL SGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL SGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL SGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM END IF diff --git a/lapack-netlib/SRC/sggesx.f b/lapack-netlib/SRC/sggesx.f index 223256d55..3c6273dcf 100644 --- a/lapack-netlib/SRC/sggesx.f +++ b/lapack-netlib/SRC/sggesx.f @@ -111,7 +111,7 @@ *> *> \param[in] SELCTG *> \verbatim -*> SELCTG is procedure) LOGICAL FUNCTION of three REAL arguments +*> SELCTG is a LOGICAL FUNCTION of three REAL arguments *> SELCTG must be declared EXTERNAL in the calling subroutine. *> If SORT = 'N', SELCTG is not referenced. *> If SORT = 'S', SELCTG is used to select eigenvalues to sort @@ -337,7 +337,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEeigen * @@ -365,10 +365,10 @@ $ VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, $ LIWORK, BWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBVSL, JOBVSR, SENSE, SORT diff --git a/lapack-netlib/SRC/sgghd3.f b/lapack-netlib/SRC/sgghd3.f index 758f4b5c7..add216140 100644 --- a/lapack-netlib/SRC/sgghd3.f +++ b/lapack-netlib/SRC/sgghd3.f @@ -230,7 +230,7 @@ SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, $ LDQ, Z, LDZ, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.1) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 @@ -266,7 +266,8 @@ EXTERNAL ILAENV, LSAME * .. * .. External Subroutines .. - EXTERNAL SGGHRD, SLARTG, SLASET, SORM22, SROT, XERBLA + EXTERNAL SGGHRD, SLARTG, SLASET, SORM22, SROT, SGEMM, + $ SGEMV, STRMV, SLACPY, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC REAL, MAX diff --git a/lapack-netlib/SRC/sgsvj0.f b/lapack-netlib/SRC/sgsvj0.f index 690b03eb3..e580efc30 100644 --- a/lapack-netlib/SRC/sgsvj0.f +++ b/lapack-netlib/SRC/sgsvj0.f @@ -169,7 +169,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension LWORK. +*> WORK is REAL array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -193,7 +193,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realOTHERcomputational * @@ -218,10 +218,10 @@ SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP @@ -262,7 +262,8 @@ EXTERNAL ISAMAX, LSAME, SDOT, SNRM2 * .. * .. External Subroutines .. - EXTERNAL SAXPY, SCOPY, SLASCL, SLASSQ, SROTM, SSWAP + EXTERNAL SAXPY, SCOPY, SLASCL, SLASSQ, SROTM, SSWAP, + $ XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/sgsvj1.f b/lapack-netlib/SRC/sgsvj1.f index 1005ee2b5..49b81cf4f 100644 --- a/lapack-netlib/SRC/sgsvj1.f +++ b/lapack-netlib/SRC/sgsvj1.f @@ -199,7 +199,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension LWORK. +*> WORK is REAL array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -223,7 +223,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realOTHERcomputational * @@ -236,10 +236,10 @@ SUBROUTINE SGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL EPS, SFMIN, TOL @@ -280,7 +280,8 @@ EXTERNAL ISAMAX, LSAME, SDOT, SNRM2 * .. * .. External Subroutines .. - EXTERNAL SAXPY, SCOPY, SLASCL, SLASSQ, SROTM, SSWAP + EXTERNAL SAXPY, SCOPY, SLASCL, SLASSQ, SROTM, SSWAP, + $ XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/sisnan.f b/lapack-netlib/SRC/sisnan.f index 05f597439..311bfbf95 100644 --- a/lapack-netlib/SRC/sisnan.f +++ b/lapack-netlib/SRC/sisnan.f @@ -21,7 +21,7 @@ * LOGICAL FUNCTION SISNAN( SIN ) * * .. Scalar Arguments .. -* REAL SIN +* REAL, INTENT(IN) :: SIN * .. * * @@ -52,20 +52,20 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * * ===================================================================== LOGICAL FUNCTION SISNAN( SIN ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. - REAL SIN + REAL, INTENT(IN) :: SIN * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/sla_gbamv.f b/lapack-netlib/SRC/sla_gbamv.f index 0798bacd3..b513f24da 100644 --- a/lapack-netlib/SRC/sla_gbamv.f +++ b/lapack-netlib/SRC/sla_gbamv.f @@ -106,7 +106,7 @@ *> *> \param[in] AB *> \verbatim -*> AB is REAL array of DIMENSION ( LDAB, n ) +*> AB is REAL array, dimension ( LDAB, n ) *> Before entry, the leading m by n part of the array AB must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -177,7 +177,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGBcomputational * @@ -185,10 +185,10 @@ SUBROUTINE SLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, $ INCX, BETA, Y, INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL ALPHA, BETA diff --git a/lapack-netlib/SRC/sla_gbrfsx_extended.f b/lapack-netlib/SRC/sla_gbrfsx_extended.f index 8262002bb..a81feb45e 100644 --- a/lapack-netlib/SRC/sla_gbrfsx_extended.f +++ b/lapack-netlib/SRC/sla_gbrfsx_extended.f @@ -208,8 +208,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -255,8 +254,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is REAL array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -399,7 +397,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGBcomputational * @@ -412,10 +410,10 @@ $ Y_TAIL, RCOND, ITHRESH, RTHRESH, $ DZ_UB, IGNORE_CWISE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, diff --git a/lapack-netlib/SRC/sla_geamv.f b/lapack-netlib/SRC/sla_geamv.f index 35ce8b804..7906d04d6 100644 --- a/lapack-netlib/SRC/sla_geamv.f +++ b/lapack-netlib/SRC/sla_geamv.f @@ -94,7 +94,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ) +*> A is REAL array, dimension ( LDA, n ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -138,8 +138,8 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is REAL -*> Array of DIMENSION at least +*> Y is REAL array, +*> dimension at least *> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. @@ -166,7 +166,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEcomputational * @@ -174,10 +174,10 @@ SUBROUTINE SLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, $ Y, INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL ALPHA, BETA diff --git a/lapack-netlib/SRC/sla_syamv.f b/lapack-netlib/SRC/sla_syamv.f index 962e17ac2..d40e7bd95 100644 --- a/lapack-netlib/SRC/sla_syamv.f +++ b/lapack-netlib/SRC/sla_syamv.f @@ -88,7 +88,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION ( LDA, n ). +*> A is REAL array, dimension ( LDA, n ). *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -153,7 +153,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realSYcomputational * @@ -177,10 +177,10 @@ SUBROUTINE SLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL ALPHA, BETA diff --git a/lapack-netlib/SRC/slabrd.f b/lapack-netlib/SRC/slabrd.f index 8073d0031..f4e3d73a2 100644 --- a/lapack-netlib/SRC/slabrd.f +++ b/lapack-netlib/SRC/slabrd.f @@ -110,7 +110,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is REAL array dimension (NB) +*> TAUQ is REAL array, dimension (NB) *> The scalar factors of the elementary reflectors which *> represent the orthogonal matrix Q. See Further Details. *> \endverbatim @@ -156,7 +156,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERauxiliary * @@ -210,10 +210,10 @@ SUBROUTINE SLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, $ LDY ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, LDX, LDY, M, N, NB diff --git a/lapack-netlib/SRC/slaed3.f b/lapack-netlib/SRC/slaed3.f index 3ff991d89..dffd23e43 100644 --- a/lapack-netlib/SRC/slaed3.f +++ b/lapack-netlib/SRC/slaed3.f @@ -116,7 +116,7 @@ *> *> \param[in] Q2 *> \verbatim -*> Q2 is REAL array, dimension (LDQ2, N) +*> Q2 is REAL array, dimension (LDQ2*N) *> The first K columns of this matrix contain the non-deflated *> eigenvectors for the split problem. *> \endverbatim @@ -170,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup auxOTHERcomputational * @@ -185,10 +185,10 @@ SUBROUTINE SLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, $ CTOT, W, S, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, K, LDQ, N, N1 diff --git a/lapack-netlib/SRC/slaisnan.f b/lapack-netlib/SRC/slaisnan.f index 32f723d55..568d632a1 100644 --- a/lapack-netlib/SRC/slaisnan.f +++ b/lapack-netlib/SRC/slaisnan.f @@ -21,7 +21,7 @@ * LOGICAL FUNCTION SLAISNAN( SIN1, SIN2 ) * * .. Scalar Arguments .. -* REAL SIN1, SIN2 +* REAL, INTENT(IN) :: SIN1, SIN2 * .. * * @@ -67,20 +67,20 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * * ===================================================================== LOGICAL FUNCTION SLAISNAN( SIN1, SIN2 ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. - REAL SIN1, SIN2 + REAL, INTENT(IN) :: SIN1, SIN2 * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/slalsa.f b/lapack-netlib/SRC/slalsa.f index 65707a15b..135a6736f 100644 --- a/lapack-netlib/SRC/slalsa.f +++ b/lapack-netlib/SRC/slalsa.f @@ -227,14 +227,12 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array. -*> The dimension must be at least N. +*> WORK is REAL array, dimension (N) *> \endverbatim *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array. -*> The dimension must be at least 3 * N +*> IWORK is INTEGER array, dimension (3*N) *> \endverbatim *> *> \param[out] INFO @@ -252,7 +250,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERcomputational * @@ -269,10 +267,10 @@ $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, $ IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, diff --git a/lapack-netlib/SRC/slamswlq.f b/lapack-netlib/SRC/slamswlq.f index ccdddbb3d..b13d02b6c 100644 --- a/lapack-netlib/SRC/slamswlq.f +++ b/lapack-netlib/SRC/slamswlq.f @@ -18,7 +18,7 @@ *> *> \verbatim *> -*> DLAMQRTS overwrites the general real M-by-N matrix C with +*> SLAMSWLQ overwrites the general real M-by-N matrix C with *> *> *> SIDE = 'L' SIDE = 'R' @@ -26,7 +26,7 @@ *> TRANS = 'T': Q**T * C C * Q**T *> where Q is a real orthogonal matrix defined as the product of blocked *> elementary reflectors computed by short wide LQ -*> factorization (DLASWLQ) +*> factorization (SLASWLQ) *> \endverbatim * * Arguments: @@ -49,7 +49,7 @@ *> \param[in] M *> \verbatim *> M is INTEGER -*> The number of rows of the matrix A. M >=0. +*> The number of rows of the matrix C. M >=0. *> \endverbatim *> *> \param[in] N @@ -88,12 +88,14 @@ *> *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim -*> A is REAL array, dimension (LDA,K) +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the blocked *> elementary reflector H(i), for i = 1,2,...,k, as returned by -*> DLASWLQ in the first k rows of its array argument A. +*> SLASWLQ in the first k rows of its array argument A. *> \endverbatim *> *> \param[in] LDA @@ -200,10 +202,10 @@ SUBROUTINE SLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -279,7 +281,7 @@ END IF * IF((NB.LE.K).OR.(NB.GE.MAX(M,N,K))) THEN - CALL DGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + CALL SGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, $ T, LDT, C, LDC, WORK, INFO) RETURN END IF diff --git a/lapack-netlib/SRC/slamtsqr.f b/lapack-netlib/SRC/slamtsqr.f index 747481da9..84ac86ee2 100644 --- a/lapack-netlib/SRC/slamtsqr.f +++ b/lapack-netlib/SRC/slamtsqr.f @@ -81,7 +81,7 @@ *> N >= NB >= 1. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is REAL array, dimension (LDA,K) *> The i-th column must contain the vector which defines the @@ -195,10 +195,10 @@ SUBROUTINE SLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS diff --git a/lapack-netlib/SRC/slapy2.f b/lapack-netlib/SRC/slapy2.f index 13e21981a..3b1b7e43c 100644 --- a/lapack-netlib/SRC/slapy2.f +++ b/lapack-netlib/SRC/slapy2.f @@ -56,17 +56,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * * ===================================================================== REAL FUNCTION SLAPY2( X, Y ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL X, Y @@ -82,20 +82,35 @@ * .. * .. Local Scalars .. REAL W, XABS, YABS, Z + LOGICAL X_IS_NAN, Y_IS_NAN +* .. +* .. External Functions .. + LOGICAL SISNAN + EXTERNAL SISNAN * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, SQRT * .. * .. Executable Statements .. * - XABS = ABS( X ) - YABS = ABS( Y ) - W = MAX( XABS, YABS ) - Z = MIN( XABS, YABS ) - IF( Z.EQ.ZERO ) THEN - SLAPY2 = W - ELSE - SLAPY2 = W*SQRT( ONE+( Z / W )**2 ) +* .. +* .. Executable Statements .. +* + X_IS_NAN = SISNAN( X ) + Y_IS_NAN = SISNAN( Y ) + IF ( X_IS_NAN ) SLAPY2 = X + IF ( Y_IS_NAN ) SLAPY2 = Y +* + IF ( .NOT.( X_IS_NAN.OR.Y_IS_NAN ) ) THEN + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + SLAPY2 = W + ELSE + SLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF END IF RETURN * diff --git a/lapack-netlib/SRC/slaqr1.f b/lapack-netlib/SRC/slaqr1.f index 7e35b804a..7d7d851ee 100644 --- a/lapack-netlib/SRC/slaqr1.f +++ b/lapack-netlib/SRC/slaqr1.f @@ -55,19 +55,19 @@ * *> \param[in] N *> \verbatim -*> N is integer +*> N is INTEGER *> Order of the matrix H. N must be either 2 or 3. *> \endverbatim *> *> \param[in] H *> \verbatim -*> H is REAL array of dimension (LDH,N) +*> H is REAL array, dimension (LDH,N) *> The 2-by-2 or 3-by-3 matrix H in (*). *> \endverbatim *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> The leading dimension of H as declared in *> the calling procedure. LDH.GE.N *> \endverbatim @@ -95,7 +95,7 @@ *> *> \param[out] V *> \verbatim -*> V is REAL array of dimension N +*> V is REAL array, dimension (N) *> A scalar multiple of the first column of the *> matrix K in (*). *> \endverbatim @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERauxiliary * @@ -121,10 +121,10 @@ * ===================================================================== SUBROUTINE SLAQR1( N, H, LDH, SR1, SI1, SR2, SI2, V ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL SI1, SI2, SR1, SR2 diff --git a/lapack-netlib/SRC/slaqr2.f b/lapack-netlib/SRC/slaqr2.f index 1bcb138c2..8e1f34910 100644 --- a/lapack-netlib/SRC/slaqr2.f +++ b/lapack-netlib/SRC/slaqr2.f @@ -119,7 +119,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -147,14 +147,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -162,19 +162,19 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim *> *> \param[out] SR *> \verbatim -*> SR is REAL array, dimension KBOT +*> SR is REAL array, dimension (KBOT) *> \endverbatim *> *> \param[out] SI *> \verbatim -*> SI is REAL array, dimension KBOT +*> SI is REAL array, dimension (KBOT) *> On output, the real and imaginary parts of approximate *> eigenvalues that may be used for shifts are stored in *> SR(KBOT-ND-NS+1) through SR(KBOT-ND) and @@ -192,14 +192,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -210,14 +210,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -229,21 +229,21 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension LWORK. +*> WORK is REAL array, dimension (LWORK) *> On exit, WORK(1) is set to an estimate of the optimal value *> of LWORK for the given values of N, NW, KTOP and KBOT. *> \endverbatim *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -263,7 +263,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERauxiliary * @@ -278,10 +278,10 @@ $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, diff --git a/lapack-netlib/SRC/slaqr3.f b/lapack-netlib/SRC/slaqr3.f index 2fabacf4a..534e2c489 100644 --- a/lapack-netlib/SRC/slaqr3.f +++ b/lapack-netlib/SRC/slaqr3.f @@ -116,7 +116,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -144,14 +144,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -159,19 +159,19 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim *> *> \param[out] SR *> \verbatim -*> SR is REAL array, dimension KBOT +*> SR is REAL array, dimension (KBOT) *> \endverbatim *> *> \param[out] SI *> \verbatim -*> SI is REAL array, dimension KBOT +*> SI is REAL array, dimension (KBOT) *> On output, the real and imaginary parts of approximate *> eigenvalues that may be used for shifts are stored in *> SR(KBOT-ND-NS+1) through SR(KBOT-ND) and @@ -189,14 +189,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -207,14 +207,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -226,21 +226,21 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension LWORK. +*> WORK is REAL array, dimension (LWORK) *> On exit, WORK(1) is set to an estimate of the optimal value *> of LWORK for the given values of N, NW, KTOP and KBOT. *> \endverbatim *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -275,7 +275,7 @@ $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/slaqr5.f b/lapack-netlib/SRC/slaqr5.f index ea3910a5d..65278e355 100644 --- a/lapack-netlib/SRC/slaqr5.f +++ b/lapack-netlib/SRC/slaqr5.f @@ -48,21 +48,21 @@ * *> \param[in] WANTT *> \verbatim -*> WANTT is logical scalar +*> WANTT is LOGICAL *> WANTT = .true. if the quasi-triangular Schur factor *> is being computed. WANTT is set to .false. otherwise. *> \endverbatim *> *> \param[in] WANTZ *> \verbatim -*> WANTZ is logical scalar +*> WANTZ is LOGICAL *> WANTZ = .true. if the orthogonal Schur factor is being *> computed. WANTZ is set to .false. otherwise. *> \endverbatim *> *> \param[in] KACC22 *> \verbatim -*> KACC22 is integer with value 0, 1, or 2. +*> KACC22 is INTEGER with value 0, 1, or 2. *> Specifies the computation mode of far-from-diagonal *> orthogonal updates. *> = 0: SLAQR5 does not accumulate reflections and does not @@ -78,19 +78,19 @@ *> *> \param[in] N *> \verbatim -*> N is integer scalar +*> N is INTEGER *> N is the order of the Hessenberg matrix H upon which this *> subroutine operates. *> \endverbatim *> *> \param[in] KTOP *> \verbatim -*> KTOP is integer scalar +*> KTOP is INTEGER *> \endverbatim *> *> \param[in] KBOT *> \verbatim -*> KBOT is integer scalar +*> KBOT is INTEGER *> These are the first and last rows and columns of an *> isolated diagonal block upon which the QR sweep is to be *> applied. It is assumed without a check that @@ -101,19 +101,19 @@ *> *> \param[in] NSHFTS *> \verbatim -*> NSHFTS is integer scalar +*> NSHFTS is INTEGER *> NSHFTS gives the number of simultaneous shifts. NSHFTS *> must be positive and even. *> \endverbatim *> *> \param[in,out] SR *> \verbatim -*> SR is REAL array of size (NSHFTS) +*> SR is REAL array, dimension (NSHFTS) *> \endverbatim *> *> \param[in,out] SI *> \verbatim -*> SI is REAL array of size (NSHFTS) +*> SI is REAL array, dimension (NSHFTS) *> SR contains the real parts and SI contains the imaginary *> parts of the NSHFTS shifts of origin that define the *> multi-shift QR sweep. On output SR and SI may be @@ -122,7 +122,7 @@ *> *> \param[in,out] H *> \verbatim -*> H is REAL array of size (LDH,N) +*> H is REAL array, dimension (LDH,N) *> On input H contains a Hessenberg matrix. On output a *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied *> to the isolated diagonal block in rows and columns KTOP @@ -131,7 +131,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer scalar +*> LDH is INTEGER *> LDH is the leading dimension of H just as declared in the *> calling procedure. LDH.GE.MAX(1,N). *> \endverbatim @@ -150,7 +150,7 @@ *> *> \param[in,out] Z *> \verbatim -*> Z is REAL array of size (LDZ,IHIZ) +*> Z is REAL array, dimension (LDZ,IHIZ) *> If WANTZ = .TRUE., then the QR Sweep orthogonal *> similarity transformation is accumulated into *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. @@ -159,71 +159,69 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer scalar +*> LDZ is INTEGER *> LDA is the leading dimension of Z just as declared in *> the calling procedure. LDZ.GE.N. *> \endverbatim *> *> \param[out] V *> \verbatim -*> V is REAL array of size (LDV,NSHFTS/2) +*> V is REAL array, dimension (LDV,NSHFTS/2) *> \endverbatim *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> LDV is the leading dimension of V as declared in the *> calling procedure. LDV.GE.3. *> \endverbatim *> *> \param[out] U *> \verbatim -*> U is REAL array of size -*> (LDU,3*NSHFTS-3) +*> U is REAL array, dimension (LDU,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDU *> \verbatim -*> LDU is integer scalar +*> LDU is INTEGER *> LDU is the leading dimension of U just as declared in the *> in the calling subroutine. LDU.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> NH is the number of columns in array WH available for *> workspace. NH.GE.1. *> \endverbatim *> *> \param[out] WH *> \verbatim -*> WH is REAL array of size (LDWH,NH) +*> WH is REAL array, dimension (LDWH,NH) *> \endverbatim *> *> \param[in] LDWH *> \verbatim -*> LDWH is integer scalar +*> LDWH is INTEGER *> Leading dimension of WH just as declared in the *> calling procedure. LDWH.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer scalar +*> NV is INTEGER *> NV is the number of rows in WV agailable for workspace. *> NV.GE.1. *> \endverbatim *> *> \param[out] WV *> \verbatim -*> WV is REAL array of size -*> (LDWV,3*NSHFTS-3) +*> WV is REAL array, dimension (LDWV,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer scalar +*> LDWV is INTEGER *> LDWV is the leading dimension of WV as declared in the *> in the calling subroutine. LDWV.GE.NV. *> \endverbatim @@ -259,7 +257,7 @@ $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, $ LDU, NV, WV, LDWV, NH, WH, LDWH ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/slarfg.f b/lapack-netlib/SRC/slarfg.f index d63c4ac29..a528ce39c 100644 --- a/lapack-netlib/SRC/slarfg.f +++ b/lapack-netlib/SRC/slarfg.f @@ -99,17 +99,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realOTHERauxiliary * * ===================================================================== SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -170,7 +170,7 @@ CALL SSCAL( N-1, RSAFMN, X, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN - IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000) + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/slarfgp.f b/lapack-netlib/SRC/slarfgp.f index d63a409a1..6a4c08476 100644 --- a/lapack-netlib/SRC/slarfgp.f +++ b/lapack-netlib/SRC/slarfgp.f @@ -97,17 +97,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realOTHERauxiliary * * ===================================================================== SUBROUTINE SLARFGP( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -181,7 +181,7 @@ CALL SSCAL( N-1, BIGNUM, X, INCX ) BETA = BETA*BIGNUM ALPHA = ALPHA*BIGNUM - IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000 ) + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM diff --git a/lapack-netlib/SRC/slarra.f b/lapack-netlib/SRC/slarra.f index fd248c9d6..0456263a6 100644 --- a/lapack-netlib/SRC/slarra.f +++ b/lapack-netlib/SRC/slarra.f @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -136,10 +136,10 @@ SUBROUTINE SLARRA( N, D, E, E2, SPLTOL, TNRM, $ NSPLIT, ISPLIT, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, N, NSPLIT @@ -167,7 +167,13 @@ * .. Executable Statements .. * INFO = 0 - +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * Compute splitting points NSPLIT = 1 IF(SPLTOL.LT.ZERO) THEN diff --git a/lapack-netlib/SRC/slarrb.f b/lapack-netlib/SRC/slarrb.f index c2d130b5e..988e25ff0 100644 --- a/lapack-netlib/SRC/slarrb.f +++ b/lapack-netlib/SRC/slarrb.f @@ -178,7 +178,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -196,10 +196,10 @@ $ RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK, $ PIVMIN, SPDIAM, TWIST, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IFIRST, ILAST, INFO, N, OFFSET, TWIST @@ -236,6 +236,12 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF * MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / $ LOG( TWO ) ) + 2 diff --git a/lapack-netlib/SRC/slarrc.f b/lapack-netlib/SRC/slarrc.f index 8469660a2..f0c033514 100644 --- a/lapack-netlib/SRC/slarrc.f +++ b/lapack-netlib/SRC/slarrc.f @@ -137,7 +137,7 @@ SUBROUTINE SLARRC( JOBT, N, VL, VU, D, E, PIVMIN, $ EIGCNT, LCNT, RCNT, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -170,6 +170,13 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* LCNT = 0 RCNT = 0 EIGCNT = 0 diff --git a/lapack-netlib/SRC/slarrd.f b/lapack-netlib/SRC/slarrd.f index 8da31a999..7a7ec0abb 100644 --- a/lapack-netlib/SRC/slarrd.f +++ b/lapack-netlib/SRC/slarrd.f @@ -329,7 +329,7 @@ $ M, W, WERR, WL, WU, IBLOCK, INDEXW, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -385,6 +385,12 @@ * INFO = 0 * +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * Decode RANGE * IF( LSAME( RANGE, 'A' ) ) THEN diff --git a/lapack-netlib/SRC/slarre.f b/lapack-netlib/SRC/slarre.f index 3c1b51136..ea9b8fcbc 100644 --- a/lapack-netlib/SRC/slarre.f +++ b/lapack-netlib/SRC/slarre.f @@ -305,7 +305,7 @@ $ W, WERR, WGAP, IBLOCK, INDEXW, GERS, PIVMIN, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -360,7 +360,7 @@ * .. * .. External Subroutines .. EXTERNAL SCOPY, SLARNV, SLARRA, SLARRB, SLARRC, SLARRD, - $ SLASQ2 + $ SLASQ2, SLARRK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN @@ -370,7 +370,12 @@ * INFO = 0 - +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF * * Decode RANGE * diff --git a/lapack-netlib/SRC/slarrf.f b/lapack-netlib/SRC/slarrf.f index ee8af8c2a..8c8de52fe 100644 --- a/lapack-netlib/SRC/slarrf.f +++ b/lapack-netlib/SRC/slarrf.f @@ -193,7 +193,7 @@ $ SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, $ DPLUS, LPLUS, WORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -239,6 +239,13 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* FACT = REAL(2**KTRYMAX) EPS = SLAMCH( 'Precision' ) SHIFT = 0 diff --git a/lapack-netlib/SRC/slarrj.f b/lapack-netlib/SRC/slarrj.f index 6ce15164d..a721d0751 100644 --- a/lapack-netlib/SRC/slarrj.f +++ b/lapack-netlib/SRC/slarrj.f @@ -150,7 +150,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -168,10 +168,10 @@ $ RTOL, OFFSET, W, WERR, WORK, IWORK, $ PIVMIN, SPDIAM, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IFIRST, ILAST, INFO, N, OFFSET @@ -203,6 +203,12 @@ * .. Executable Statements .. * INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF * MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / $ LOG( TWO ) ) + 2 diff --git a/lapack-netlib/SRC/slarrk.f b/lapack-netlib/SRC/slarrk.f index 4d625c5ed..f39f074db 100644 --- a/lapack-netlib/SRC/slarrk.f +++ b/lapack-netlib/SRC/slarrk.f @@ -137,7 +137,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -145,10 +145,10 @@ SUBROUTINE SLARRK( N, IW, GL, GU, $ D, E2, PIVMIN, RELTOL, W, WERR, INFO) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, IW, N @@ -179,6 +179,13 @@ * .. * .. Executable Statements .. * +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* * Get machine constants EPS = SLAMCH( 'P' ) diff --git a/lapack-netlib/SRC/slarrr.f b/lapack-netlib/SRC/slarrr.f index e4181ea5e..3f77d3888 100644 --- a/lapack-netlib/SRC/slarrr.f +++ b/lapack-netlib/SRC/slarrr.f @@ -78,7 +78,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -94,10 +94,10 @@ * ===================================================================== SUBROUTINE SLARRR( N, D, E, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER N, INFO @@ -130,6 +130,13 @@ * .. * .. Executable Statements .. * +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* * As a default, do NOT go for relative-accuracy preserving computations. INFO = 1 diff --git a/lapack-netlib/SRC/slarrv.f b/lapack-netlib/SRC/slarrv.f index e574da516..f9e3cf2b9 100644 --- a/lapack-netlib/SRC/slarrv.f +++ b/lapack-netlib/SRC/slarrv.f @@ -68,8 +68,14 @@ *> \verbatim *> VU is REAL *> Upper bound of the interval that contains the desired -*> eigenvalues. VL < VU. Needed to compute gaps on the left or right -*> end of the extremal eigenvalues in the desired RANGE. +*> eigenvalues. VL < VU. +*> Note: VU is currently not used by this implementation of SLARRV, VU is +*> passed to SLARRV because it could be used compute gaps on the right end +*> of the extremal eigenvalues. However, with not much initial accuracy in +*> LAMBDA and VU, the formula can lead to an overestimation of the right gap +*> and thus to inadequately early RQI 'convergence'. This is currently +*> prevented this by forcing a small right gap. And so it turns out that VU +*> is currently not used by this implementation of SLARRV. *> \endverbatim *> *> \param[in,out] D @@ -286,7 +292,7 @@ $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -344,6 +350,13 @@ * .. INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * The first N entries of WORK are reserved for the eigenvalues INDLD = N+1 INDLLD= 2*N+1 diff --git a/lapack-netlib/SRC/slartgs.f b/lapack-netlib/SRC/slartgs.f index fbff9c0e6..59a2182ca 100644 --- a/lapack-netlib/SRC/slartgs.f +++ b/lapack-netlib/SRC/slartgs.f @@ -83,17 +83,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup auxOTHERcomputational * * ===================================================================== SUBROUTINE SLARTGS( X, Y, SIGMA, CS, SN ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. REAL CS, SIGMA, SN, X, Y @@ -108,6 +108,9 @@ * .. Local Scalars .. REAL R, S, THRESH, W, Z * .. +* .. External Subroutines .. + EXTERNAL SLARTGP +* .. * .. External Functions .. REAL SLAMCH EXTERNAL SLAMCH diff --git a/lapack-netlib/SRC/slasd0.f b/lapack-netlib/SRC/slasd0.f index b3eb07358..3b8754427 100644 --- a/lapack-netlib/SRC/slasd0.f +++ b/lapack-netlib/SRC/slasd0.f @@ -81,7 +81,7 @@ *> *> \param[out] U *> \verbatim -*> U is REAL array, dimension at least (LDQ, N) +*> U is REAL array, dimension (LDU, N) *> On exit, U contains the left singular vectors. *> \endverbatim *> @@ -93,7 +93,7 @@ *> *> \param[out] VT *> \verbatim -*> VT is REAL array, dimension at least (LDVT, M) +*> VT is REAL array, dimension (LDVT, M) *> On exit, VT**T contains the right singular vectors. *> \endverbatim *> @@ -136,7 +136,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -150,10 +150,10 @@ SUBROUTINE SLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, $ WORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE diff --git a/lapack-netlib/SRC/slasd3.f b/lapack-netlib/SRC/slasd3.f index 6b1d0f00e..add92f354 100644 --- a/lapack-netlib/SRC/slasd3.f +++ b/lapack-netlib/SRC/slasd3.f @@ -94,8 +94,7 @@ *> *> \param[out] Q *> \verbatim -*> Q is REAL array, -*> dimension at least (LDQ,K). +*> Q is REAL array, dimension (LDQ,K) *> \endverbatim *> *> \param[in] LDQ @@ -210,7 +209,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -225,10 +224,10 @@ $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, $ INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, diff --git a/lapack-netlib/SRC/slasd8.f b/lapack-netlib/SRC/slasd8.f index 81a8625ae..1d1d5cdb8 100644 --- a/lapack-netlib/SRC/slasd8.f +++ b/lapack-netlib/SRC/slasd8.f @@ -133,7 +133,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension at least 3 * K +*> WORK is REAL array, dimension (3*K) *> \endverbatim *> *> \param[out] INFO @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup OTHERauxiliary * @@ -166,10 +166,10 @@ SUBROUTINE SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, $ DSIGMA, WORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER ICOMPQ, INFO, K, LDDIFR diff --git a/lapack-netlib/SRC/slasq4.f b/lapack-netlib/SRC/slasq4.f index 32496a245..99317831c 100644 --- a/lapack-netlib/SRC/slasq4.f +++ b/lapack-netlib/SRC/slasq4.f @@ -151,7 +151,7 @@ SUBROUTINE SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, $ DN1, DN2, TAU, TTYPE, G ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -240,7 +240,6 @@ NP = NN - 9 ELSE NP = NN - 2*PP - B2 = Z( NP-2 ) GAM = DN1 IF( Z( NP-4 ) .GT. Z( NP-2 ) ) $ RETURN diff --git a/lapack-netlib/SRC/slaswlq.f b/lapack-netlib/SRC/slaswlq.f index efd28a6b1..27b5b8067 100644 --- a/lapack-netlib/SRC/slaswlq.f +++ b/lapack-netlib/SRC/slaswlq.f @@ -55,7 +55,7 @@ *> \verbatim *> A is REAL array, dimension (LDA,N) *> On entry, the M-by-N matrix A. -*> On exit, the elements on and bleow the diagonal +*> On exit, the elements on and below the diagonal *> of the array contain the N-by-N lower triangular matrix L; *> the elements above the diagonal represent Q by the rows *> of blocked V (see Further Details). @@ -150,10 +150,10 @@ SUBROUTINE SLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, $ INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT @@ -173,7 +173,7 @@ LOGICAL LSAME EXTERNAL LSAME * .. EXTERNAL SUBROUTINES .. - EXTERNAL SGEQRT, STPQRT, XERBLA + EXTERNAL SGELQT, SGEQRT, STPLQT, STPQRT, XERBLA * .. INTRINSIC FUNCTIONS .. INTRINSIC MAX, MIN, MOD * .. diff --git a/lapack-netlib/SRC/slaswp.f b/lapack-netlib/SRC/slaswp.f index ad12a3a3a..4fcef5b91 100644 --- a/lapack-netlib/SRC/slaswp.f +++ b/lapack-netlib/SRC/slaswp.f @@ -79,14 +79,15 @@ *> \verbatim *> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) *> The vector of pivot indices. Only the elements in positions -*> K1 through K1+(K2-K1)*INCX of IPIV are accessed. -*> IPIV(K) = L implies rows K and L are to be interchanged. +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. *> \endverbatim *> *> \param[in] INCX *> \verbatim *> INCX is INTEGER -*> The increment between successive values of IPIV. If IPIV +*> The increment between successive values of IPIV. If INCX *> is negative, the pivots are applied in reverse order. *> \endverbatim * @@ -98,7 +99,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERauxiliary * @@ -114,10 +115,10 @@ * ===================================================================== SUBROUTINE SLASWP( N, A, LDA, K1, K2, IPIV, INCX ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INCX, K1, K2, LDA, N @@ -135,7 +136,8 @@ * .. * .. Executable Statements .. * -* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. * IF( INCX.GT.0 ) THEN IX0 = K1 diff --git a/lapack-netlib/SRC/slasyf_aa.f b/lapack-netlib/SRC/slasyf_aa.f index 5fb3cc9aa..ed4ef6291 100644 --- a/lapack-netlib/SRC/slasyf_aa.f +++ b/lapack-netlib/SRC/slasyf_aa.f @@ -19,11 +19,11 @@ * =========== * * SUBROUTINE SLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, -* H, LDH, WORK, INFO ) +* H, LDH, WORK ) * * .. Scalar Arguments .. * CHARACTER UPLO -* INTEGER J1, M, NB, LDA, LDH, INFO +* INTEGER J1, M, NB, LDA, LDH * .. * .. Array Arguments .. * INTEGER IPIV( * ) @@ -99,12 +99,12 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[out] IPIV *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV is INTEGER array, dimension (M) *> Details of the row and column interchanges, *> the row and column k were interchanged with the row and *> column IPIV(k). @@ -127,16 +127,6 @@ *> WORK is REAL workspace, dimension (M). *> \endverbatim *> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. -*> \endverbatim * * Authors: * ======== @@ -146,24 +136,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYcomputational * * ===================================================================== SUBROUTINE SLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, - $ H, LDH, WORK, INFO ) + $ H, LDH, WORK ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * * .. Scalar Arguments .. CHARACTER UPLO - INTEGER M, NB, J1, LDA, LDH, INFO + INTEGER M, NB, J1, LDA, LDH * .. * .. Array Arguments .. INTEGER IPIV( * ) @@ -176,7 +166,7 @@ PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * * .. Local Scalars .. - INTEGER J, K, K1, I1, I2 + INTEGER J, K, K1, I1, I2, MJ REAL PIV, ALPHA * .. * .. External Functions .. @@ -185,14 +175,14 @@ EXTERNAL LSAME, ILAENV, ISAMAX * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL SAXPY, SGEMV, SSCAL, SCOPY, SSWAP, SLASET, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * - INFO = 0 J = 1 * * K1 is the first column of the panel to be factorized @@ -216,9 +206,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), -* where H(J:N, J) has been initialized to be A(J, J:N) +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) * IF( K.GT.2 ) THEN * @@ -228,23 +226,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL SGEMV( 'No transpose', M-J+1, J-K1, + CALL SGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( 1, J ), 1, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(i:n, i) into WORK +* Copy H(i:M, i) into WORK * - CALL SCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL SCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), -* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) * ALPHA = -A( K-1, J ) - CALL SAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + CALL SAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -253,8 +251,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) -* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) * IF( K.GT.1 ) THEN ALPHA = -A( K, J ) @@ -262,7 +260,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = ISAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -277,14 +275,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) * I1 = I1+J-1 I2 = I2+J-1 CALL SSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, $ A( J1+I1, I2 ), 1 ) * -* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) * CALL SSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, $ A( J1+I2-1, I2+1 ), LDA ) @@ -315,23 +313,17 @@ * Set A(J, J+1) = T(J, J+1) * A( K, J+1 ) = WORK( 2 ) - IF( (A( K, J ).EQ.ZERO ) .AND. - $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN - IF(INFO .EQ. 0) THEN - INFO = J - ENDIF - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J:N, J), +* Copy A(J+1:M, J+1) into H(J:M, J), * CALL SCOPY( M-J, A( K+1, J+1 ), LDA, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( K, J+1 ).NE.ZERO ) THEN ALPHA = ONE / A( K, J+1 ) @@ -341,10 +333,6 @@ CALL SLASET( 'Full', 1, M-J-1, ZERO, ZERO, $ A( K, J+2 ), LDA) END IF - ELSE - IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 10 @@ -366,9 +354,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, -* where H(J:N, J) has been initialized to be A(J:N, J) +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) * IF( K.GT.2 ) THEN * @@ -378,23 +374,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL SGEMV( 'No transpose', M-J+1, J-K1, + CALL SGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( J, 1 ), LDA, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(J:N, J) into WORK +* Copy H(J:M, J) into WORK * - CALL SCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL SCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), * where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) * ALPHA = -A( J, K-1 ) - CALL SAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + CALL SAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -403,8 +399,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L((J+1):N, J) -* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) * IF( K.GT.1 ) THEN ALPHA = -A( J, K ) @@ -412,7 +408,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = ISAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -427,14 +423,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) * I1 = I1+J-1 I2 = I2+J-1 CALL SSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, $ A( I2, J1+I1 ), LDA ) * -* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) * CALL SSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, $ A( I2+1, J1+I2-1 ), 1 ) @@ -465,22 +461,17 @@ * Set A(J+1, J) = T(J+1, J) * A( J+1, K ) = WORK( 2 ) - IF( (A( J, K ).EQ.ZERO) .AND. - $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN - IF (INFO .EQ. 0) - $ INFO = J - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J+1:N, J), +* Copy A(J+1:M, J+1) into H(J+1:M, J), * CALL SCOPY( M-J, A( J+1, K+1 ), 1, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( J+1, K ).NE.ZERO ) THEN ALPHA = ONE / A( J+1, K ) @@ -490,10 +481,6 @@ CALL SLASET( 'Full', M-J-1, 1, ZERO, ZERO, $ A( J+2, K ), LDA ) END IF - ELSE - IF( (A( J, K ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 30 diff --git a/lapack-netlib/SRC/sorbdb1.f b/lapack-netlib/SRC/sorbdb1.f index 0e9982694..98bc8e798 100644 --- a/lapack-netlib/SRC/sorbdb1.f +++ b/lapack-netlib/SRC/sorbdb1.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -203,7 +203,7 @@ SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/sorbdb2.f b/lapack-netlib/SRC/sorbdb2.f index dec4dbe12..684778ef0 100644 --- a/lapack-netlib/SRC/sorbdb2.f +++ b/lapack-netlib/SRC/sorbdb2.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -201,7 +201,7 @@ SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/sorbdb3.f b/lapack-netlib/SRC/sorbdb3.f index 5b6e946d8..d849613c2 100644 --- a/lapack-netlib/SRC/sorbdb3.f +++ b/lapack-netlib/SRC/sorbdb3.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -202,7 +202,7 @@ SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/sorbdb4.f b/lapack-netlib/SRC/sorbdb4.f index 0b209b2d2..42bbcbda8 100644 --- a/lapack-netlib/SRC/sorbdb4.f +++ b/lapack-netlib/SRC/sorbdb4.f @@ -33,7 +33,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -214,7 +214,7 @@ $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/sorbdb5.f b/lapack-netlib/SRC/sorbdb5.f index f037ba1d3..f5af1db1a 100644 --- a/lapack-netlib/SRC/sorbdb5.f +++ b/lapack-netlib/SRC/sorbdb5.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -156,7 +156,7 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/sorbdb6.f b/lapack-netlib/SRC/sorbdb6.f index 83d96612a..beedbfcb6 100644 --- a/lapack-netlib/SRC/sorbdb6.f +++ b/lapack-netlib/SRC/sorbdb6.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -154,7 +154,7 @@ SUBROUTINE SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/sorcsd.f b/lapack-netlib/SRC/sorcsd.f index 5621d58ed..06c77d8e3 100644 --- a/lapack-netlib/SRC/sorcsd.f +++ b/lapack-netlib/SRC/sorcsd.f @@ -186,7 +186,7 @@ *> *> \param[out] U1 *> \verbatim -*> U1 is REAL array, dimension (P) +*> U1 is REAL array, dimension (LDU1,P) *> If JOBU1 = 'Y', U1 contains the P-by-P orthogonal matrix U1. *> \endverbatim *> @@ -199,7 +199,7 @@ *> *> \param[out] U2 *> \verbatim -*> U2 is REAL array, dimension (M-P) +*> U2 is REAL array, dimension (LDU2,M-P) *> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) orthogonal *> matrix U2. *> \endverbatim @@ -213,7 +213,7 @@ *> *> \param[out] V1T *> \verbatim -*> V1T is REAL array, dimension (Q) +*> V1T is REAL array, dimension (LDV1T,Q) *> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix orthogonal *> matrix V1**T. *> \endverbatim @@ -227,7 +227,7 @@ *> *> \param[out] V2T *> \verbatim -*> V2T is REAL array, dimension (M-Q) +*> V2T is REAL array, dimension (LDV2T,M-Q) *> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) orthogonal *> matrix V2**T. *> \endverbatim @@ -289,7 +289,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERcomputational * @@ -300,10 +300,10 @@ $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, $ LDV2T, WORK, LWORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS diff --git a/lapack-netlib/SRC/sorcsd2by1.f b/lapack-netlib/SRC/sorcsd2by1.f index 1ff4732c9..18a2a7904 100644 --- a/lapack-netlib/SRC/sorcsd2by1.f +++ b/lapack-netlib/SRC/sorcsd2by1.f @@ -36,7 +36,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -233,7 +233,7 @@ $ X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, $ LDV1T, WORK, LWORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/sorm22.f b/lapack-netlib/SRC/sorm22.f index fdb5cd8b1..265e9769e 100644 --- a/lapack-netlib/SRC/sorm22.f +++ b/lapack-netlib/SRC/sorm22.f @@ -53,8 +53,8 @@ *> N2-by-N2 upper triangular matrix. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] SIDE *> \verbatim @@ -163,7 +163,7 @@ SUBROUTINE SORM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 diff --git a/lapack-netlib/SRC/spotrf2.f b/lapack-netlib/SRC/spotrf2.f index 6a371b370..474bd39a8 100644 --- a/lapack-netlib/SRC/spotrf2.f +++ b/lapack-netlib/SRC/spotrf2.f @@ -99,17 +99,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realPOcomputational * * ===================================================================== RECURSIVE SUBROUTINE SPOTRF2( UPLO, N, A, LDA, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -134,7 +134,7 @@ EXTERNAL LSAME, SISNAN * .. * .. External Subroutines .. - EXTERNAL SSYRK, XERBLA + EXTERNAL SSYRK, STRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, SQRT diff --git a/lapack-netlib/SRC/sppsvx.f b/lapack-netlib/SRC/sppsvx.f index 021aa6078..a4ac14942 100644 --- a/lapack-netlib/SRC/sppsvx.f +++ b/lapack-netlib/SRC/sppsvx.f @@ -147,8 +147,7 @@ *> *> \param[in,out] AFP *> \verbatim -*> AFP is REAL array, dimension -*> (N*(N+1)/2) +*> AFP is REAL array, dimension (N*(N+1)/2) *> If FACT = 'F', then AFP is an input argument and on entry *> contains the triangular factor U or L from the Cholesky *> factorization A = U**T*U or A = L*L**T, in the same storage @@ -312,7 +311,7 @@ SUBROUTINE SPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, $ X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 diff --git a/lapack-netlib/SRC/ssb2st_kernels.f b/lapack-netlib/SRC/ssb2st_kernels.f index 7183c9ada..54479f89e 100644 --- a/lapack-netlib/SRC/ssb2st_kernels.f +++ b/lapack-netlib/SRC/ssb2st_kernels.f @@ -47,45 +47,90 @@ * Arguments: * ========== * -*> @param[in] n -*> The order of the matrix A. -*> -*> @param[in] nb -*> The size of the band. -*> -*> @param[in, out] A -*> A pointer to the matrix A. -*> -*> @param[in] lda -*> The leading dimension of the matrix A. +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim *> -*> @param[out] V -*> REAL array, dimension 2*n if eigenvalues only are -*> requested or to be queried for vectors. +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim *> -*> @param[out] TAU -*> REAL array, dimension (2*n). -*> The scalar factors of the Householder reflectors are stored -*> in this array. +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim *> -*> @param[in] st +*> \param[in] ST +*> \verbatim +*> ST is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] ed +*> \param[in] ED +*> \verbatim +*> ED is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] sweep +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] Vblksiz -*> internal parameter for indices. +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim *> -*> @param[in] wantz -*> logical which indicate if Eigenvalue are requested or both -*> Eigenvalue/Eigenvectors. +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is REAL array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array. Workspace of size nb. +*> \endverbatim +*> @param[in] n +*> The order of the matrix A. *> -*> @param[in] work -*> Workspace of size nb. *> *> \par Further Details: * ===================== @@ -128,10 +173,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/ssbev_2stage.f b/lapack-netlib/SRC/ssbev_2stage.f index f77368ab1..542fa8d8b 100644 --- a/lapack-netlib/SRC/ssbev_2stage.f +++ b/lapack-netlib/SRC/ssbev_2stage.f @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realOTHEReigen * @@ -206,10 +206,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -234,9 +234,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, SLANSB - EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SLASCL, SSCAL, SSTEQR, SSTERF, XERBLA, @@ -273,9 +273,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/ssbevd_2stage.f b/lapack-netlib/SRC/ssbevd_2stage.f index b1c67d4d9..84add8440 100644 --- a/lapack-netlib/SRC/ssbevd_2stage.f +++ b/lapack-netlib/SRC/ssbevd_2stage.f @@ -194,7 +194,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realOTHEReigen * @@ -236,10 +236,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -266,9 +266,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, SLANSB - EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SGEMM, SLACPY, SLASCL, SSCAL, SSTEDC, @@ -290,9 +290,9 @@ LIWMIN = 1 LWMIN = 1 ELSE - IB = ILAENV( 18, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) IF( WANTZ ) THEN LIWMIN = 3 + 5*N LWMIN = 1 + 5*N + 2*N**2 diff --git a/lapack-netlib/SRC/ssbevx_2stage.f b/lapack-netlib/SRC/ssbevx_2stage.f index 1d8f2cc16..5962e7827 100644 --- a/lapack-netlib/SRC/ssbevx_2stage.f +++ b/lapack-netlib/SRC/ssbevx_2stage.f @@ -324,7 +324,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -359,9 +359,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, SLANSB - EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SSCAL, @@ -419,9 +419,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = 2*N + LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/ssbgvx.f b/lapack-netlib/SRC/ssbgvx.f index fe2ea6102..3408810bd 100644 --- a/lapack-netlib/SRC/ssbgvx.f +++ b/lapack-netlib/SRC/ssbgvx.f @@ -241,12 +241,12 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension (7N) +*> WORK is REAL array, dimension (7*N) *> \endverbatim *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array, dimension (5N) +*> IWORK is INTEGER array, dimension (5*N) *> \endverbatim *> *> \param[out] IFAIL @@ -294,7 +294,7 @@ $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, $ LDZ, WORK, IWORK, IFAIL, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/ssfrk.f b/lapack-netlib/SRC/ssfrk.f index b2cc4fcad..6dc50fee3 100644 --- a/lapack-netlib/SRC/ssfrk.f +++ b/lapack-netlib/SRC/ssfrk.f @@ -117,7 +117,7 @@ *> *> \param[in] A *> \verbatim -*> A is REAL array of DIMENSION (LDA,ka) +*> A is REAL array, dimension (LDA,ka) *> where KA *> is K when TRANS = 'N' or 'n', and is N otherwise. Before *> entry with TRANS = 'N' or 'n', the leading N--by--K part of @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERcomputational * @@ -166,10 +166,10 @@ SUBROUTINE SSFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, $ C ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. REAL ALPHA, BETA diff --git a/lapack-netlib/SRC/sspgv.f b/lapack-netlib/SRC/sspgv.f index bb8279aa1..3f20f62b6 100644 --- a/lapack-netlib/SRC/sspgv.f +++ b/lapack-netlib/SRC/sspgv.f @@ -77,8 +77,7 @@ *> *> \param[in,out] AP *> \verbatim -*> AP is REAL array, dimension -*> (N*(N+1)/2) +*> AP is REAL array, dimension (N*(N+1)/2) *> On entry, the upper or lower triangle of the symmetric matrix *> A, packed columnwise in a linear array. The j-th column of A *> is stored in the array AP as follows: @@ -153,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHEReigen * @@ -161,10 +160,10 @@ SUBROUTINE SSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, $ INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO diff --git a/lapack-netlib/SRC/sspsvx.f b/lapack-netlib/SRC/sspsvx.f index 53d097389..52819b139 100644 --- a/lapack-netlib/SRC/sspsvx.f +++ b/lapack-netlib/SRC/sspsvx.f @@ -123,8 +123,7 @@ *> *> \param[in,out] AFP *> \verbatim -*> AFP is REAL array, dimension -*> (N*(N+1)/2) +*> AFP is REAL array, dimension (N*(N+1)/2) *> If FACT = 'F', then AFP is an input argument and on entry *> contains the block diagonal matrix D and the multipliers used *> to obtain the factor U or L from the factorization @@ -277,7 +276,7 @@ SUBROUTINE SSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, $ LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 diff --git a/lapack-netlib/SRC/sstegr.f b/lapack-netlib/SRC/sstegr.f index 0a28c5843..37ce8cdbc 100644 --- a/lapack-netlib/SRC/sstegr.f +++ b/lapack-netlib/SRC/sstegr.f @@ -184,7 +184,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -265,7 +265,7 @@ $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, $ LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/sstemr.f b/lapack-netlib/SRC/sstemr.f index cff89ef90..228538161 100644 --- a/lapack-netlib/SRC/sstemr.f +++ b/lapack-netlib/SRC/sstemr.f @@ -222,7 +222,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -321,7 +321,7 @@ $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, $ IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/ssycon_3.f b/lapack-netlib/SRC/ssycon_3.f index 74f6761e2..f91a527f7 100644 --- a/lapack-netlib/SRC/ssycon_3.f +++ b/lapack-netlib/SRC/ssycon_3.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup singleSYcomputational * @@ -157,7 +157,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -171,10 +171,10 @@ SUBROUTINE SSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, $ WORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/ssyconvf.f b/lapack-netlib/SRC/ssyconvf.f index b9069093e..d43b9473f 100644 --- a/lapack-netlib/SRC/ssyconvf.f +++ b/lapack-netlib/SRC/ssyconvf.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE SSYCONVF( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE SSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -189,7 +189,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup singleSYcomputational * @@ -198,7 +198,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -206,10 +206,10 @@ * ===================================================================== SUBROUTINE SSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/ssyconvf_rook.f b/lapack-netlib/SRC/ssyconvf_rook.f index 1ed5774fd..833b9c632 100644 --- a/lapack-netlib/SRC/ssyconvf_rook.f +++ b/lapack-netlib/SRC/ssyconvf_rook.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE SSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE SSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -180,7 +180,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup singleSYcomputational * @@ -189,7 +189,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -197,10 +197,10 @@ * ===================================================================== SUBROUTINE SSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/ssyequb.f b/lapack-netlib/SRC/ssyequb.f index e03f5feb2..deb55104a 100644 --- a/lapack-netlib/SRC/ssyequb.f +++ b/lapack-netlib/SRC/ssyequb.f @@ -116,7 +116,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYcomputational * @@ -131,10 +131,10 @@ * ===================================================================== SUBROUTINE SSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, N @@ -165,7 +165,7 @@ EXTERNAL LSAME, SLAMCH * .. * .. External Subroutines .. - EXTERNAL SLASSQ + EXTERNAL SLASSQ, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT diff --git a/lapack-netlib/SRC/ssyev_2stage.f b/lapack-netlib/SRC/ssyev_2stage.f index 0de3ca7e4..166766919 100644 --- a/lapack-netlib/SRC/ssyev_2stage.f +++ b/lapack-netlib/SRC/ssyev_2stage.f @@ -143,7 +143,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYeigen * @@ -185,10 +185,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -213,9 +213,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, SLANSY - EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, @@ -244,10 +244,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = 2*N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/ssyevd_2stage.f b/lapack-netlib/SRC/ssyevd_2stage.f index d65547732..8ab90b641 100644 --- a/lapack-netlib/SRC/ssyevd_2stage.f +++ b/lapack-netlib/SRC/ssyevd_2stage.f @@ -180,7 +180,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYeigen * @@ -229,10 +229,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -260,9 +260,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, SLANSY - EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF, @@ -295,10 +295,14 @@ LIWMIN = 1 LWMIN = 1 ELSE - KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) IF( WANTZ ) THEN LIWMIN = 3 + 5*N LWMIN = 1 + 6*N + 2*N**2 diff --git a/lapack-netlib/SRC/ssyevr_2stage.f b/lapack-netlib/SRC/ssyevr_2stage.f index 9628a8992..60339cd85 100644 --- a/lapack-netlib/SRC/ssyevr_2stage.f +++ b/lapack-netlib/SRC/ssyevr_2stage.f @@ -383,7 +383,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -418,9 +418,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV, ILAENV2STAGE REAL SLAMCH, SLANSY - EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEMR, SSTEIN, @@ -443,10 +443,10 @@ * LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) * - KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = MAX( 26*N, 5*N + LHTRD + LWTRD ) LIWMIN = MAX( 1, 10*N ) * diff --git a/lapack-netlib/SRC/ssyevx_2stage.f b/lapack-netlib/SRC/ssyevx_2stage.f index fd8518c30..227a70bac 100644 --- a/lapack-netlib/SRC/ssyevx_2stage.f +++ b/lapack-netlib/SRC/ssyevx_2stage.f @@ -302,7 +302,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -336,9 +336,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE REAL SLAMCH, SLANSY - EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ, @@ -393,10 +393,14 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) LWMIN = MAX( 8*N, 3*N + LHTRD + LWTRD ) WORK( 1 ) = LWMIN END IF diff --git a/lapack-netlib/SRC/ssygv_2stage.f b/lapack-netlib/SRC/ssygv_2stage.f index 2a376ea3d..7dfbbaabe 100644 --- a/lapack-netlib/SRC/ssygv_2stage.f +++ b/lapack-netlib/SRC/ssygv_2stage.f @@ -46,7 +46,7 @@ *> positive definite. *> This routine use the 2stage technique for the reduction to tridiagonal *> which showed higher performance on recent architecture and for large -* sizes N>2000. +*> sizes N>2000. *> \endverbatim * * Arguments: @@ -186,7 +186,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYeigen * @@ -228,10 +228,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -250,13 +250,12 @@ * .. Local Scalars .. LOGICAL LQUERY, UPPER, WANTZ CHARACTER TRANS - INTEGER NEIG, - $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL SPOTRF, SSYGST, STRMM, STRSM, XERBLA, @@ -289,10 +288,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = 2*N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/ssygvx.f b/lapack-netlib/SRC/ssygvx.f index 2982e1725..8a99c939f 100644 --- a/lapack-netlib/SRC/ssygvx.f +++ b/lapack-netlib/SRC/ssygvx.f @@ -110,7 +110,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is REAL array, dimension (LDA, N) +*> B is REAL array, dimension (LDB, N) *> On entry, the symmetric matrix B. If UPLO = 'U', the *> leading N-by-N upper triangular part of B contains the *> upper triangular part of the matrix B. If UPLO = 'L', @@ -297,7 +297,7 @@ $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, $ LWORK, IWORK, IFAIL, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/ssysv_aa.f b/lapack-netlib/SRC/ssysv_aa.f index 5221dbad5..abf52b143 100644 --- a/lapack-netlib/SRC/ssysv_aa.f +++ b/lapack-netlib/SRC/ssysv_aa.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYsolve * @@ -162,10 +162,10 @@ SUBROUTINE SSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, $ LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -187,7 +187,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, SSYTRF, SSYTRS, SSYTRS2 + EXTERNAL XERBLA, SSYTRS_AA, SSYTRF_AA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/ssysv_aa_2stage.f b/lapack-netlib/SRC/ssysv_aa_2stage.f new file mode 100644 index 000000000..a738c7415 --- /dev/null +++ b/lapack-netlib/SRC/ssysv_aa_2stage.f @@ -0,0 +1,279 @@ +*> \brief SSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* REAL A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSV_AA_2STAGE computes the solution to a real system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is REAL array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYsolve +* +* ===================================================================== + SUBROUTINE SSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + REAL A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSYTRF_AA_2STAGE, SSYTRS_AA_2STAGE, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSV_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/ssytrd_2stage.f b/lapack-netlib/SRC/ssytrd_2stage.f index 7bb38c76d..7ddc0224e 100644 --- a/lapack-netlib/SRC/ssytrd_2stage.f +++ b/lapack-netlib/SRC/ssytrd_2stage.f @@ -144,7 +144,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension LWORK. +*> WORK is REAL array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYcomputational * @@ -227,10 +227,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER VECT, UPLO @@ -253,8 +253,8 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. Executable Statements .. * @@ -267,10 +267,10 @@ * * Determine the block size, the workspace size and the hous size. * - KD = ILAENV( 17, 'SSYTRD_2STAGE', VECT, N, -1, -1, -1 ) - IB = ILAENV( 18, 'SSYTRD_2STAGE', VECT, N, KD, -1, -1 ) - LHMIN = ILAENV( 19, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) - LWMIN = ILAENV( 20, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) * WRITE(*,*),'SSYTRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, * $ LHMIN, LWMIN * diff --git a/lapack-netlib/SRC/ssytrd_sb2st.F b/lapack-netlib/SRC/ssytrd_sb2st.F index c3c406943..891ec9b53 100644 --- a/lapack-netlib/SRC/ssytrd_sb2st.F +++ b/lapack-netlib/SRC/ssytrd_sb2st.F @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup real16OTHERcomputational * @@ -236,10 +236,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER STAGE1, UPLO, VECT @@ -270,7 +270,7 @@ $ SISEV, SIZETAU, LDV, LHMIN, LWMIN * .. * .. External Subroutines .. - EXTERNAL SSB2ST_KERNELS, SLACPY, SLASET + EXTERNAL SSB2ST_KERNELS, SLACPY, SLASET, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, CEILING, REAL diff --git a/lapack-netlib/SRC/ssytrd_sy2sb.f b/lapack-netlib/SRC/ssytrd_sy2sb.f index a37672eff..c01fe3598 100644 --- a/lapack-netlib/SRC/ssytrd_sy2sb.f +++ b/lapack-netlib/SRC/ssytrd_sy2sb.f @@ -123,7 +123,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension LWORK. +*> WORK is REAL array, dimension (LWORK) *> On exit, if INFO = 0, or if LWORK=-1, *> WORK(1) returns the size of LWORK. *> \endverbatim @@ -132,7 +132,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK which should be calculated -* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns *> this value as the first entry of the WORK array, and no error @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYcomputational * @@ -222,7 +222,7 @@ *> *> where tau is a real scalar, and v is a real vector with *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in -* A(i+kd+2:n,i), and tau in TAU(i). +*> A(i+kd+2:n,i), and tau in TAU(i). *> *> The contents of A on exit are illustrated by the following examples *> with n = 5: @@ -245,10 +245,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -277,7 +277,7 @@ $ TPOS, WPOS, S2POS, S1POS * .. * .. External Subroutines .. - EXTERNAL XERBLA, SSYR2K, SSYMM, SGEMM, + EXTERNAL XERBLA, SSYR2K, SSYMM, SGEMM, SCOPY, $ SLARFT, SGELQF, SGEQRF, SLASET * .. * .. Intrinsic Functions .. diff --git a/lapack-netlib/SRC/ssytrf_aa.f b/lapack-netlib/SRC/ssytrf_aa.f index 98f433afd..4aaa978ad 100644 --- a/lapack-netlib/SRC/ssytrf_aa.f +++ b/lapack-netlib/SRC/ssytrf_aa.f @@ -114,11 +114,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. +*> < 0: if INFO = -i, the i-th argument had an illegal value. *> \endverbatim * * Authors: @@ -129,17 +125,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYcomputational * * ===================================================================== SUBROUTINE SSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +155,7 @@ * * .. Local Scalars .. LOGICAL LQUERY, UPPER - INTEGER J, LWKOPT, IINFO + INTEGER J, LWKOPT INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB REAL ALPHA * .. @@ -169,7 +165,8 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL SLASYF_AA, SGEMV, SSCAL, SCOPY, SSWAP, SGEMM, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -178,7 +175,7 @@ * * Determine the block size * - NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) + NB = ILAENV( 1, 'SSYTRF_AA', UPLO, N, -1, -1, -1 ) * * Test the input parameters. * @@ -214,13 +211,10 @@ ENDIF IPIV( 1 ) = 1 IF ( N.EQ.1 ) THEN - IF ( A( 1, 1 ).EQ.ZERO ) THEN - INFO = 1 - END IF RETURN END IF * -* Adjubst block size based on the workspace size +* Adjust block size based on the workspace size * IF( LWORK.LT.((1+NB)*N) ) THEN NB = ( LWORK-N ) / N @@ -260,11 +254,7 @@ * CALL SLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( MAX(1, J), J+1 ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), - $ IINFO ) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * @@ -383,10 +373,7 @@ * CALL SLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( J+1, MAX(1, J) ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * diff --git a/lapack-netlib/SRC/ssytrf_aa_2stage.f b/lapack-netlib/SRC/ssytrf_aa_2stage.f new file mode 100644 index 000000000..be6809dfa --- /dev/null +++ b/lapack-netlib/SRC/ssytrf_aa_2stage.f @@ -0,0 +1,647 @@ +*> \brief \b SSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* REAL A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRF_AA_2STAGE computes the factorization of a real symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is REAL array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + REAL A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + REAL PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SCOPY, SLACGV, SLACPY, + $ SLASET, SGBTRF, SGEMM, SGETRF, + $ SSYGST, SSWAP, STRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'SSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL SLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL SGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL SGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL SSYGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL SGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call SGETRF +* + DO K = 1, NB + CALL SCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL SGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL SCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL SLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL SLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL STRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL SLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL SSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL SSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL SSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL SSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL SGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL SGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL SLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL SGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL SSYGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL SGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL SGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL SGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL SLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL SLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL STRSM( 'R', 'L', 'T', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) = + $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL SLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL SSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL SSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL SSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL SSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL SLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL SGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of SSYTRF_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/ssytri2.f b/lapack-netlib/SRC/ssytri2.f index 7da7eedce..97b539005 100644 --- a/lapack-netlib/SRC/ssytri2.f +++ b/lapack-netlib/SRC/ssytri2.f @@ -120,17 +120,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYcomputational * * ===================================================================== SUBROUTINE SSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -153,7 +153,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL SSYTRI, SSYTRI2X + EXTERNAL SSYTRI, SSYTRI2X, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/ssytri2x.f b/lapack-netlib/SRC/ssytri2x.f index c218383a6..9aa567fd7 100644 --- a/lapack-netlib/SRC/ssytri2x.f +++ b/lapack-netlib/SRC/ssytri2x.f @@ -87,7 +87,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension (N+NNB+1,NNB+3) +*> WORK is REAL array, dimension (N+NB+1,NB+3) *> \endverbatim *> *> \param[in] NB @@ -113,17 +113,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realSYcomputational * * ===================================================================== SUBROUTINE SSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/ssytri_3.f b/lapack-netlib/SRC/ssytri_3.f index e1ef66ee6..a7d8fe7e8 100644 --- a/lapack-netlib/SRC/ssytri_3.f +++ b/lapack-netlib/SRC/ssytri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup singleSYcomputational * @@ -160,7 +160,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -170,10 +170,10 @@ SUBROUTINE SSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -196,7 +196,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL SSYTRI_3X + EXTERNAL SSYTRI_3X, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/ssytri_3x.f b/lapack-netlib/SRC/ssytri_3x.f index 09c9f9392..a2b011f5f 100644 --- a/lapack-netlib/SRC/ssytri_3x.f +++ b/lapack-netlib/SRC/ssytri_3x.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup singleSYcomputational * @@ -150,7 +150,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -159,10 +159,10 @@ * ===================================================================== SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/ssytrs_3.f b/lapack-netlib/SRC/ssytrs_3.f index bf565704a..4e9881927 100644 --- a/lapack-netlib/SRC/ssytrs_3.f +++ b/lapack-netlib/SRC/ssytrs_3.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup singleSYcomputational * @@ -151,7 +151,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -165,10 +165,10 @@ SUBROUTINE SSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/ssytrs_aa.f b/lapack-netlib/SRC/ssytrs_aa.f index 6d0847362..b05c9f7e6 100644 --- a/lapack-netlib/SRC/ssytrs_aa.f +++ b/lapack-netlib/SRC/ssytrs_aa.f @@ -66,7 +66,7 @@ *> of the matrix B. NRHS >= 0. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is REAL array, dimension (LDA,N) *> Details of factors computed by SSYTRF_AA. @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realSYcomputational * @@ -129,10 +129,10 @@ SUBROUTINE SSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +159,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL SGTSV, SSWAP, STRSM, XERBLA + EXTERNAL SGTSV, SSWAP, SLACPY, STRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/ssytrs_aa_2stage.f b/lapack-netlib/SRC/ssytrs_aa_2stage.f new file mode 100644 index 000000000..c9c7181f2 --- /dev/null +++ b/lapack-netlib/SRC/ssytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b SSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* REAL A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by SSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> Details of factors computed by SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is REAL array, dimension (LTB) +*> Details of factors computed by SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + REAL A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGBTRS, SLASWP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL STRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL SGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL STRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL STRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL SGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL STRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of SSYTRS_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/stfsm.f b/lapack-netlib/SRC/stfsm.f index b8b81a872..a631cc477 100644 --- a/lapack-netlib/SRC/stfsm.f +++ b/lapack-netlib/SRC/stfsm.f @@ -159,7 +159,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is REAL array, DIMENSION (LDB,N) +*> B is REAL array, dimension (LDB,N) *> Before entry, the leading m by n part of the array B must *> contain the right-hand side matrix B, and on exit is *> overwritten by the solution matrix X. @@ -182,7 +182,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERcomputational * @@ -277,10 +277,10 @@ SUBROUTINE STFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, $ B, LDB ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO diff --git a/lapack-netlib/SRC/stgex2.f b/lapack-netlib/SRC/stgex2.f index fc5f9330d..1ae9563c7 100644 --- a/lapack-netlib/SRC/stgex2.f +++ b/lapack-netlib/SRC/stgex2.f @@ -77,7 +77,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is REAL arrays, dimensions (LDA,N) +*> A is REAL array, dimension (LDA,N) *> On entry, the matrix A in the pair (A, B). *> On exit, the updated matrix A. *> \endverbatim @@ -90,7 +90,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is REAL arrays, dimensions (LDB,N) +*> B is REAL array, dimension (LDB,N) *> On entry, the matrix B in the pair (A, B). *> On exit, the updated matrix B. *> \endverbatim @@ -103,7 +103,7 @@ *> *> \param[in,out] Q *> \verbatim -*> Q is REAL array, dimension (LDZ,N) +*> Q is REAL array, dimension (LDQ,N) *> On entry, if WANTQ = .TRUE., the orthogonal matrix Q. *> On exit, the updated matrix Q. *> Not referenced if WANTQ = .FALSE.. @@ -181,7 +181,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEauxiliary * @@ -221,10 +221,10 @@ SUBROUTINE STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, $ LDZ, J1, N1, N2, WORK, LWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL WANTQ, WANTZ diff --git a/lapack-netlib/SRC/stgexc.f b/lapack-netlib/SRC/stgexc.f index a1d9dbb56..840036db7 100644 --- a/lapack-netlib/SRC/stgexc.f +++ b/lapack-netlib/SRC/stgexc.f @@ -111,7 +111,7 @@ *> *> \param[in,out] Q *> \verbatim -*> Q is REAL array, dimension (LDZ,N) +*> Q is REAL array, dimension (LDQ,N) *> On entry, if WANTQ = .TRUE., the orthogonal matrix Q. *> On exit, the updated matrix Q. *> If WANTQ = .FALSE., Q is not referenced. @@ -195,7 +195,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realGEcomputational * @@ -220,10 +220,10 @@ SUBROUTINE STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, $ LDZ, IFST, ILST, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL WANTQ, WANTZ diff --git a/lapack-netlib/SRC/stplqt.f b/lapack-netlib/SRC/stplqt.f index e3c37abff..8077b0863 100644 --- a/lapack-netlib/SRC/stplqt.f +++ b/lapack-netlib/SRC/stplqt.f @@ -73,8 +73,8 @@ *> *> \param[in,out] A *> \verbatim -*> A is REAL array, dimension (LDA,N) -*> On entry, the lower triangular N-by-N matrix A. +*> A is REAL array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. *> \endverbatim @@ -82,7 +82,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -146,26 +146,26 @@ *> C = [ A ] [ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L *> upper trapezoidal matrix B2: *> [ B ] = [ B1 ] [ B2 ] *> [ B1 ] <- M-by-(N-L) rectangular -*> [ B2 ] <- M-by-L upper trapezoidal. +*> [ B2 ] <- M-by-L lower trapezoidal. *> *> The lower trapezoidal matrix B2 consists of the first L columns of a -*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, *> B is rectangular M-by-N; if M=L=N, B is lower triangular. *> *> The matrix W stores the elementary reflectors H(i) in the i-th row *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> [ C ] = [ A ] [ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> [ W ] = [ I ] [ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -189,10 +189,10 @@ SUBROUTINE STPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L, MB diff --git a/lapack-netlib/SRC/stplqt2.f b/lapack-netlib/SRC/stplqt2.f index f1b8e0303..ca38a9540 100644 --- a/lapack-netlib/SRC/stplqt2.f +++ b/lapack-netlib/SRC/stplqt2.f @@ -65,7 +65,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is REAL array, dimension (LDA,N) +*> A is REAL array, dimension (LDA,M) *> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. @@ -74,7 +74,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -133,7 +133,7 @@ *> C = [ A ][ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L *> upper trapezoidal matrix B2: *> @@ -149,13 +149,13 @@ *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> *> C = [ A ][ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> *> W = [ I ][ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -177,10 +177,10 @@ * ===================================================================== SUBROUTINE STPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L diff --git a/lapack-netlib/SRC/stpmlqt.f b/lapack-netlib/SRC/stpmlqt.f index 3f8716402..565dadd0c 100644 --- a/lapack-netlib/SRC/stpmlqt.f +++ b/lapack-netlib/SRC/stpmlqt.f @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleOTHERcomputational * @@ -216,10 +216,10 @@ SUBROUTINE STPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -242,7 +242,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, SLARFB + EXTERNAL SLARFB, STPRFB, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/SRC/stpmqrt.f b/lapack-netlib/SRC/stpmqrt.f index 2a97505b9..b1813b7dd 100644 --- a/lapack-netlib/SRC/stpmqrt.f +++ b/lapack-netlib/SRC/stpmqrt.f @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup realOTHERcomputational * @@ -216,10 +216,10 @@ SUBROUTINE STPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -242,7 +242,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL STPRFB, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/SRC/strevc3.f b/lapack-netlib/SRC/strevc3.f index 84a4a8f58..0df1189f0 100644 --- a/lapack-netlib/SRC/strevc3.f +++ b/lapack-netlib/SRC/strevc3.f @@ -215,7 +215,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @generated from dtrevc3.f, fortran d -> s, Tue Apr 19 01:47:44 2016 * @@ -240,10 +240,10 @@ $ VR, LDVR, MM, M, WORK, LWORK, INFO ) IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE @@ -280,7 +280,7 @@ * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SGEMV, SLALN2, SSCAL, XERBLA, - $ SGEMM, SLABAD, SLASET + $ SLACPY, SGEMM, SLABAD, SLASET * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT diff --git a/lapack-netlib/SRC/strttp.f b/lapack-netlib/SRC/strttp.f index c51a52890..557ccfa46 100644 --- a/lapack-netlib/SRC/strttp.f +++ b/lapack-netlib/SRC/strttp.f @@ -74,7 +74,7 @@ *> *> \param[out] AP *> \verbatim -*> AP is REAL array, dimension (N*(N+1)/2 +*> AP is REAL array, dimension (N*(N+1)/2) *> On exit, the upper or lower triangular matrix A, packed *> columnwise in a linear array. The j-th column of A is stored *> in the array AP as follows: @@ -97,17 +97,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup realOTHERcomputational * * ===================================================================== SUBROUTINE STRTTP( UPLO, N, A, LDA, AP, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zbbcsd.f b/lapack-netlib/SRC/zbbcsd.f index 51e148b42..4647181e2 100644 --- a/lapack-netlib/SRC/zbbcsd.f +++ b/lapack-netlib/SRC/zbbcsd.f @@ -190,7 +190,7 @@ *> *> \param[in,out] V2T *> \verbatim -*> V2T is COMPLEX*16 array, dimenison (LDV2T,M-Q) +*> V2T is COMPLEX*16 array, dimension (LDV2T,M-Q) *> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is *> premultiplied by the conjugate transpose of the right *> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and @@ -332,7 +332,7 @@ $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, $ B22D, B22E, RWORK, LRWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/zcgesv.f b/lapack-netlib/SRC/zcgesv.f index 8240e2b65..bb12d4f3a 100644 --- a/lapack-netlib/SRC/zcgesv.f +++ b/lapack-netlib/SRC/zcgesv.f @@ -142,7 +142,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension (N*NRHS) +*> WORK is COMPLEX*16 array, dimension (N,NRHS) *> This array is used to hold the residual vectors. *> \endverbatim *> @@ -201,7 +201,7 @@ SUBROUTINE ZCGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK, $ SWORK, RWORK, ITER, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -240,7 +240,7 @@ * * .. External Subroutines .. EXTERNAL CGETRS, CGETRF, CLAG2Z, XERBLA, ZAXPY, ZGEMM, - $ ZLACPY, ZLAG2C + $ ZLACPY, ZLAG2C, ZGETRF, ZGETRS * .. * .. External Functions .. INTEGER IZAMAX diff --git a/lapack-netlib/SRC/zcposv.f b/lapack-netlib/SRC/zcposv.f index 3159c3dd9..eafcce623 100644 --- a/lapack-netlib/SRC/zcposv.f +++ b/lapack-netlib/SRC/zcposv.f @@ -150,7 +150,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension (N*NRHS) +*> WORK is COMPLEX*16 array, dimension (N,NRHS) *> This array is used to hold the residual vectors. *> \endverbatim *> @@ -209,7 +209,7 @@ SUBROUTINE ZCPOSV( UPLO, N, NRHS, A, LDA, B, LDB, X, LDX, WORK, $ SWORK, RWORK, ITER, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -248,7 +248,7 @@ * * .. External Subroutines .. EXTERNAL ZAXPY, ZHEMM, ZLACPY, ZLAT2C, ZLAG2C, CLAG2Z, - $ CPOTRF, CPOTRS, XERBLA + $ CPOTRF, CPOTRS, XERBLA, ZPOTRF, ZPOTRS * .. * .. External Functions .. INTEGER IZAMAX diff --git a/lapack-netlib/SRC/zgebal.f b/lapack-netlib/SRC/zgebal.f index 601d54314..68291e2b4 100644 --- a/lapack-netlib/SRC/zgebal.f +++ b/lapack-netlib/SRC/zgebal.f @@ -83,10 +83,12 @@ *> *> \param[out] ILO *> \verbatim +*> ILO is INTEGER *> \endverbatim *> *> \param[out] IHI *> \verbatim +*> IHI is INTEGER *> ILO and IHI are set to INTEGER such that on exit *> A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. *> If JOB = 'N' or 'S', ILO = 1 and IHI = N. @@ -121,7 +123,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEcomputational * @@ -160,10 +162,10 @@ * ===================================================================== SUBROUTINE ZGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOB diff --git a/lapack-netlib/SRC/zgebd2.f b/lapack-netlib/SRC/zgebd2.f index d5752e043..3afe79ec5 100644 --- a/lapack-netlib/SRC/zgebd2.f +++ b/lapack-netlib/SRC/zgebd2.f @@ -100,7 +100,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is COMPLEX*16 array dimension (min(M,N)) +*> TAUQ is COMPLEX*16 array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the unitary matrix Q. See Further Details. *> \endverbatim @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEcomputational * @@ -189,10 +189,10 @@ * ===================================================================== SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N diff --git a/lapack-netlib/SRC/zgebrd.f b/lapack-netlib/SRC/zgebrd.f index 26879a75a..bccd6b8b2 100644 --- a/lapack-netlib/SRC/zgebrd.f +++ b/lapack-netlib/SRC/zgebrd.f @@ -101,7 +101,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is COMPLEX*16 array dimension (min(M,N)) +*> TAUQ is COMPLEX*16 array, dimension (min(M,N)) *> The scalar factors of the elementary reflectors which *> represent the unitary matrix Q. See Further Details. *> \endverbatim @@ -147,7 +147,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16GEcomputational * @@ -205,10 +205,10 @@ SUBROUTINE ZGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LWORK, M, N @@ -227,8 +227,7 @@ * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, - $ NBMIN, NX - DOUBLE PRECISION WS + $ NBMIN, NX, WS * .. * .. External Subroutines .. EXTERNAL XERBLA, ZGEBD2, ZGEMM, ZLABRD diff --git a/lapack-netlib/SRC/zgejsv.f b/lapack-netlib/SRC/zgejsv.f index fcf073514..e8418c680 100644 --- a/lapack-netlib/SRC/zgejsv.f +++ b/lapack-netlib/SRC/zgejsv.f @@ -1,2237 +1,2237 @@ -*> \brief \b ZGEJSV -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download ZGEJSV + dependencies -*> -*> [TGZ] -*> -*> [ZIP] -*> -*> [TXT] -*> \endhtmlonly -* -* Definition: -* =========== -* -* SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, -* M, N, A, LDA, SVA, U, LDU, V, LDV, -* CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) -* -* .. Scalar Arguments .. -* IMPLICIT NONE -* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N -* .. -* .. Array Arguments .. -* COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) -* DOUBLE PRECISION SVA( N ), RWORK( LRWORK ) -* INTEGER IWORK( * ) -* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> ZGEJSV computes the singular value decomposition (SVD) of a complex M-by-N -*> matrix [A], where M >= N. The SVD of [A] is written as -*> -*> [A] = [U] * [SIGMA] * [V]^*, -*> -*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N -*> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and -*> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are -*> the singular values of [A]. The columns of [U] and [V] are the left and -*> the right singular vectors of [A], respectively. The matrices [U] and [V] -*> are computed and stored in the arrays U and V, respectively. The diagonal -*> of [SIGMA] is computed and stored in the array SVA. -*> \endverbatim -*> -*> Arguments: -*> ========== -*> -*> \param[in] JOBA -*> \verbatim -*> JOBA is CHARACTER*1 -*> Specifies the level of accuracy: -*> = 'C': This option works well (high relative accuracy) if A = B * D, -*> with well-conditioned B and arbitrary diagonal matrix D. -*> The accuracy cannot be spoiled by COLUMN scaling. The -*> accuracy of the computed output depends on the condition of -*> B, and the procedure aims at the best theoretical accuracy. -*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is -*> bounded by f(M,N)*epsilon* cond(B), independent of D. -*> The input matrix is preprocessed with the QRF with column -*> pivoting. This initial preprocessing and preconditioning by -*> a rank revealing QR factorization is common for all values of -*> JOBA. Additional actions are specified as follows: -*> = 'E': Computation as with 'C' with an additional estimate of the -*> condition number of B. It provides a realistic error bound. -*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings -*> D1, D2, and well-conditioned matrix C, this option gives -*> higher accuracy than the 'C' option. If the structure of the -*> input matrix is not known, and relative accuracy is -*> desirable, then this option is advisable. The input matrix A -*> is preprocessed with QR factorization with FULL (row and -*> column) pivoting. -*> = 'G' Computation as with 'F' with an additional estimate of the -*> condition number of B, where A=B*D. If A has heavily weighted -*> rows, then using this condition number gives too pessimistic -*> error bound. -*> = 'A': Small singular values are not well determined by the data -*> and are considered as noisy; the matrix is treated as -*> numerically rank defficient. The error in the computed -*> singular values is bounded by f(m,n)*epsilon*||A||. -*> The computed SVD A = U * S * V^* restores A up to -*> f(m,n)*epsilon*||A||. -*> This gives the procedure the licence to discard (set to zero) -*> all singular values below N*epsilon*||A||. -*> = 'R': Similar as in 'A'. Rank revealing property of the initial -*> QR factorization is used do reveal (using triangular factor) -*> a gap sigma_{r+1} < epsilon * sigma_r in which case the -*> numerical RANK is declared to be r. The SVD is computed with -*> absolute error bounds, but more accurately than with 'A'. -*> \endverbatim -*> -*> \param[in] JOBU -*> \verbatim -*> JOBU is CHARACTER*1 -*> Specifies whether to compute the columns of U: -*> = 'U': N columns of U are returned in the array U. -*> = 'F': full set of M left sing. vectors is returned in the array U. -*> = 'W': U may be used as workspace of length M*N. See the description -*> of U. -*> = 'N': U is not computed. -*> \endverbatim -*> -*> \param[in] JOBV -*> \verbatim -*> JOBV is CHARACTER*1 -*> Specifies whether to compute the matrix V: -*> = 'V': N columns of V are returned in the array V; Jacobi rotations -*> are not explicitly accumulated. -*> = 'J': N columns of V are returned in the array V, but they are -*> computed as the product of Jacobi rotations, if JOBT .EQ. 'N'. -*> = 'W': V may be used as workspace of length N*N. See the description -*> of V. -*> = 'N': V is not computed. -*> \endverbatim -*> -*> \param[in] JOBR -*> \verbatim -*> JOBR is CHARACTER*1 -*> Specifies the RANGE for the singular values. Issues the licence to -*> set to zero small positive singular values if they are outside -*> specified range. If A .NE. 0 is scaled so that the largest singular -*> value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues -*> the licence to kill columns of A whose norm in c*A is less than -*> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, -*> where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E'). -*> = 'N': Do not kill small columns of c*A. This option assumes that -*> BLAS and QR factorizations and triangular solvers are -*> implemented to work in that range. If the condition of A -*> is greater than BIG, use ZGESVJ. -*> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] -*> (roughly, as described above). This option is recommended. -*> =========================== -*> For computing the singular values in the FULL range [SFMIN,BIG] -*> use ZGESVJ. -*> \endverbatim -*> -*> \param[in] JOBT -*> \verbatim -*> JOBT is CHARACTER*1 -*> If the matrix is square then the procedure may determine to use -*> transposed A if A^* seems to be better with respect to convergence. -*> If the matrix is not square, JOBT is ignored. -*> The decision is based on two values of entropy over the adjoint -*> orbit of A^* * A. See the descriptions of WORK(6) and WORK(7). -*> = 'T': transpose if entropy test indicates possibly faster -*> convergence of Jacobi process if A^* is taken as input. If A is -*> replaced with A^*, then the row pivoting is included automatically. -*> = 'N': do not speculate. -*> The option 'T' can be used to compute only the singular values, or -*> the full SVD (U, SIGMA and V). For only one set of singular vectors -*> (U or V), the caller should provide both U and V, as one of the -*> matrices is used as workspace if the matrix A is transposed. -*> The implementer can easily remove this constraint and make the -*> code more complicated. See the descriptions of U and V. -*> In general, this option is considered experimental, and 'N'; should -*> be preferred. This is subject to changes in the future. -*> \endverbatim -*> -*> \param[in] JOBP -*> \verbatim -*> JOBP is CHARACTER*1 -*> Issues the licence to introduce structured perturbations to drown -*> denormalized numbers. This licence should be active if the -*> denormals are poorly implemented, causing slow computation, -*> especially in cases of fast convergence (!). For details see [1,2]. -*> For the sake of simplicity, this perturbations are included only -*> when the full SVD or only the singular values are requested. The -*> implementer/user can easily add the perturbation for the cases of -*> computing one set of singular vectors. -*> = 'P': introduce perturbation -*> = 'N': do not perturb -*> \endverbatim -*> -*> \param[in] M -*> \verbatim -*> M is INTEGER -*> The number of rows of the input matrix A. M >= 0. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The number of columns of the input matrix A. M >= N >= 0. -*> \endverbatim -*> -*> \param[in,out] A -*> \verbatim -*> A is COMPLEX*16 array, dimension (LDA,N) -*> On entry, the M-by-N matrix A. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,M). -*> \endverbatim -*> -*> \param[out] SVA -*> \verbatim -*> SVA is DOUBLE PRECISION array, dimension (N) -*> On exit, -*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the -*> computation SVA contains Euclidean column norms of the -*> iterated matrices in the array A. -*> - For WORK(1) .NE. WORK(2): The singular values of A are -*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if -*> sigma_max(A) overflows or if small singular values have been -*> saved from underflow by scaling the input matrix A. -*> - If JOBR='R' then some of the singular values may be returned -*> as exact zeros obtained by "set to zero" because they are -*> below the numerical rank threshold or are denormalized numbers. -*> \endverbatim -*> -*> \param[out] U -*> \verbatim -*> U is COMPLEX*16 array, dimension ( LDU, N ) -*> If JOBU = 'U', then U contains on exit the M-by-N matrix of -*> the left singular vectors. -*> If JOBU = 'F', then U contains on exit the M-by-M matrix of -*> the left singular vectors, including an ONB -*> of the orthogonal complement of the Range(A). -*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), -*> then U is used as workspace if the procedure -*> replaces A with A^*. In that case, [V] is computed -*> in U as left singular vectors of A^* and then -*> copied back to the V array. This 'W' option is just -*> a reminder to the caller that in this case U is -*> reserved as workspace of length N*N. -*> If JOBU = 'N' U is not referenced, unless JOBT='T'. -*> \endverbatim -*> -*> \param[in] LDU -*> \verbatim -*> LDU is INTEGER -*> The leading dimension of the array U, LDU >= 1. -*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. -*> \endverbatim -*> -*> \param[out] V -*> \verbatim -*> V is COMPLEX*16 array, dimension ( LDV, N ) -*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of -*> the right singular vectors; -*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), -*> then V is used as workspace if the pprocedure -*> replaces A with A^*. In that case, [U] is computed -*> in V as right singular vectors of A^* and then -*> copied back to the U array. This 'W' option is just -*> a reminder to the caller that in this case V is -*> reserved as workspace of length N*N. -*> If JOBV = 'N' V is not referenced, unless JOBT='T'. -*> \endverbatim -*> -*> \param[in] LDV -*> \verbatim -*> LDV is INTEGER -*> The leading dimension of the array V, LDV >= 1. -*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. -*> \endverbatim -*> -*> \param[out] CWORK -*> \verbatim -*> CWORK is COMPLEX*16 array, dimension (MAX(2,LWORK)) -*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or -*> LRWORK=-1), then on exit CWORK(1) contains the required length of -*> CWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[in] LWORK -*> \verbatim -*> LWORK is INTEGER -*> Length of CWORK to confirm proper allocation of workspace. -*> LWORK depends on the job: -*> -*> 1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and -*> 1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'): -*> LWORK >= 2*N+1. This is the minimal requirement. -*> ->> For optimal performance (blocked code) the optimal value -*> is LWORK >= N + (N+1)*NB. Here NB is the optimal -*> block size for ZGEQP3 and ZGEQRF. -*> In general, optimal LWORK is computed as -*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ)). -*> 1.2. .. an estimate of the scaled condition number of A is -*> required (JOBA='E', or 'G'). In this case, LWORK the minimal -*> requirement is LWORK >= N*N + 2*N. -*> ->> For optimal performance (blocked code) the optimal value -*> is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N. -*> In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ), -*> N*N+LWORK(ZPOCON)). -*> 2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), -*> (JOBU.EQ.'N') -*> 2.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance, -*> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, -*> ZUNMLQ. In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZGESVJ), -*> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). -*> 2.2 .. an estimate of the scaled condition number of A is -*> required (JOBA='E', or 'G'). -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance, -*> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, -*> ZUNMLQ. In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(ZGEQP3), LWORK(ZPOCON), N+LWORK(ZGESVJ), -*> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). -*> 3. If SIGMA and the left singular vectors are needed -*> 3.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance: -*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR. -*> In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(ZGEQP3), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). -*> 3.2 .. an estimate of the scaled condition number of A is -*> required (JOBA='E', or 'G'). -*> -> the minimal requirement is LWORK >= 3*N. -*> -> For optimal performance: -*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR. -*> In general, the optimal length LWORK is computed as -*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZPOCON), -*> 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). -*> 4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and -*> 4.1. if JOBV.EQ.'V' -*> the minimal requirement is LWORK >= 5*N+2*N*N. -*> 4.2. if JOBV.EQ.'J' the minimal requirement is -*> LWORK >= 4*N+N*N. -*> In both cases, the allocated CWORK can accommodate blocked runs -*> of ZGEQP3, ZGEQRF, ZGELQF, SUNMQR, ZUNMLQ. -*> -*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or -*> LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the -*> minimal length of CWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[out] RWORK -*> \verbatim -*> RWORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK)) -*> On exit, -*> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) -*> such that SCALE*SVA(1:N) are the computed singular values -*> of A. (See the description of SVA().) -*> RWORK(2) = See the description of RWORK(1). -*> RWORK(3) = SCONDA is an estimate for the condition number of -*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') -*> SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). -*> It is computed using SPOCON. It holds -*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA -*> where R is the triangular factor from the QRF of A. -*> However, if R is truncated and the numerical rank is -*> determined to be strictly smaller than N, SCONDA is -*> returned as -1, thus indicating that the smallest -*> singular values might be lost. -*> -*> If full SVD is needed, the following two condition numbers are -*> useful for the analysis of the algorithm. They are provied for -*> a developer/implementer who is familiar with the details of -*> the method. -*> -*> RWORK(4) = an estimate of the scaled condition number of the -*> triangular factor in the first QR factorization. -*> RWORK(5) = an estimate of the scaled condition number of the -*> triangular factor in the second QR factorization. -*> The following two parameters are computed if JOBT .EQ. 'T'. -*> They are provided for a developer/implementer who is familiar -*> with the details of the method. -*> RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy -*> of diag(A^* * A) / Trace(A^* * A) taken as point in the -*> probability simplex. -*> RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).) -*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or -*> LRWORK=-1), then on exit RWORK(1) contains the required length of -*> RWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[in] LRWORK -*> \verbatim -*> LRWORK is INTEGER -*> Length of RWORK to confirm proper allocation of workspace. -*> LRWORK depends on the job: -*> -*> 1. If only the singular values are requested i.e. if -*> LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') -*> then: -*> 1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then: LRWORK = max( 7, 2 * M ). -*> 1.2. Otherwise, LRWORK = max( 7, N ). -*> 2. If singular values with the right singular vectors are requested -*> i.e. if -*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. -*> .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) -*> then: -*> 2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then LRWORK = max( 7, 2 * M ). -*> 2.2. Otherwise, LRWORK = max( 7, N ). -*> 3. If singular values with the left singular vectors are requested, i.e. if -*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. -*> .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) -*> then: -*> 3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then LRWORK = max( 7, 2 * M ). -*> 3.2. Otherwise, LRWORK = max( 7, N ). -*> 4. If singular values with both the left and the right singular vectors -*> are requested, i.e. if -*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. -*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) -*> then: -*> 4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), -*> then LRWORK = max( 7, 2 * M ). -*> 4.2. Otherwise, LRWORK = max( 7, N ). -*> -*> If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and -*> the length of RWORK is returned in RWORK(1). -*> \endverbatim -*> -*> \param[out] IWORK -*> \verbatim -*> IWORK is INTEGER array, of dimension at least 4, that further depends -*> on the job: -*> -*> 1. If only the singular values are requested then: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 2. If the singular values and the right singular vectors are requested then: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 3. If the singular values and the left singular vectors are requested then: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 4. If the singular values with both the left and the right singular vectors -*> are requested, then: -*> 4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is N+M; otherwise the length of IWORK is N. -*> 4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows: -*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) -*> then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N. -*> -*> On exit, -*> IWORK(1) = the numerical rank determined after the initial -*> QR factorization with pivoting. See the descriptions -*> of JOBA and JOBR. -*> IWORK(2) = the number of the computed nonzero singular values -*> IWORK(3) = if nonzero, a warning message: -*> If IWORK(3).EQ.1 then some of the column norms of A -*> were denormalized floats. The requested high accuracy -*> is not warranted by the data. -*> IWORK(4) = 1 or -1. If IWORK(4) .EQ. 1, then the procedure used A^* to -*> do the job as specified by the JOB parameters. -*> If the call to ZGEJSV is a workspace query (indicated by LWORK .EQ. -1 or -*> LRWORK .EQ. -1), then on exit IWORK(1) contains the required length of -*> IWORK for the job parameters used in the call. -*> \endverbatim -*> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> < 0 : if INFO = -i, then the i-th argument had an illegal value. -*> = 0 : successful exit; -*> > 0 : ZGEJSV did not converge in the maximal allowed number -*> of sweeps. The computed values may be inaccurate. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date June 2016 -* -*> \ingroup complex16GEsing -* -*> \par Further Details: -* ===================== -*> -*> \verbatim -*> -*> ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3, -*> ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an -*> additional row pivoting can be used as a preprocessor, which in some -*> cases results in much higher accuracy. An example is matrix A with the -*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned -*> diagonal matrices and C is well-conditioned matrix. In that case, complete -*> pivoting in the first QR factorizations provides accuracy dependent on the -*> condition number of C, and independent of D1, D2. Such higher accuracy is -*> not completely understood theoretically, but it works well in practice. -*> Further, if A can be written as A = B*D, with well-conditioned B and some -*> diagonal D, then the high accuracy is guaranteed, both theoretically and -*> in software, independent of D. For more details see [1], [2]. -*> The computational range for the singular values can be the full range -*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS -*> & LAPACK routines called by ZGEJSV are implemented to work in that range. -*> If that is not the case, then the restriction for safe computation with -*> the singular values in the range of normalized IEEE numbers is that the -*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not -*> overflow. This code (ZGEJSV) is best used in this restricted range, -*> meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are -*> returned as zeros. See JOBR for details on this. -*> Further, this implementation is somewhat slower than the one described -*> in [1,2] due to replacement of some non-LAPACK components, and because -*> the choice of some tuning parameters in the iterative part (ZGESVJ) is -*> left to the implementer on a particular machine. -*> The rank revealing QR factorization (in this code: ZGEQP3) should be -*> implemented as in [3]. We have a new version of ZGEQP3 under development -*> that is more robust than the current one in LAPACK, with a cleaner cut in -*> rank deficient cases. It will be available in the SIGMA library [4]. -*> If M is much larger than N, it is obvious that the initial QRF with -*> column pivoting can be preprocessed by the QRF without pivoting. That -*> well known trick is not used in ZGEJSV because in some cases heavy row -*> weighting can be treated with complete pivoting. The overhead in cases -*> M much larger than N is then only due to pivoting, but the benefits in -*> terms of accuracy have prevailed. The implementer/user can incorporate -*> this extra QRF step easily. The implementer can also improve data movement -*> (matrix transpose, matrix copy, matrix transposed copy) - this -*> implementation of ZGEJSV uses only the simplest, naive data movement. -*> \endverbatim -* -*> \par Contributor: -* ================== -*> -*> Zlatko Drmac, Department of Mathematics, Faculty of Science, -*> University of Zagreb (Zagreb, Croatia); drmac@math.hr -* -*> \par References: -* ================ -*> -*> \verbatim -*> -*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. -*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. -*> LAPACK Working note 169. -*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. -*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. -*> LAPACK Working note 170. -*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR -*> factorization software - a case study. -*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. -*> LAPACK Working note 176. -*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, -*> QSVD, (H,K)-SVD computations. -*> Department of Mathematics, University of Zagreb, 2008, 2016. -*> \endverbatim -* -*> \par Bugs, examples and comments: -* ================================= -*> -*> Please report all bugs and send interesting examples and/or comments to -*> drmac@math.hr. Thank you. -*> -* ===================================================================== - SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, - $ M, N, A, LDA, SVA, U, LDU, V, LDV, - $ CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) -* -* -- LAPACK computational routine (version 3.7.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 -* -* .. Scalar Arguments .. - IMPLICIT NONE - INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N -* .. -* .. Array Arguments .. - COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), - $ CWORK( LWORK ) - DOUBLE PRECISION SVA( N ), RWORK( LRWORK ) - INTEGER IWORK( * ) - CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV -* .. -* -* =========================================================================== -* -* .. Local Parameters .. - DOUBLE PRECISION ZERO, ONE - PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) - COMPLEX*16 CZERO, CONE - PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), CONE = ( 1.0D0, 0.0D0 ) ) -* .. -* .. Local Scalars .. - COMPLEX*16 CTEMP - DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, - $ COND_OK, CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, - $ MAXPRJ, SCALEM, SCONDA, SFMIN, SMALL, TEMP1, - $ USCAL1, USCAL2, XSC - INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING - LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY, - $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL, - $ ROWPIV, RSVEC, TRANSP -* - INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK - INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM, - $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF - INTEGER LWRK_ZGELQF, LWRK_ZGEQP3, LWRK_ZGEQP3N, LWRK_ZGEQRF, - $ LWRK_ZGESVJ, LWRK_ZGESVJV, LWRK_ZGESVJU, LWRK_ZUNMLQ, - $ LWRK_ZUNMQR, LWRK_ZUNMQRM -* .. -* .. Local Arrays - COMPLEX*16 CDUMMY(1) - DOUBLE PRECISION RDUMMY(1) -* -* .. Intrinsic Functions .. - INTRINSIC ABS, DCMPLX, CONJG, DLOG, MAX, MIN, DBLE, NINT, SQRT -* .. -* .. External Functions .. - DOUBLE PRECISION DLAMCH, DZNRM2 - INTEGER IDAMAX, IZAMAX - LOGICAL LSAME - EXTERNAL IDAMAX, IZAMAX, LSAME, DLAMCH, DZNRM2 -* .. -* .. External Subroutines .. - EXTERNAL DLASSQ, ZCOPY, ZGELQF, ZGEQP3, ZGEQRF, ZLACPY, ZLAPMR, - $ ZLASCL, DLASCL, ZLASET, ZLASSQ, ZLASWP, ZUNGQR, ZUNMLQ, - $ ZUNMQR, ZPOCON, DSCAL, ZDSCAL, ZSWAP, ZTRSM, ZLACGV, - $ XERBLA -* - EXTERNAL ZGESVJ -* .. -* -* Test the input arguments -* - LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) - JRACC = LSAME( JOBV, 'J' ) - RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC - ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) - L2RANK = LSAME( JOBA, 'R' ) - L2ABER = LSAME( JOBA, 'A' ) - ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) - L2TRAN = LSAME( JOBT, 'T' ) .AND. ( M .EQ. N ) - L2KILL = LSAME( JOBR, 'R' ) - DEFR = LSAME( JOBR, 'N' ) - L2PERT = LSAME( JOBP, 'P' ) -* - LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) -* - IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. - $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN - INFO = - 1 - ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. - $ ( LSAME( JOBU, 'W' ) .AND. RSVEC .AND. L2TRAN ) ) ) THEN - INFO = - 2 - ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. - $ ( LSAME( JOBV, 'W' ) .AND. LSVEC .AND. L2TRAN ) ) ) THEN - INFO = - 3 - ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN - INFO = - 4 - ELSE IF ( .NOT. ( LSAME(JOBT,'T') .OR. LSAME(JOBT,'N') ) ) THEN - INFO = - 5 - ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN - INFO = - 6 - ELSE IF ( M .LT. 0 ) THEN - INFO = - 7 - ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN - INFO = - 8 - ELSE IF ( LDA .LT. M ) THEN - INFO = - 10 - ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN - INFO = - 13 - ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN - INFO = - 15 - ELSE -* #:) - INFO = 0 - END IF -* - IF ( INFO .EQ. 0 ) THEN -* .. compute the minimal and the optimal workspace lengths -* [[The expressions for computing the minimal and the optimal -* values of LCWORK, LRWORK are written with a lot of redundancy and -* can be simplified. However, this verbose form is useful for -* maintenance and modifications of the code.]] -* -* .. minimal workspace length for ZGEQP3 of an M x N matrix, -* ZGEQRF of an N x N matrix, ZGELQF of an N x N matrix, -* ZUNMLQ for computing N x N matrix, ZUNMQR for computing N x N -* matrix, ZUNMQR for computing M x N matrix, respectively. - LWQP3 = N+1 - LWQRF = MAX( 1, N ) - LWLQF = MAX( 1, N ) - LWUNMLQ = MAX( 1, N ) - LWUNMQR = MAX( 1, N ) - LWUNMQRM = MAX( 1, M ) -* .. minimal workspace length for ZPOCON of an N x N matrix - LWCON = 2 * N -* .. minimal workspace length for ZGESVJ of an N x N matrix, -* without and with explicit accumulation of Jacobi rotations - LWSVDJ = MAX( 2 * N, 1 ) - LWSVDJV = MAX( 2 * N, 1 ) -* .. minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ - LRWQP3 = N - LRWCON = N - LRWSVDJ = N - IF ( LQUERY ) THEN - CALL ZGEQP3( M, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, - $ RDUMMY, IERR ) - LWRK_ZGEQP3 = CDUMMY(1) - CALL ZGEQRF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) - LWRK_ZGEQRF = CDUMMY(1) - CALL ZGELQF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) - LWRK_ZGELQF = CDUMMY(1) - END IF - MINWRK = 2 - OPTWRK = 2 - MINIWRK = N - IF ( .NOT. (LSVEC .OR. RSVEC ) ) THEN -* .. minimal and optimal sizes of the complex workspace if -* only the singular values are requested - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, N**2+LWCON, N+LWQRF, LWSVDJ ) - ELSE - MINWRK = MAX( N+LWQP3, N+LWQRF, LWSVDJ ) - END IF - IF ( LQUERY ) THEN - CALL ZGESVJ( 'L', 'N', 'N', N, N, A, LDA, SVA, N, V, - $ LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_ZGESVJ = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_ZGEQP3, N**2+LWCON, - $ N+LWRK_ZGEQRF, LWRK_ZGESVJ ) - ELSE - OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWRK_ZGEQRF, - $ LWRK_ZGESVJ ) - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - IF ( ERREST ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWCON, LRWSVDJ ) - ELSE - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) - END IF - ELSE - IF ( ERREST ) THEN - MINRWRK = MAX( 7, LRWQP3, LRWCON, LRWSVDJ ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) - END IF - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE IF ( RSVEC .AND. (.NOT.LSVEC) ) THEN -* .. minimal and optimal sizes of the complex workspace if the -* singular values and the right singular vectors are requested - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, LWCON, LWSVDJ, N+LWLQF, - $ 2*N+LWQRF, N+LWSVDJ, N+LWUNMLQ ) - ELSE - MINWRK = MAX( N+LWQP3, LWSVDJ, N+LWLQF, 2*N+LWQRF, - $ N+LWSVDJ, N+LWUNMLQ ) - END IF - IF ( LQUERY ) THEN - CALL ZGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, - $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_ZGESVJ = CDUMMY(1) - CALL ZUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, - $ V, LDV, CDUMMY, -1, IERR ) - LWRK_ZUNMLQ = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_ZGEQP3, LWCON, LWRK_ZGESVJ, - $ N+LWRK_ZGELQF, 2*N+LWRK_ZGEQRF, - $ N+LWRK_ZGESVJ, N+LWRK_ZUNMLQ ) - ELSE - OPTWRK = MAX( N+LWRK_ZGEQP3, LWRK_ZGESVJ,N+LWRK_ZGELQF, - $ 2*N+LWRK_ZGEQRF, N+LWRK_ZGESVJ, - $ N+LWRK_ZUNMLQ ) - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - IF ( ERREST ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) - END IF - ELSE - IF ( ERREST ) THEN - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) - END IF - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE IF ( LSVEC .AND. (.NOT.RSVEC) ) THEN -* .. minimal and optimal sizes of the complex workspace if the -* singular values and the left singular vectors are requested - IF ( ERREST ) THEN - MINWRK = N + MAX( LWQP3,LWCON,N+LWQRF,LWSVDJ,LWUNMQRM ) - ELSE - MINWRK = N + MAX( LWQP3, N+LWQRF, LWSVDJ, LWUNMQRM ) - END IF - IF ( LQUERY ) THEN - CALL ZGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, - $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_ZGESVJ = CDUMMY(1) - CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_ZUNMQRM = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = N + MAX( LWRK_ZGEQP3, LWCON, N+LWRK_ZGEQRF, - $ LWRK_ZGESVJ, LWRK_ZUNMQRM ) - ELSE - OPTWRK = N + MAX( LWRK_ZGEQP3, N+LWRK_ZGEQRF, - $ LWRK_ZGESVJ, LWRK_ZUNMQRM ) - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - IF ( ERREST ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) - END IF - ELSE - IF ( ERREST ) THEN - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) - END IF - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE -* .. minimal and optimal sizes of the complex workspace if the -* full SVD is requested - IF ( .NOT. JRACC ) THEN - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+N**2+LWCON, - $ 2*N+LWQRF, 2*N+LWQP3, - $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, - $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, - $ N+N**2+LWSVDJ, N+LWUNMQRM ) - ELSE - MINWRK = MAX( N+LWQP3, 2*N+N**2+LWCON, - $ 2*N+LWQRF, 2*N+LWQP3, - $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, - $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, - $ N+N**2+LWSVDJ, N+LWUNMQRM ) - END IF - MINIWRK = MINIWRK + N - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - ELSE - IF ( ERREST ) THEN - MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+LWQRF, - $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, - $ N+LWUNMQRM ) - ELSE - MINWRK = MAX( N+LWQP3, 2*N+LWQRF, - $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, - $ N+LWUNMQRM ) - END IF - IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M - END IF - IF ( LQUERY ) THEN - CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_ZUNMQRM = CDUMMY(1) - CALL ZUNMQR( 'L', 'N', N, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_ZUNMQR = CDUMMY(1) - IF ( .NOT. JRACC ) THEN - CALL ZGEQP3( N,N, A, LDA, IWORK, CDUMMY,CDUMMY, -1, - $ RDUMMY, IERR ) - LWRK_ZGEQP3N = CDUMMY(1) - CALL ZGESVJ( 'L', 'U', 'N', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_ZGESVJ = CDUMMY(1) - CALL ZGESVJ( 'U', 'U', 'N', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_ZGESVJU = CDUMMY(1) - CALL ZGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_ZGESVJV = CDUMMY(1) - CALL ZUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, - $ V, LDV, CDUMMY, -1, IERR ) - LWRK_ZUNMLQ = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWCON, - $ 2*N+N**2+LWCON, 2*N+LWRK_ZGEQRF, - $ 2*N+LWRK_ZGEQP3N, - $ 2*N+N**2+N+LWRK_ZGELQF, - $ 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWRK_ZGESVJ, - $ 2*N+N**2+N+LWRK_ZGESVJV, - $ 2*N+N**2+N+LWRK_ZUNMQR, - $ 2*N+N**2+N+LWRK_ZUNMLQ, - $ N+N**2+LWRK_ZGESVJU, - $ N+LWRK_ZUNMQRM ) - ELSE - OPTWRK = MAX( N+LWRK_ZGEQP3, - $ 2*N+N**2+LWCON, 2*N+LWRK_ZGEQRF, - $ 2*N+LWRK_ZGEQP3N, - $ 2*N+N**2+N+LWRK_ZGELQF, - $ 2*N+N**2+N+N**2+LWCON, - $ 2*N+N**2+N+LWRK_ZGESVJ, - $ 2*N+N**2+N+LWRK_ZGESVJV, - $ 2*N+N**2+N+LWRK_ZUNMQR, - $ 2*N+N**2+N+LWRK_ZUNMLQ, - $ N+N**2+LWRK_ZGESVJU, - $ N+LWRK_ZUNMQRM ) - END IF - ELSE - CALL ZGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, - $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) - LWRK_ZGESVJV = CDUMMY(1) - CALL ZUNMQR( 'L', 'N', N, N, N, CDUMMY, N, CDUMMY, - $ V, LDV, CDUMMY, -1, IERR ) - LWRK_ZUNMQR = CDUMMY(1) - CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, - $ LDU, CDUMMY, -1, IERR ) - LWRK_ZUNMQRM = CDUMMY(1) - IF ( ERREST ) THEN - OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWCON, - $ 2*N+LWRK_ZGEQRF, 2*N+N**2, - $ 2*N+N**2+LWRK_ZGESVJV, - $ 2*N+N**2+N+LWRK_ZUNMQR,N+LWRK_ZUNMQRM ) - ELSE - OPTWRK = MAX( N+LWRK_ZGEQP3, 2*N+LWRK_ZGEQRF, - $ 2*N+N**2, 2*N+N**2+LWRK_ZGESVJV, - $ 2*N+N**2+N+LWRK_ZUNMQR, - $ N+LWRK_ZUNMQRM ) - END IF - END IF - END IF - IF ( L2TRAN .OR. ROWPIV ) THEN - MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) - ELSE - MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) - END IF - END IF - MINWRK = MAX( 2, MINWRK ) - OPTWRK = MAX( 2, OPTWRK ) - IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 - IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 - END IF -* - IF ( INFO .NE. 0 ) THEN -* #:( - CALL XERBLA( 'ZGEJSV', - INFO ) - RETURN - ELSE IF ( LQUERY ) THEN - CWORK(1) = OPTWRK - CWORK(2) = MINWRK - RWORK(1) = MINRWRK - IWORK(1) = MAX( 4, MINIWRK ) - RETURN - END IF -* -* Quick return for void matrix (Y3K safe) -* #:) - IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN - IWORK(1:4) = 0 - RWORK(1:7) = 0 - RETURN - ENDIF -* -* Determine whether the matrix U should be M x N or M x M -* - IF ( LSVEC ) THEN - N1 = N - IF ( LSAME( JOBU, 'F' ) ) N1 = M - END IF -* -* Set numerical parameters -* -*! NOTE: Make sure DLAMCH() does not fail on the target architecture. -* - EPSLN = DLAMCH('Epsilon') - SFMIN = DLAMCH('SafeMinimum') - SMALL = SFMIN / EPSLN - BIG = DLAMCH('O') -* BIG = ONE / SFMIN -* -* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N -* -*(!) If necessary, scale SVA() to protect the largest norm from -* overflow. It is possible that this scaling pushes the smallest -* column norm left from the underflow threshold (extreme case). -* - SCALEM = ONE / SQRT(DBLE(M)*DBLE(N)) - NOSCAL = .TRUE. - GOSCAL = .TRUE. - DO 1874 p = 1, N - AAPP = ZERO - AAQQ = ONE - CALL ZLASSQ( M, A(1,p), 1, AAPP, AAQQ ) - IF ( AAPP .GT. BIG ) THEN - INFO = - 9 - CALL XERBLA( 'ZGEJSV', -INFO ) - RETURN - END IF - AAQQ = SQRT(AAQQ) - IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN - SVA(p) = AAPP * AAQQ - ELSE - NOSCAL = .FALSE. - SVA(p) = AAPP * ( AAQQ * SCALEM ) - IF ( GOSCAL ) THEN - GOSCAL = .FALSE. - CALL DSCAL( p-1, SCALEM, SVA, 1 ) - END IF - END IF - 1874 CONTINUE -* - IF ( NOSCAL ) SCALEM = ONE -* - AAPP = ZERO - AAQQ = BIG - DO 4781 p = 1, N - AAPP = MAX( AAPP, SVA(p) ) - IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) - 4781 CONTINUE -* -* Quick return for zero M x N matrix -* #:) - IF ( AAPP .EQ. ZERO ) THEN - IF ( LSVEC ) CALL ZLASET( 'G', M, N1, CZERO, CONE, U, LDU ) - IF ( RSVEC ) CALL ZLASET( 'G', N, N, CZERO, CONE, V, LDV ) - RWORK(1) = ONE - RWORK(2) = ONE - IF ( ERREST ) RWORK(3) = ONE - IF ( LSVEC .AND. RSVEC ) THEN - RWORK(4) = ONE - RWORK(5) = ONE - END IF - IF ( L2TRAN ) THEN - RWORK(6) = ZERO - RWORK(7) = ZERO - END IF - IWORK(1) = 0 - IWORK(2) = 0 - IWORK(3) = 0 - IWORK(4) = -1 - RETURN - END IF -* -* Issue warning if denormalized column norms detected. Override the -* high relative accuracy request. Issue licence to kill nonzero columns -* (set them to zero) whose norm is less than sigma_max / BIG (roughly). -* #:( - WARNING = 0 - IF ( AAQQ .LE. SFMIN ) THEN - L2RANK = .TRUE. - L2KILL = .TRUE. - WARNING = 1 - END IF -* -* Quick return for one-column matrix -* #:) - IF ( N .EQ. 1 ) THEN -* - IF ( LSVEC ) THEN - CALL ZLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) - CALL ZLACPY( 'A', M, 1, A, LDA, U, LDU ) -* computing all M left singular vectors of the M x 1 matrix - IF ( N1 .NE. N ) THEN - CALL ZGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR ) - CALL ZUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR ) - CALL ZCOPY( M, A(1,1), 1, U(1,1), 1 ) - END IF - END IF - IF ( RSVEC ) THEN - V(1,1) = CONE - END IF - IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN - SVA(1) = SVA(1) / SCALEM - SCALEM = ONE - END IF - RWORK(1) = ONE / SCALEM - RWORK(2) = ONE - IF ( SVA(1) .NE. ZERO ) THEN - IWORK(1) = 1 - IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN - IWORK(2) = 1 - ELSE - IWORK(2) = 0 - END IF - ELSE - IWORK(1) = 0 - IWORK(2) = 0 - END IF - IWORK(3) = 0 - IWORK(4) = -1 - IF ( ERREST ) RWORK(3) = ONE - IF ( LSVEC .AND. RSVEC ) THEN - RWORK(4) = ONE - RWORK(5) = ONE - END IF - IF ( L2TRAN ) THEN - RWORK(6) = ZERO - RWORK(7) = ZERO - END IF - RETURN -* - END IF -* - TRANSP = .FALSE. -* - AATMAX = -ONE - AATMIN = BIG - IF ( ROWPIV .OR. L2TRAN ) THEN -* -* Compute the row norms, needed to determine row pivoting sequence -* (in the case of heavily row weighted A, row pivoting is strongly -* advised) and to collect information needed to compare the -* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.). -* - IF ( L2TRAN ) THEN - DO 1950 p = 1, M - XSC = ZERO - TEMP1 = ONE - CALL ZLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) -* ZLASSQ gets both the ell_2 and the ell_infinity norm -* in one pass through the vector - RWORK(M+p) = XSC * SCALEM - RWORK(p) = XSC * (SCALEM*SQRT(TEMP1)) - AATMAX = MAX( AATMAX, RWORK(p) ) - IF (RWORK(p) .NE. ZERO) - $ AATMIN = MIN(AATMIN,RWORK(p)) - 1950 CONTINUE - ELSE - DO 1904 p = 1, M - RWORK(M+p) = SCALEM*ABS( A(p,IZAMAX(N,A(p,1),LDA)) ) - AATMAX = MAX( AATMAX, RWORK(M+p) ) - AATMIN = MIN( AATMIN, RWORK(M+p) ) - 1904 CONTINUE - END IF -* - END IF -* -* For square matrix A try to determine whether A^* would be better -* input for the preconditioned Jacobi SVD, with faster convergence. -* The decision is based on an O(N) function of the vector of column -* and row norms of A, based on the Shannon entropy. This should give -* the right choice in most cases when the difference actually matters. -* It may fail and pick the slower converging side. -* - ENTRA = ZERO - ENTRAT = ZERO - IF ( L2TRAN ) THEN -* - XSC = ZERO - TEMP1 = ONE - CALL DLASSQ( N, SVA, 1, XSC, TEMP1 ) - TEMP1 = ONE / TEMP1 -* - ENTRA = ZERO - DO 1113 p = 1, N - BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 - IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * DLOG(BIG1) - 1113 CONTINUE - ENTRA = - ENTRA / DLOG(DBLE(N)) -* -* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex. -* It is derived from the diagonal of A^* * A. Do the same with the -* diagonal of A * A^*, compute the entropy of the corresponding -* probability distribution. Note that A * A^* and A^* * A have the -* same trace. -* - ENTRAT = ZERO - DO 1114 p = 1, M - BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1 - IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * DLOG(BIG1) - 1114 CONTINUE - ENTRAT = - ENTRAT / DLOG(DBLE(M)) -* -* Analyze the entropies and decide A or A^*. Smaller entropy -* usually means better input for the algorithm. -* - TRANSP = ( ENTRAT .LT. ENTRA ) -* -* If A^* is better than A, take the adjoint of A. This is allowed -* only for square matrices, M=N. - IF ( TRANSP ) THEN -* In an optimal implementation, this trivial transpose -* should be replaced with faster transpose. - DO 1115 p = 1, N - 1 - A(p,p) = CONJG(A(p,p)) - DO 1116 q = p + 1, N - CTEMP = CONJG(A(q,p)) - A(q,p) = CONJG(A(p,q)) - A(p,q) = CTEMP - 1116 CONTINUE - 1115 CONTINUE - A(N,N) = CONJG(A(N,N)) - DO 1117 p = 1, N - RWORK(M+p) = SVA(p) - SVA(p) = RWORK(p) -* previously computed row 2-norms are now column 2-norms -* of the transposed matrix - 1117 CONTINUE - TEMP1 = AAPP - AAPP = AATMAX - AATMAX = TEMP1 - TEMP1 = AAQQ - AAQQ = AATMIN - AATMIN = TEMP1 - KILL = LSVEC - LSVEC = RSVEC - RSVEC = KILL - IF ( LSVEC ) N1 = N -* - ROWPIV = .TRUE. - END IF -* - END IF -* END IF L2TRAN -* -* Scale the matrix so that its maximal singular value remains less -* than SQRT(BIG) -- the matrix is scaled so that its maximal column -* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep -* SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and -* BLAS routines that, in some implementations, are not capable of -* working in the full interval [SFMIN,BIG] and that they may provoke -* overflows in the intermediate results. If the singular values spread -* from SFMIN to BIG, then ZGESVJ will compute them. So, in that case, -* one should use ZGESVJ instead of ZGEJSV. -* >> change in the April 2016 update: allow bigger range, i.e. the -* largest column is allowed up to BIG/N and ZGESVJ will do the rest. - BIG1 = SQRT( BIG ) - TEMP1 = SQRT( BIG / DBLE(N) ) -* TEMP1 = BIG/DBLE(N) -* - CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) - IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN - AAQQ = ( AAQQ / AAPP ) * TEMP1 - ELSE - AAQQ = ( AAQQ * TEMP1 ) / AAPP - END IF - TEMP1 = TEMP1 * SCALEM - CALL ZLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) -* -* To undo scaling at the end of this procedure, multiply the -* computed singular values with USCAL2 / USCAL1. -* - USCAL1 = TEMP1 - USCAL2 = AAPP -* - IF ( L2KILL ) THEN -* L2KILL enforces computation of nonzero singular values in -* the restricted range of condition number of the initial A, -* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). - XSC = SQRT( SFMIN ) - ELSE - XSC = SMALL -* -* Now, if the condition number of A is too big, -* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, -* as a precaution measure, the full SVD is computed using ZGESVJ -* with accumulated Jacobi rotations. This provides numerically -* more robust computation, at the cost of slightly increased run -* time. Depending on the concrete implementation of BLAS and LAPACK -* (i.e. how they behave in presence of extreme ill-conditioning) the -* implementor may decide to remove this switch. - IF ( ( AAQQ.LT.SQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN - JRACC = .TRUE. - END IF -* - END IF - IF ( AAQQ .LT. XSC ) THEN - DO 700 p = 1, N - IF ( SVA(p) .LT. XSC ) THEN - CALL ZLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA ) - SVA(p) = ZERO - END IF - 700 CONTINUE - END IF -* -* Preconditioning using QR factorization with pivoting -* - IF ( ROWPIV ) THEN -* Optional row permutation (Bjoerck row pivoting): -* A result by Cox and Higham shows that the Bjoerck's -* row pivoting combined with standard column pivoting -* has similar effect as Powell-Reid complete pivoting. -* The ell-infinity norms of A are made nonincreasing. - IF ( ( LSVEC .AND. RSVEC ) .AND. .NOT.( JRACC ) ) THEN - IWOFF = 2*N - ELSE - IWOFF = N - END IF - DO 1952 p = 1, M - 1 - q = IDAMAX( M-p+1, RWORK(M+p), 1 ) + p - 1 - IWORK(IWOFF+p) = q - IF ( p .NE. q ) THEN - TEMP1 = RWORK(M+p) - RWORK(M+p) = RWORK(M+q) - RWORK(M+q) = TEMP1 - END IF - 1952 CONTINUE - CALL ZLASWP( N, A, LDA, 1, M-1, IWORK(IWOFF+1), 1 ) - END IF -* -* End of the preparation phase (scaling, optional sorting and -* transposing, optional flushing of small columns). -* -* Preconditioning -* -* If the full SVD is needed, the right singular vectors are computed -* from a matrix equation, and for that we need theoretical analysis -* of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF. -* In all other cases the first RR QRF can be chosen by other criteria -* (eg speed by replacing global with restricted window pivoting, such -* as in xGEQPX from TOMS # 782). Good results will be obtained using -* xGEQPX with properly (!) chosen numerical parameters. -* Any improvement of ZGEQP3 improves overal performance of ZGEJSV. -* -* A * P1 = Q1 * [ R1^* 0]^*: - DO 1963 p = 1, N -* .. all columns are free columns - IWORK(p) = 0 - 1963 CONTINUE - CALL ZGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, - $ RWORK, IERR ) -* -* The upper triangular matrix R1 from the first QRF is inspected for -* rank deficiency and possibilities for deflation, or possible -* ill-conditioning. Depending on the user specified flag L2RANK, -* the procedure explores possibilities to reduce the numerical -* rank by inspecting the computed upper triangular factor. If -* L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of -* A + dA, where ||dA|| <= f(M,N)*EPSLN. -* - NR = 1 - IF ( L2ABER ) THEN -* Standard absolute error bound suffices. All sigma_i with -* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an -* agressive enforcement of lower numerical rank by introducing a -* backward error of the order of N*EPSLN*||A||. - TEMP1 = SQRT(DBLE(N))*EPSLN - DO 3001 p = 2, N - IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN - NR = NR + 1 - ELSE - GO TO 3002 - END IF - 3001 CONTINUE - 3002 CONTINUE - ELSE IF ( L2RANK ) THEN -* .. similarly as above, only slightly more gentle (less agressive). -* Sudden drop on the diagonal of R1 is used as the criterion for -* close-to-rank-deficient. - TEMP1 = SQRT(SFMIN) - DO 3401 p = 2, N - IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR. - $ ( ABS(A(p,p)) .LT. SMALL ) .OR. - $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 - NR = NR + 1 - 3401 CONTINUE - 3402 CONTINUE -* - ELSE -* The goal is high relative accuracy. However, if the matrix -* has high scaled condition number the relative accuracy is in -* general not feasible. Later on, a condition number estimator -* will be deployed to estimate the scaled condition number. -* Here we just remove the underflowed part of the triangular -* factor. This prevents the situation in which the code is -* working hard to get the accuracy not warranted by the data. - TEMP1 = SQRT(SFMIN) - DO 3301 p = 2, N - IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR. - $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 - NR = NR + 1 - 3301 CONTINUE - 3302 CONTINUE -* - END IF -* - ALMORT = .FALSE. - IF ( NR .EQ. N ) THEN - MAXPRJ = ONE - DO 3051 p = 2, N - TEMP1 = ABS(A(p,p)) / SVA(IWORK(p)) - MAXPRJ = MIN( MAXPRJ, TEMP1 ) - 3051 CONTINUE - IF ( MAXPRJ**2 .GE. ONE - DBLE(N)*EPSLN ) ALMORT = .TRUE. - END IF -* -* - SCONDA = - ONE - CONDR1 = - ONE - CONDR2 = - ONE -* - IF ( ERREST ) THEN - IF ( N .EQ. NR ) THEN - IF ( RSVEC ) THEN -* .. V is available as workspace - CALL ZLACPY( 'U', N, N, A, LDA, V, LDV ) - DO 3053 p = 1, N - TEMP1 = SVA(IWORK(p)) - CALL ZDSCAL( p, ONE/TEMP1, V(1,p), 1 ) - 3053 CONTINUE - IF ( LSVEC )THEN - CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1, - $ CWORK(N+1), RWORK, IERR ) - ELSE - CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1, - $ CWORK, RWORK, IERR ) - END IF -* - ELSE IF ( LSVEC ) THEN -* .. U is available as workspace - CALL ZLACPY( 'U', N, N, A, LDA, U, LDU ) - DO 3054 p = 1, N - TEMP1 = SVA(IWORK(p)) - CALL ZDSCAL( p, ONE/TEMP1, U(1,p), 1 ) - 3054 CONTINUE - CALL ZPOCON( 'U', N, U, LDU, ONE, TEMP1, - $ CWORK(N+1), RWORK, IERR ) - ELSE - CALL ZLACPY( 'U', N, N, A, LDA, CWORK, N ) -*[] CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) -* Change: here index shifted by N to the left, CWORK(1:N) -* not needed for SIGMA only computation - DO 3052 p = 1, N - TEMP1 = SVA(IWORK(p)) -*[] CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 ) - CALL ZDSCAL( p, ONE/TEMP1, CWORK((p-1)*N+1), 1 ) - 3052 CONTINUE -* .. the columns of R are scaled to have unit Euclidean lengths. -*[] CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1, -*[] $ CWORK(N+N*N+1), RWORK, IERR ) - CALL ZPOCON( 'U', N, CWORK, N, ONE, TEMP1, - $ CWORK(N*N+1), RWORK, IERR ) -* - END IF - IF ( TEMP1 .NE. ZERO ) THEN - SCONDA = ONE / SQRT(TEMP1) - ELSE - SCONDA = - ONE - END IF -* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). -* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA - ELSE - SCONDA = - ONE - END IF - END IF -* - L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. SQRT(BIG1) ) -* If there is no violent scaling, artificial perturbation is not needed. -* -* Phase 3: -* - IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN -* -* Singular Values only -* -* .. transpose A(1:NR,1:N) - DO 1946 p = 1, MIN( N-1, NR ) - CALL ZCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) - CALL ZLACGV( N-p+1, A(p,p), 1 ) - 1946 CONTINUE - IF ( NR .EQ. N ) A(N,N) = CONJG(A(N,N)) -* -* The following two DO-loops introduce small relative perturbation -* into the strict upper triangle of the lower triangular matrix. -* Small entries below the main diagonal are also changed. -* This modification is useful if the computing environment does not -* provide/allow FLUSH TO ZERO underflow, for it prevents many -* annoying denormalized numbers in case of strongly scaled matrices. -* The perturbation is structured so that it does not introduce any -* new perturbation of the singular values, and it does not destroy -* the job done by the preconditioner. -* The licence for this perturbation is in the variable L2PERT, which -* should be .FALSE. if FLUSH TO ZERO underflow is active. -* - IF ( .NOT. ALMORT ) THEN -* - IF ( L2PERT ) THEN -* XSC = SQRT(SMALL) - XSC = EPSLN / DBLE(N) - DO 4947 q = 1, NR - CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO) - DO 4949 p = 1, N - IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) - $ .OR. ( p .LT. q ) ) -* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) - $ A(p,q) = CTEMP - 4949 CONTINUE - 4947 CONTINUE - ELSE - CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA ) - END IF -* -* .. second preconditioning using the QR factorization -* - CALL ZGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR ) -* -* .. and transpose upper to lower triangular - DO 1948 p = 1, NR - 1 - CALL ZCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) - CALL ZLACGV( NR-p+1, A(p,p), 1 ) - 1948 CONTINUE -* - END IF -* -* Row-cyclic Jacobi SVD algorithm with column pivoting -* -* .. again some perturbation (a "background noise") is added -* to drown denormals - IF ( L2PERT ) THEN -* XSC = SQRT(SMALL) - XSC = EPSLN / DBLE(N) - DO 1947 q = 1, NR - CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO) - DO 1949 p = 1, NR - IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) - $ .OR. ( p .LT. q ) ) -* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) - $ A(p,q) = CTEMP - 1949 CONTINUE - 1947 CONTINUE - ELSE - CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA ) - END IF -* -* .. and one-sided Jacobi rotations are started on a lower -* triangular matrix (plus perturbation which is ignored in -* the part which destroys triangular form (confusing?!)) -* - CALL ZGESVJ( 'L', 'N', 'N', NR, NR, A, LDA, SVA, - $ N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) -* - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) -* -* - ELSE IF ( ( RSVEC .AND. ( .NOT. LSVEC ) .AND. ( .NOT. JRACC ) ) - $ .OR. - $ ( JRACC .AND. ( .NOT. LSVEC ) .AND. ( NR .NE. N ) ) ) THEN -* -* -> Singular Values and Right Singular Vectors <- -* - IF ( ALMORT ) THEN -* -* .. in this case NR equals N - DO 1998 p = 1, NR - CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) - CALL ZLACGV( N-p+1, V(p,p), 1 ) - 1998 CONTINUE - CALL ZLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) -* - CALL ZGESVJ( 'L','U','N', N, NR, V, LDV, SVA, NR, A, LDA, - $ CWORK, LWORK, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - - ELSE -* -* .. two more QR factorizations ( one QRF is not enough, two require -* accumulated product of Jacobi rotations, three are perfect ) -* - CALL ZLASET( 'L', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA ) - CALL ZGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR) - CALL ZLACPY( 'L', NR, NR, A, LDA, V, LDV ) - CALL ZLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) - CALL ZGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) - DO 8998 p = 1, NR - CALL ZCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) - CALL ZLACGV( NR-p+1, V(p,p), 1 ) - 8998 CONTINUE - CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV) -* - CALL ZGESVJ( 'L', 'U','N', NR, NR, V,LDV, SVA, NR, U, - $ LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - IF ( NR .LT. N ) THEN - CALL ZLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1), LDV ) - CALL ZLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1), LDV ) - CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV ) - END IF -* - CALL ZUNMLQ( 'L', 'C', N, N, NR, A, LDA, CWORK, - $ V, LDV, CWORK(N+1), LWORK-N, IERR ) -* - END IF -* .. permute the rows of V -* DO 8991 p = 1, N -* CALL ZCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) -* 8991 CONTINUE -* CALL ZLACPY( 'All', N, N, A, LDA, V, LDV ) - CALL ZLAPMR( .FALSE., N, N, V, LDV, IWORK ) -* - IF ( TRANSP ) THEN - CALL ZLACPY( 'A', N, N, V, LDV, U, LDU ) - END IF -* - ELSE IF ( JRACC .AND. (.NOT. LSVEC) .AND. ( NR.EQ. N ) ) THEN -* - CALL ZLASET( 'L', N-1,N-1, CZERO, CZERO, A(2,1), LDA ) -* - CALL ZGESVJ( 'U','N','V', N, N, A, LDA, SVA, N, V, LDV, - $ CWORK, LWORK, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - CALL ZLAPMR( .FALSE., N, N, V, LDV, IWORK ) -* - ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN -* -* .. Singular Values and Left Singular Vectors .. -* -* .. second preconditioning step to avoid need to accumulate -* Jacobi rotations in the Jacobi iterations. - DO 1965 p = 1, NR - CALL ZCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) - CALL ZLACGV( N-p+1, U(p,p), 1 ) - 1965 CONTINUE - CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) -* - CALL ZGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) -* - DO 1967 p = 1, NR - 1 - CALL ZCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) - CALL ZLACGV( N-p+1, U(p,p), 1 ) - 1967 CONTINUE - CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) -* - CALL ZGESVJ( 'L', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, - $ LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) -* - IF ( NR .LT. M ) THEN - CALL ZLASET( 'A', M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU ) - IF ( NR .LT. N1 ) THEN - CALL ZLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU ) - CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU ) - END IF - END IF -* - CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) -* - IF ( ROWPIV ) - $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* - DO 1974 p = 1, N1 - XSC = ONE / DZNRM2( M, U(1,p), 1 ) - CALL ZDSCAL( M, XSC, U(1,p), 1 ) - 1974 CONTINUE -* - IF ( TRANSP ) THEN - CALL ZLACPY( 'A', N, N, U, LDU, V, LDV ) - END IF -* - ELSE -* -* .. Full SVD .. -* - IF ( .NOT. JRACC ) THEN -* - IF ( .NOT. ALMORT ) THEN -* -* Second Preconditioning Step (QRF [with pivoting]) -* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is -* equivalent to an LQF CALL. Since in many libraries the QRF -* seems to be better optimized than the LQF, we do explicit -* transpose and use the QRF. This is subject to changes in an -* optimized implementation of ZGEJSV. -* - DO 1968 p = 1, NR - CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) - CALL ZLACGV( N-p+1, V(p,p), 1 ) - 1968 CONTINUE -* -* .. the following two loops perturb small entries to avoid -* denormals in the second QR factorization, where they are -* as good as zeros. This is done to avoid painfully slow -* computation with denormals. The relative size of the perturbation -* is a parameter that can be changed by the implementer. -* This perturbation device will be obsolete on machines with -* properly implemented arithmetic. -* To switch it off, set L2PERT=.FALSE. To remove it from the -* code, remove the action under L2PERT=.TRUE., leave the ELSE part. -* The following two loops should be blocked and fused with the -* transposed copy above. -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 2969 q = 1, NR - CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO) - DO 2968 p = 1, N - IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) - $ .OR. ( p .LT. q ) ) -* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) - $ V(p,q) = CTEMP - IF ( p .LT. q ) V(p,q) = - V(p,q) - 2968 CONTINUE - 2969 CONTINUE - ELSE - CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) - END IF -* -* Estimate the row scaled condition number of R1 -* (If R1 is rectangular, N > NR, then the condition number -* of the leading NR x NR submatrix is estimated.) -* - CALL ZLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR ) - DO 3950 p = 1, NR - TEMP1 = DZNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1) - CALL ZDSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1) - 3950 CONTINUE - CALL ZPOCON('L',NR,CWORK(2*N+1),NR,ONE,TEMP1, - $ CWORK(2*N+NR*NR+1),RWORK,IERR) - CONDR1 = ONE / SQRT(TEMP1) -* .. here need a second oppinion on the condition number -* .. then assume worst case scenario -* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) -* more conservative <=> CONDR1 .LT. SQRT(DBLE(N)) -* - COND_OK = SQRT(SQRT(DBLE(NR))) -*[TP] COND_OK is a tuning parameter. -* - IF ( CONDR1 .LT. COND_OK ) THEN -* .. the second QRF without pivoting. Note: in an optimized -* implementation, this QRF should be implemented as the QRF -* of a lower triangular matrix. -* R1^* = Q2 * R2 - CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL)/EPSLN - DO 3959 p = 2, NR - DO 3958 q = 1, p - 1 - CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), - $ ZERO) - IF ( ABS(V(q,p)) .LE. TEMP1 ) -* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) - $ V(q,p) = CTEMP - 3958 CONTINUE - 3959 CONTINUE - END IF -* - IF ( NR .NE. N ) - $ CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) -* .. save ... -* -* .. this transposed copy should be better than naive - DO 1969 p = 1, NR - 1 - CALL ZCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) - CALL ZLACGV(NR-p+1, V(p,p), 1 ) - 1969 CONTINUE - V(NR,NR)=CONJG(V(NR,NR)) -* - CONDR2 = CONDR1 -* - ELSE -* -* .. ill-conditioned case: second QRF with pivoting -* Note that windowed pivoting would be equaly good -* numerically, and more run-time efficient. So, in -* an optimal implementation, the next call to ZGEQP3 -* should be replaced with eg. CALL ZGEQPX (ACM TOMS #782) -* with properly (carefully) chosen parameters. -* -* R1^* * P2 = Q2 * R2 - DO 3003 p = 1, NR - IWORK(N+p) = 0 - 3003 CONTINUE - CALL ZGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1), - $ CWORK(2*N+1), LWORK-2*N, RWORK, IERR ) -** CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), -** $ LWORK-2*N, IERR ) - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 3969 p = 2, NR - DO 3968 q = 1, p - 1 - CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), - $ ZERO) - IF ( ABS(V(q,p)) .LE. TEMP1 ) -* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) - $ V(q,p) = CTEMP - 3968 CONTINUE - 3969 CONTINUE - END IF -* - CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 8970 p = 2, NR - DO 8971 q = 1, p - 1 - CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), - $ ZERO) -* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) ) - V(p,q) = - CTEMP - 8971 CONTINUE - 8970 CONTINUE - ELSE - CALL ZLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV ) - END IF -* Now, compute R2 = L3 * Q3, the LQ factorization. - CALL ZGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1), - $ CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) -* .. and estimate the condition number - CALL ZLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR ) - DO 4950 p = 1, NR - TEMP1 = DZNRM2( p, CWORK(2*N+N*NR+NR+p), NR ) - CALL ZDSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR ) - 4950 CONTINUE - CALL ZPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, - $ CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) - CONDR2 = ONE / SQRT(TEMP1) -* -* - IF ( CONDR2 .GE. COND_OK ) THEN -* .. save the Householder vectors used for Q3 -* (this overwrittes the copy of R2, as it will not be -* needed in this branch, but it does not overwritte the -* Huseholder vectors of Q2.). - CALL ZLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N ) -* .. and the rest of the information on Q3 is in -* WORK(2*N+N*NR+1:2*N+N*NR+N) - END IF -* - END IF -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 4968 q = 2, NR - CTEMP = XSC * V(q,q) - DO 4969 p = 1, q - 1 -* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) ) - V(p,q) = - CTEMP - 4969 CONTINUE - 4968 CONTINUE - ELSE - CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) - END IF -* -* Second preconditioning finished; continue with Jacobi SVD -* The input matrix is lower trinagular. -* -* Recover the right singular vectors as solution of a well -* conditioned triangular matrix equation. -* - IF ( CONDR1 .LT. COND_OK ) THEN -* - CALL ZGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU, - $ CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK, - $ LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - DO 3970 p = 1, NR - CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 ) - CALL ZDSCAL( NR, SVA(p), V(1,p), 1 ) - 3970 CONTINUE - -* .. pick the right matrix equation and solve it -* - IF ( NR .EQ. N ) THEN -* :)) .. best case, R1 is inverted. The solution of this matrix -* equation is Q2*V2 = the product of the Jacobi rotations -* used in ZGESVJ, premultiplied with the orthogonal matrix -* from the second QR factorization. - CALL ZTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV) - ELSE -* .. R1 is well conditioned, but non-square. Adjoint of R2 -* is inverted to get the product of the Jacobi rotations -* used in ZGESVJ. The Q-factor from the second QR -* factorization is then built in explicitly. - CALL ZTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1), - $ N,V,LDV) - IF ( NR .LT. N ) THEN - CALL ZLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) - CALL ZLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) - CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) - END IF - CALL ZUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) - END IF -* - ELSE IF ( CONDR2 .LT. COND_OK ) THEN -* -* The matrix R2 is inverted. The solution of the matrix equation -* is Q3^* * V3 = the product of the Jacobi rotations (appplied to -* the lower triangular L3 from the LQ factorization of -* R2=L3*Q3), pre-multiplied with the transposed Q3. - CALL ZGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, - $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, - $ RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - DO 3870 p = 1, NR - CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 ) - CALL ZDSCAL( NR, SVA(p), U(1,p), 1 ) - 3870 CONTINUE - CALL ZTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N, - $ U,LDU) -* .. apply the permutation from the second QR factorization - DO 873 q = 1, NR - DO 872 p = 1, NR - CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) - 872 CONTINUE - DO 874 p = 1, NR - U(p,q) = CWORK(2*N+N*NR+NR+p) - 874 CONTINUE - 873 CONTINUE - IF ( NR .LT. N ) THEN - CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) - CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) - CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) - END IF - CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) - ELSE -* Last line of defense. -* #:( This is a rather pathological case: no scaled condition -* improvement after two pivoted QR factorizations. Other -* possibility is that the rank revealing QR factorization -* or the condition estimator has failed, or the COND_OK -* is set very close to ONE (which is unnecessary). Normally, -* this branch should never be executed, but in rare cases of -* failure of the RRQR or condition estimator, the last line of -* defense ensures that ZGEJSV completes the task. -* Compute the full SVD of L3 using ZGESVJ with explicit -* accumulation of Jacobi rotations. - CALL ZGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, - $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, - $ RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - IF ( NR .LT. N ) THEN - CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) - CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) - CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) - END IF - CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) -* - CALL ZUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N, - $ CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1), - $ LWORK-2*N-N*NR-NR, IERR ) - DO 773 q = 1, NR - DO 772 p = 1, NR - CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) - 772 CONTINUE - DO 774 p = 1, NR - U(p,q) = CWORK(2*N+N*NR+NR+p) - 774 CONTINUE - 773 CONTINUE -* - END IF -* -* Permute the rows of V using the (column) permutation from the -* first QRF. Also, scale the columns to make them unit in -* Euclidean norm. This applies to all cases. -* - TEMP1 = SQRT(DBLE(N)) * EPSLN - DO 1972 q = 1, N - DO 972 p = 1, N - CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) - 972 CONTINUE - DO 973 p = 1, N - V(p,q) = CWORK(2*N+N*NR+NR+p) - 973 CONTINUE - XSC = ONE / DZNRM2( N, V(1,q), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL ZDSCAL( N, XSC, V(1,q), 1 ) - 1972 CONTINUE -* At this moment, V contains the right singular vectors of A. -* Next, assemble the left singular vector matrix U (M x N). - IF ( NR .LT. M ) THEN - CALL ZLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU) - IF ( NR .LT. N1 ) THEN - CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) - CALL ZLASET('A',M-NR,N1-NR,CZERO,CONE, - $ U(NR+1,NR+1),LDU) - END IF - END IF -* -* The Q matrix from the first QRF is built into the left singular -* matrix U. This applies to all cases. -* - CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) - -* The columns of U are normalized. The cost is O(M*N) flops. - TEMP1 = SQRT(DBLE(M)) * EPSLN - DO 1973 p = 1, NR - XSC = ONE / DZNRM2( M, U(1,p), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL ZDSCAL( M, XSC, U(1,p), 1 ) - 1973 CONTINUE -* -* If the initial QRF is computed with row pivoting, the left -* singular vectors must be adjusted. -* - IF ( ROWPIV ) - $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* - ELSE -* -* .. the initial matrix A has almost orthogonal columns and -* the second QRF is not needed -* - CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) - IF ( L2PERT ) THEN - XSC = SQRT(SMALL) - DO 5970 p = 2, N - CTEMP = XSC * CWORK( N + (p-1)*N + p ) - DO 5971 q = 1, p - 1 -* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) / -* $ ABS(CWORK(N+(p-1)*N+q)) ) - CWORK(N+(q-1)*N+p)=-CTEMP - 5971 CONTINUE - 5970 CONTINUE - ELSE - CALL ZLASET( 'L',N-1,N-1,CZERO,CZERO,CWORK(N+2),N ) - END IF -* - CALL ZGESVJ( 'U', 'U', 'N', N, N, CWORK(N+1), N, SVA, - $ N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, - $ INFO ) -* - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - DO 6970 p = 1, N - CALL ZCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 ) - CALL ZDSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 ) - 6970 CONTINUE -* - CALL ZTRSM( 'L', 'U', 'N', 'N', N, N, - $ CONE, A, LDA, CWORK(N+1), N ) - DO 6972 p = 1, N - CALL ZCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV ) - 6972 CONTINUE - TEMP1 = SQRT(DBLE(N))*EPSLN - DO 6971 p = 1, N - XSC = ONE / DZNRM2( N, V(1,p), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL ZDSCAL( N, XSC, V(1,p), 1 ) - 6971 CONTINUE -* -* Assemble the left singular vector matrix U (M x N). -* - IF ( N .LT. M ) THEN - CALL ZLASET( 'A', M-N, N, CZERO, CZERO, U(N+1,1), LDU ) - IF ( N .LT. N1 ) THEN - CALL ZLASET('A',N, N1-N, CZERO, CZERO, U(1,N+1),LDU) - CALL ZLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU) - END IF - END IF - CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) - TEMP1 = SQRT(DBLE(M))*EPSLN - DO 6973 p = 1, N1 - XSC = ONE / DZNRM2( M, U(1,p), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL ZDSCAL( M, XSC, U(1,p), 1 ) - 6973 CONTINUE -* - IF ( ROWPIV ) - $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* - END IF -* -* end of the >> almost orthogonal case << in the full SVD -* - ELSE -* -* This branch deploys a preconditioned Jacobi SVD with explicitly -* accumulated rotations. It is included as optional, mainly for -* experimental purposes. It does perfom well, and can also be used. -* In this implementation, this branch will be automatically activated -* if the condition number sigma_max(A) / sigma_min(A) is predicted -* to be greater than the overflow threshold. This is because the -* a posteriori computation of the singular vectors assumes robust -* implementation of BLAS and some LAPACK procedures, capable of working -* in presence of extreme values, e.g. when the singular values spread from -* the underflow to the overflow threshold. -* - DO 7968 p = 1, NR - CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) - CALL ZLACGV( N-p+1, V(p,p), 1 ) - 7968 CONTINUE -* - IF ( L2PERT ) THEN - XSC = SQRT(SMALL/EPSLN) - DO 5969 q = 1, NR - CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO) - DO 5968 p = 1, N - IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) - $ .OR. ( p .LT. q ) ) -* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) - $ V(p,q) = CTEMP - IF ( p .LT. q ) V(p,q) = - V(p,q) - 5968 CONTINUE - 5969 CONTINUE - ELSE - CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) - END IF - - CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), - $ LWORK-2*N, IERR ) - CALL ZLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N ) -* - DO 7969 p = 1, NR - CALL ZCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) - CALL ZLACGV( NR-p+1, U(p,p), 1 ) - 7969 CONTINUE - - IF ( L2PERT ) THEN - XSC = SQRT(SMALL/EPSLN) - DO 9970 q = 2, NR - DO 9971 p = 1, q - 1 - CTEMP = DCMPLX(XSC * MIN(ABS(U(p,p)),ABS(U(q,q))), - $ ZERO) -* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) ) - U(p,q) = - CTEMP - 9971 CONTINUE - 9970 CONTINUE - ELSE - CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) - END IF - - CALL ZGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA, - $ N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, - $ RWORK, LRWORK, INFO ) - SCALEM = RWORK(1) - NUMRANK = NINT(RWORK(2)) - - IF ( NR .LT. N ) THEN - CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) - CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) - CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV ) - END IF - - CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), - $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) -* -* Permute the rows of V using the (column) permutation from the -* first QRF. Also, scale the columns to make them unit in -* Euclidean norm. This applies to all cases. -* - TEMP1 = SQRT(DBLE(N)) * EPSLN - DO 7972 q = 1, N - DO 8972 p = 1, N - CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) - 8972 CONTINUE - DO 8973 p = 1, N - V(p,q) = CWORK(2*N+N*NR+NR+p) - 8973 CONTINUE - XSC = ONE / DZNRM2( N, V(1,q), 1 ) - IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) - $ CALL ZDSCAL( N, XSC, V(1,q), 1 ) - 7972 CONTINUE -* -* At this moment, V contains the right singular vectors of A. -* Next, assemble the left singular vector matrix U (M x N). -* - IF ( NR .LT. M ) THEN - CALL ZLASET( 'A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU ) - IF ( NR .LT. N1 ) THEN - CALL ZLASET('A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU) - CALL ZLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU) - END IF - END IF -* - CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, - $ LDU, CWORK(N+1), LWORK-N, IERR ) -* - IF ( ROWPIV ) - $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) -* -* - END IF - IF ( TRANSP ) THEN -* .. swap U and V because the procedure worked on A^* - DO 6974 p = 1, N - CALL ZSWAP( N, U(1,p), 1, V(1,p), 1 ) - 6974 CONTINUE - END IF -* - END IF -* end of the full SVD -* -* Undo scaling, if necessary (and possible) -* - IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN - CALL DLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) - USCAL1 = ONE - USCAL2 = ONE - END IF -* - IF ( NR .LT. N ) THEN - DO 3004 p = NR+1, N - SVA(p) = ZERO - 3004 CONTINUE - END IF -* - RWORK(1) = USCAL2 * SCALEM - RWORK(2) = USCAL1 - IF ( ERREST ) RWORK(3) = SCONDA - IF ( LSVEC .AND. RSVEC ) THEN - RWORK(4) = CONDR1 - RWORK(5) = CONDR2 - END IF - IF ( L2TRAN ) THEN - RWORK(6) = ENTRA - RWORK(7) = ENTRAT - END IF -* - IWORK(1) = NR - IWORK(2) = NUMRANK - IWORK(3) = WARNING - IF ( TRANSP ) THEN - IWORK(4) = 1 - ELSE - IWORK(4) = -1 - END IF - -* - RETURN -* .. -* .. END OF ZGEJSV -* .. - END -* +*> \brief \b ZGEJSV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEJSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, +* M, N, A, LDA, SVA, U, LDU, V, LDV, +* CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* IMPLICIT NONE +* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) +* DOUBLE PRECISION SVA( N ), RWORK( LRWORK ) +* INTEGER IWORK( * ) +* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEJSV computes the singular value decomposition (SVD) of a complex M-by-N +*> matrix [A], where M >= N. The SVD of [A] is written as +*> +*> [A] = [U] * [SIGMA] * [V]^*, +*> +*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N +*> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and +*> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are +*> the singular values of [A]. The columns of [U] and [V] are the left and +*> the right singular vectors of [A], respectively. The matrices [U] and [V] +*> are computed and stored in the arrays U and V, respectively. The diagonal +*> of [SIGMA] is computed and stored in the array SVA. +*> \endverbatim +*> +*> Arguments: +*> ========== +*> +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the level of accuracy: +*> = 'C': This option works well (high relative accuracy) if A = B * D, +*> with well-conditioned B and arbitrary diagonal matrix D. +*> The accuracy cannot be spoiled by COLUMN scaling. The +*> accuracy of the computed output depends on the condition of +*> B, and the procedure aims at the best theoretical accuracy. +*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is +*> bounded by f(M,N)*epsilon* cond(B), independent of D. +*> The input matrix is preprocessed with the QRF with column +*> pivoting. This initial preprocessing and preconditioning by +*> a rank revealing QR factorization is common for all values of +*> JOBA. Additional actions are specified as follows: +*> = 'E': Computation as with 'C' with an additional estimate of the +*> condition number of B. It provides a realistic error bound. +*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings +*> D1, D2, and well-conditioned matrix C, this option gives +*> higher accuracy than the 'C' option. If the structure of the +*> input matrix is not known, and relative accuracy is +*> desirable, then this option is advisable. The input matrix A +*> is preprocessed with QR factorization with FULL (row and +*> column) pivoting. +*> = 'G' Computation as with 'F' with an additional estimate of the +*> condition number of B, where A=B*D. If A has heavily weighted +*> rows, then using this condition number gives too pessimistic +*> error bound. +*> = 'A': Small singular values are not well determined by the data +*> and are considered as noisy; the matrix is treated as +*> numerically rank defficient. The error in the computed +*> singular values is bounded by f(m,n)*epsilon*||A||. +*> The computed SVD A = U * S * V^* restores A up to +*> f(m,n)*epsilon*||A||. +*> This gives the procedure the licence to discard (set to zero) +*> all singular values below N*epsilon*||A||. +*> = 'R': Similar as in 'A'. Rank revealing property of the initial +*> QR factorization is used do reveal (using triangular factor) +*> a gap sigma_{r+1} < epsilon * sigma_r in which case the +*> numerical RANK is declared to be r. The SVD is computed with +*> absolute error bounds, but more accurately than with 'A'. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the columns of U: +*> = 'U': N columns of U are returned in the array U. +*> = 'F': full set of M left sing. vectors is returned in the array U. +*> = 'W': U may be used as workspace of length M*N. See the description +*> of U. +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the matrix V: +*> = 'V': N columns of V are returned in the array V; Jacobi rotations +*> are not explicitly accumulated. +*> = 'J': N columns of V are returned in the array V, but they are +*> computed as the product of Jacobi rotations, if JOBT .EQ. 'N'. +*> = 'W': V may be used as workspace of length N*N. See the description +*> of V. +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBR +*> \verbatim +*> JOBR is CHARACTER*1 +*> Specifies the RANGE for the singular values. Issues the licence to +*> set to zero small positive singular values if they are outside +*> specified range. If A .NE. 0 is scaled so that the largest singular +*> value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues +*> the licence to kill columns of A whose norm in c*A is less than +*> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, +*> where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E'). +*> = 'N': Do not kill small columns of c*A. This option assumes that +*> BLAS and QR factorizations and triangular solvers are +*> implemented to work in that range. If the condition of A +*> is greater than BIG, use ZGESVJ. +*> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] +*> (roughly, as described above). This option is recommended. +*> =========================== +*> For computing the singular values in the FULL range [SFMIN,BIG] +*> use ZGESVJ. +*> \endverbatim +*> +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> If the matrix is square then the procedure may determine to use +*> transposed A if A^* seems to be better with respect to convergence. +*> If the matrix is not square, JOBT is ignored. +*> The decision is based on two values of entropy over the adjoint +*> orbit of A^* * A. See the descriptions of WORK(6) and WORK(7). +*> = 'T': transpose if entropy test indicates possibly faster +*> convergence of Jacobi process if A^* is taken as input. If A is +*> replaced with A^*, then the row pivoting is included automatically. +*> = 'N': do not speculate. +*> The option 'T' can be used to compute only the singular values, or +*> the full SVD (U, SIGMA and V). For only one set of singular vectors +*> (U or V), the caller should provide both U and V, as one of the +*> matrices is used as workspace if the matrix A is transposed. +*> The implementer can easily remove this constraint and make the +*> code more complicated. See the descriptions of U and V. +*> In general, this option is considered experimental, and 'N'; should +*> be preferred. This is subject to changes in the future. +*> \endverbatim +*> +*> \param[in] JOBP +*> \verbatim +*> JOBP is CHARACTER*1 +*> Issues the licence to introduce structured perturbations to drown +*> denormalized numbers. This licence should be active if the +*> denormals are poorly implemented, causing slow computation, +*> especially in cases of fast convergence (!). For details see [1,2]. +*> For the sake of simplicity, this perturbations are included only +*> when the full SVD or only the singular values are requested. The +*> implementer/user can easily add the perturbation for the cases of +*> computing one set of singular vectors. +*> = 'P': introduce perturbation +*> = 'N': do not perturb +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On exit, +*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the +*> computation SVA contains Euclidean column norms of the +*> iterated matrices in the array A. +*> - For WORK(1) .NE. WORK(2): The singular values of A are +*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if +*> sigma_max(A) overflows or if small singular values have been +*> saved from underflow by scaling the input matrix A. +*> - If JOBR='R' then some of the singular values may be returned +*> as exact zeros obtained by "set to zero" because they are +*> below the numerical rank threshold or are denormalized numbers. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension ( LDU, N ) +*> If JOBU = 'U', then U contains on exit the M-by-N matrix of +*> the left singular vectors. +*> If JOBU = 'F', then U contains on exit the M-by-M matrix of +*> the left singular vectors, including an ONB +*> of the orthogonal complement of the Range(A). +*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), +*> then U is used as workspace if the procedure +*> replaces A with A^*. In that case, [V] is computed +*> in U as left singular vectors of A^* and then +*> copied back to the V array. This 'W' option is just +*> a reminder to the caller that in this case U is +*> reserved as workspace of length N*N. +*> If JOBU = 'N' U is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U, LDU >= 1. +*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension ( LDV, N ) +*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), +*> then V is used as workspace if the pprocedure +*> replaces A with A^*. In that case, [U] is computed +*> in V as right singular vectors of A^* and then +*> copied back to the U array. This 'W' option is just +*> a reminder to the caller that in this case V is +*> reserved as workspace of length N*N. +*> If JOBV = 'N' V is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. +*> \endverbatim +*> +*> \param[out] CWORK +*> \verbatim +*> CWORK is COMPLEX*16 array, dimension (MAX(2,LWORK)) +*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the required length of +*> CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> Length of CWORK to confirm proper allocation of workspace. +*> LWORK depends on the job: +*> +*> 1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and +*> 1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'): +*> LWORK >= 2*N+1. This is the minimal requirement. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= N + (N+1)*NB. Here NB is the optimal +*> block size for ZGEQP3 and ZGEQRF. +*> In general, optimal LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ)). +*> 1.2. .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). In this case, LWORK the minimal +*> requirement is LWORK >= N*N + 2*N. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ), +*> N*N+LWORK(ZPOCON)). +*> 2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), +*> (JOBU.EQ.'N') +*> 2.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> ZUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZGESVJ), +*> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). +*> 2.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> ZUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3), LWORK(ZPOCON), N+LWORK(ZGESVJ), +*> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). +*> 3. If SIGMA and the left singular vectors are needed +*> 3.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). +*> 3.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZPOCON), +*> 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). +*> 4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and +*> 4.1. if JOBV.EQ.'V' +*> the minimal requirement is LWORK >= 5*N+2*N*N. +*> 4.2. if JOBV.EQ.'J' the minimal requirement is +*> LWORK >= 4*N+N*N. +*> In both cases, the allocated CWORK can accommodate blocked runs +*> of ZGEQP3, ZGEQRF, ZGELQF, SUNMQR, ZUNMLQ. +*> +*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the +*> minimal length of CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK)) +*> On exit, +*> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) +*> such that SCALE*SVA(1:N) are the computed singular values +*> of A. (See the description of SVA().) +*> RWORK(2) = See the description of RWORK(1). +*> RWORK(3) = SCONDA is an estimate for the condition number of +*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') +*> SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +*> It is computed using SPOCON. It holds +*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA +*> where R is the triangular factor from the QRF of A. +*> However, if R is truncated and the numerical rank is +*> determined to be strictly smaller than N, SCONDA is +*> returned as -1, thus indicating that the smallest +*> singular values might be lost. +*> +*> If full SVD is needed, the following two condition numbers are +*> useful for the analysis of the algorithm. They are provied for +*> a developer/implementer who is familiar with the details of +*> the method. +*> +*> RWORK(4) = an estimate of the scaled condition number of the +*> triangular factor in the first QR factorization. +*> RWORK(5) = an estimate of the scaled condition number of the +*> triangular factor in the second QR factorization. +*> The following two parameters are computed if JOBT .EQ. 'T'. +*> They are provided for a developer/implementer who is familiar +*> with the details of the method. +*> RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy +*> of diag(A^* * A) / Trace(A^* * A) taken as point in the +*> probability simplex. +*> RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).) +*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit RWORK(1) contains the required length of +*> RWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> Length of RWORK to confirm proper allocation of workspace. +*> LRWORK depends on the job: +*> +*> 1. If only the singular values are requested i.e. if +*> LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') +*> then: +*> 1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then: LRWORK = max( 7, 2 * M ). +*> 1.2. Otherwise, LRWORK = max( 7, N ). +*> 2. If singular values with the right singular vectors are requested +*> i.e. if +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. +*> .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) +*> then: +*> 2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 2.2. Otherwise, LRWORK = max( 7, N ). +*> 3. If singular values with the left singular vectors are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 3.2. Otherwise, LRWORK = max( 7, N ). +*> 4. If singular values with both the left and the right singular vectors +*> are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 4.2. Otherwise, LRWORK = max( 7, N ). +*> +*> If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and +*> the length of RWORK is returned in RWORK(1). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, of dimension at least 4, that further depends +*> on the job: +*> +*> 1. If only the singular values are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 2. If the singular values and the right singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 3. If the singular values and the left singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4. If the singular values with both the left and the right singular vectors +*> are requested, then: +*> 4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N. +*> +*> On exit, +*> IWORK(1) = the numerical rank determined after the initial +*> QR factorization with pivoting. See the descriptions +*> of JOBA and JOBR. +*> IWORK(2) = the number of the computed nonzero singular values +*> IWORK(3) = if nonzero, a warning message: +*> If IWORK(3).EQ.1 then some of the column norms of A +*> were denormalized floats. The requested high accuracy +*> is not warranted by the data. +*> IWORK(4) = 1 or -1. If IWORK(4) .EQ. 1, then the procedure used A^* to +*> do the job as specified by the JOB parameters. +*> If the call to ZGEJSV is a workspace query (indicated by LWORK .EQ. -1 or +*> LRWORK .EQ. -1), then on exit IWORK(1) contains the required length of +*> IWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0 : if INFO = -i, then the i-th argument had an illegal value. +*> = 0 : successful exit; +*> > 0 : ZGEJSV did not converge in the maximal allowed number +*> of sweeps. The computed values may be inaccurate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEsing +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3, +*> ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an +*> additional row pivoting can be used as a preprocessor, which in some +*> cases results in much higher accuracy. An example is matrix A with the +*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned +*> diagonal matrices and C is well-conditioned matrix. In that case, complete +*> pivoting in the first QR factorizations provides accuracy dependent on the +*> condition number of C, and independent of D1, D2. Such higher accuracy is +*> not completely understood theoretically, but it works well in practice. +*> Further, if A can be written as A = B*D, with well-conditioned B and some +*> diagonal D, then the high accuracy is guaranteed, both theoretically and +*> in software, independent of D. For more details see [1], [2]. +*> The computational range for the singular values can be the full range +*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS +*> & LAPACK routines called by ZGEJSV are implemented to work in that range. +*> If that is not the case, then the restriction for safe computation with +*> the singular values in the range of normalized IEEE numbers is that the +*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not +*> overflow. This code (ZGEJSV) is best used in this restricted range, +*> meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are +*> returned as zeros. See JOBR for details on this. +*> Further, this implementation is somewhat slower than the one described +*> in [1,2] due to replacement of some non-LAPACK components, and because +*> the choice of some tuning parameters in the iterative part (ZGESVJ) is +*> left to the implementer on a particular machine. +*> The rank revealing QR factorization (in this code: ZGEQP3) should be +*> implemented as in [3]. We have a new version of ZGEQP3 under development +*> that is more robust than the current one in LAPACK, with a cleaner cut in +*> rank deficient cases. It will be available in the SIGMA library [4]. +*> If M is much larger than N, it is obvious that the initial QRF with +*> column pivoting can be preprocessed by the QRF without pivoting. That +*> well known trick is not used in ZGEJSV because in some cases heavy row +*> weighting can be treated with complete pivoting. The overhead in cases +*> M much larger than N is then only due to pivoting, but the benefits in +*> terms of accuracy have prevailed. The implementer/user can incorporate +*> this extra QRF step easily. The implementer can also improve data movement +*> (matrix transpose, matrix copy, matrix transposed copy) - this +*> implementation of ZGEJSV uses only the simplest, naive data movement. +*> \endverbatim +* +*> \par Contributor: +* ================== +*> +*> Zlatko Drmac, Department of Mathematics, Faculty of Science, +*> University of Zagreb (Zagreb, Croatia); drmac@math.hr +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR +*> factorization software - a case study. +*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. +*> LAPACK Working note 176. +*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008, 2016. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> Please report all bugs and send interesting examples and/or comments to +*> drmac@math.hr. Thank you. +*> +* ===================================================================== + SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, + $ M, N, A, LDA, SVA, U, LDU, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + IMPLICIT NONE + INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), + $ CWORK( LWORK ) + DOUBLE PRECISION SVA( N ), RWORK( LRWORK ) + INTEGER IWORK( * ) + CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* =========================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + COMPLEX*16 CTEMP + DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, + $ COND_OK, CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, + $ MAXPRJ, SCALEM, SCONDA, SFMIN, SMALL, TEMP1, + $ USCAL1, USCAL2, XSC + INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING + LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY, + $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL, + $ ROWPIV, RSVEC, TRANSP +* + INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK + INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM, + $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF + INTEGER LWRK_ZGELQF, LWRK_ZGEQP3, LWRK_ZGEQP3N, LWRK_ZGEQRF, + $ LWRK_ZGESVJ, LWRK_ZGESVJV, LWRK_ZGESVJU, LWRK_ZUNMLQ, + $ LWRK_ZUNMQR, LWRK_ZUNMQRM +* .. +* .. Local Arrays + COMPLEX*16 CDUMMY(1) + DOUBLE PRECISION RDUMMY(1) +* +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, CONJG, DLOG, MAX, MIN, DBLE, NINT, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DZNRM2 + INTEGER IDAMAX, IZAMAX + LOGICAL LSAME + EXTERNAL IDAMAX, IZAMAX, LSAME, DLAMCH, DZNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ, ZCOPY, ZGELQF, ZGEQP3, ZGEQRF, ZLACPY, ZLAPMR, + $ ZLASCL, DLASCL, ZLASET, ZLASSQ, ZLASWP, ZUNGQR, ZUNMLQ, + $ ZUNMQR, ZPOCON, DSCAL, ZDSCAL, ZSWAP, ZTRSM, ZLACGV, + $ XERBLA +* + EXTERNAL ZGESVJ +* .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + JRACC = LSAME( JOBV, 'J' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC + ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) + L2RANK = LSAME( JOBA, 'R' ) + L2ABER = LSAME( JOBA, 'A' ) + ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) + L2TRAN = LSAME( JOBT, 'T' ) .AND. ( M .EQ. N ) + L2KILL = LSAME( JOBR, 'R' ) + DEFR = LSAME( JOBR, 'N' ) + L2PERT = LSAME( JOBP, 'P' ) +* + LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) +* + IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. + $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN + INFO = - 1 + ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. + $ ( LSAME( JOBU, 'W' ) .AND. RSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 2 + ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. + $ ( LSAME( JOBV, 'W' ) .AND. LSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 3 + ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN + INFO = - 4 + ELSE IF ( .NOT. ( LSAME(JOBT,'T') .OR. LSAME(JOBT,'N') ) ) THEN + INFO = - 5 + ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN + INFO = - 6 + ELSE IF ( M .LT. 0 ) THEN + INFO = - 7 + ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN + INFO = - 8 + ELSE IF ( LDA .LT. M ) THEN + INFO = - 10 + ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN + INFO = - 13 + ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN + INFO = - 15 + ELSE +* #:) + INFO = 0 + END IF +* + IF ( INFO .EQ. 0 ) THEN +* .. compute the minimal and the optimal workspace lengths +* [[The expressions for computing the minimal and the optimal +* values of LCWORK, LRWORK are written with a lot of redundancy and +* can be simplified. However, this verbose form is useful for +* maintenance and modifications of the code.]] +* +* .. minimal workspace length for ZGEQP3 of an M x N matrix, +* ZGEQRF of an N x N matrix, ZGELQF of an N x N matrix, +* ZUNMLQ for computing N x N matrix, ZUNMQR for computing N x N +* matrix, ZUNMQR for computing M x N matrix, respectively. + LWQP3 = N+1 + LWQRF = MAX( 1, N ) + LWLQF = MAX( 1, N ) + LWUNMLQ = MAX( 1, N ) + LWUNMQR = MAX( 1, N ) + LWUNMQRM = MAX( 1, M ) +* .. minimal workspace length for ZPOCON of an N x N matrix + LWCON = 2 * N +* .. minimal workspace length for ZGESVJ of an N x N matrix, +* without and with explicit accumulation of Jacobi rotations + LWSVDJ = MAX( 2 * N, 1 ) + LWSVDJV = MAX( 2 * N, 1 ) +* .. minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ + LRWQP3 = N + LRWCON = N + LRWSVDJ = N + IF ( LQUERY ) THEN + CALL ZGEQP3( M, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_ZGEQP3 = CDUMMY(1) + CALL ZGEQRF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_ZGEQRF = CDUMMY(1) + CALL ZGELQF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_ZGELQF = CDUMMY(1) + END IF + MINWRK = 2 + OPTWRK = 2 + MINIWRK = N + IF ( .NOT. (LSVEC .OR. RSVEC ) ) THEN +* .. minimal and optimal sizes of the complex workspace if +* only the singular values are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N**2+LWCON, N+LWQRF, LWSVDJ ) + ELSE + MINWRK = MAX( N+LWQP3, N+LWQRF, LWSVDJ ) + END IF + IF ( LQUERY ) THEN + CALL ZGESVJ( 'L', 'N', 'N', N, N, A, LDA, SVA, N, V, + $ LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, N**2+LWCON, + $ N+LWRK_ZGEQRF, LWRK_ZGESVJ ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWRK_ZGEQRF, + $ LWRK_ZGESVJ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( RSVEC .AND. (.NOT.LSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the right singular vectors are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, LWCON, LWSVDJ, N+LWLQF, + $ 2*N+LWQRF, N+LWSVDJ, N+LWUNMLQ ) + ELSE + MINWRK = MAX( N+LWQP3, LWSVDJ, N+LWLQF, 2*N+LWQRF, + $ N+LWSVDJ, N+LWUNMLQ ) + END IF + IF ( LQUERY ) THEN + CALL ZGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + CALL ZUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_ZUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, LWCON, LWRK_ZGESVJ, + $ N+LWRK_ZGELQF, 2*N+LWRK_ZGEQRF, + $ N+LWRK_ZGESVJ, N+LWRK_ZUNMLQ ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, LWRK_ZGESVJ,N+LWRK_ZGELQF, + $ 2*N+LWRK_ZGEQRF, N+LWRK_ZGESVJ, + $ N+LWRK_ZUNMLQ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( LSVEC .AND. (.NOT.RSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the left singular vectors are requested + IF ( ERREST ) THEN + MINWRK = N + MAX( LWQP3,LWCON,N+LWQRF,LWSVDJ,LWUNMQRM ) + ELSE + MINWRK = N + MAX( LWQP3, N+LWQRF, LWSVDJ, LWUNMQRM ) + END IF + IF ( LQUERY ) THEN + CALL ZGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = N + MAX( LWRK_ZGEQP3, LWCON, N+LWRK_ZGEQRF, + $ LWRK_ZGESVJ, LWRK_ZUNMQRM ) + ELSE + OPTWRK = N + MAX( LWRK_ZGEQP3, N+LWRK_ZGEQRF, + $ LWRK_ZGESVJ, LWRK_ZUNMQRM ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE +* .. minimal and optimal sizes of the complex workspace if the +* full SVD is requested + IF ( .NOT. JRACC ) THEN + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + END IF + MINIWRK = MINIWRK + N + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + END IF + IF ( LQUERY ) THEN + CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQRM = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', N, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQR = CDUMMY(1) + IF ( .NOT. JRACC ) THEN + CALL ZGEQP3( N,N, A, LDA, IWORK, CDUMMY,CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_ZGEQP3N = CDUMMY(1) + CALL ZGESVJ( 'L', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + CALL ZGESVJ( 'U', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJU = CDUMMY(1) + CALL ZGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJV = CDUMMY(1) + CALL ZUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_ZUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWCON, + $ 2*N+N**2+LWCON, 2*N+LWRK_ZGEQRF, + $ 2*N+LWRK_ZGEQP3N, + $ 2*N+N**2+N+LWRK_ZGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_ZGESVJ, + $ 2*N+N**2+N+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR, + $ 2*N+N**2+N+LWRK_ZUNMLQ, + $ N+N**2+LWRK_ZGESVJU, + $ N+LWRK_ZUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, + $ 2*N+N**2+LWCON, 2*N+LWRK_ZGEQRF, + $ 2*N+LWRK_ZGEQP3N, + $ 2*N+N**2+N+LWRK_ZGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_ZGESVJ, + $ 2*N+N**2+N+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR, + $ 2*N+N**2+N+LWRK_ZUNMLQ, + $ N+N**2+LWRK_ZGESVJU, + $ N+LWRK_ZUNMQRM ) + END IF + ELSE + CALL ZGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJV = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', N, N, N, CDUMMY, N, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_ZUNMQR = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWCON, + $ 2*N+LWRK_ZGEQRF, 2*N+N**2, + $ 2*N+N**2+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR,N+LWRK_ZUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, 2*N+LWRK_ZGEQRF, + $ 2*N+N**2, 2*N+N**2+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR, + $ N+LWRK_ZUNMQRM ) + END IF + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + END IF + END IF + MINWRK = MAX( 2, MINWRK ) + OPTWRK = MAX( 2, OPTWRK ) + IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 + IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 + END IF +* + IF ( INFO .NE. 0 ) THEN +* #:( + CALL XERBLA( 'ZGEJSV', - INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + CWORK(1) = OPTWRK + CWORK(2) = MINWRK + RWORK(1) = MINRWRK + IWORK(1) = MAX( 4, MINIWRK ) + RETURN + END IF +* +* Quick return for void matrix (Y3K safe) +* #:) + IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN + IWORK(1:4) = 0 + RWORK(1:7) = 0 + RETURN + ENDIF +* +* Determine whether the matrix U should be M x N or M x M +* + IF ( LSVEC ) THEN + N1 = N + IF ( LSAME( JOBU, 'F' ) ) N1 = M + END IF +* +* Set numerical parameters +* +*! NOTE: Make sure DLAMCH() does not fail on the target architecture. +* + EPSLN = DLAMCH('Epsilon') + SFMIN = DLAMCH('SafeMinimum') + SMALL = SFMIN / EPSLN + BIG = DLAMCH('O') +* BIG = ONE / SFMIN +* +* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N +* +*(!) If necessary, scale SVA() to protect the largest norm from +* overflow. It is possible that this scaling pushes the smallest +* column norm left from the underflow threshold (extreme case). +* + SCALEM = ONE / SQRT(DBLE(M)*DBLE(N)) + NOSCAL = .TRUE. + GOSCAL = .TRUE. + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A(1,p), 1, AAPP, AAQQ ) + IF ( AAPP .GT. BIG ) THEN + INFO = - 9 + CALL XERBLA( 'ZGEJSV', -INFO ) + RETURN + END IF + AAQQ = SQRT(AAQQ) + IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN + SVA(p) = AAPP * AAQQ + ELSE + NOSCAL = .FALSE. + SVA(p) = AAPP * ( AAQQ * SCALEM ) + IF ( GOSCAL ) THEN + GOSCAL = .FALSE. + CALL DSCAL( p-1, SCALEM, SVA, 1 ) + END IF + END IF + 1874 CONTINUE +* + IF ( NOSCAL ) SCALEM = ONE +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + AAPP = MAX( AAPP, SVA(p) ) + IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) + 4781 CONTINUE +* +* Quick return for zero M x N matrix +* #:) + IF ( AAPP .EQ. ZERO ) THEN + IF ( LSVEC ) CALL ZLASET( 'G', M, N1, CZERO, CONE, U, LDU ) + IF ( RSVEC ) CALL ZLASET( 'G', N, N, CZERO, CONE, V, LDV ) + RWORK(1) = ONE + RWORK(2) = ONE + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + IWORK(1) = 0 + IWORK(2) = 0 + IWORK(3) = 0 + IWORK(4) = -1 + RETURN + END IF +* +* Issue warning if denormalized column norms detected. Override the +* high relative accuracy request. Issue licence to kill nonzero columns +* (set them to zero) whose norm is less than sigma_max / BIG (roughly). +* #:( + WARNING = 0 + IF ( AAQQ .LE. SFMIN ) THEN + L2RANK = .TRUE. + L2KILL = .TRUE. + WARNING = 1 + END IF +* +* Quick return for one-column matrix +* #:) + IF ( N .EQ. 1 ) THEN +* + IF ( LSVEC ) THEN + CALL ZLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) + CALL ZLACPY( 'A', M, 1, A, LDA, U, LDU ) +* computing all M left singular vectors of the M x 1 matrix + IF ( N1 .NE. N ) THEN + CALL ZGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR ) + CALL ZUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR ) + CALL ZCOPY( M, A(1,1), 1, U(1,1), 1 ) + END IF + END IF + IF ( RSVEC ) THEN + V(1,1) = CONE + END IF + IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN + SVA(1) = SVA(1) / SCALEM + SCALEM = ONE + END IF + RWORK(1) = ONE / SCALEM + RWORK(2) = ONE + IF ( SVA(1) .NE. ZERO ) THEN + IWORK(1) = 1 + IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN + IWORK(2) = 1 + ELSE + IWORK(2) = 0 + END IF + ELSE + IWORK(1) = 0 + IWORK(2) = 0 + END IF + IWORK(3) = 0 + IWORK(4) = -1 + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + RETURN +* + END IF +* + TRANSP = .FALSE. +* + AATMAX = -ONE + AATMIN = BIG + IF ( ROWPIV .OR. L2TRAN ) THEN +* +* Compute the row norms, needed to determine row pivoting sequence +* (in the case of heavily row weighted A, row pivoting is strongly +* advised) and to collect information needed to compare the +* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.). +* + IF ( L2TRAN ) THEN + DO 1950 p = 1, M + XSC = ZERO + TEMP1 = ONE + CALL ZLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) +* ZLASSQ gets both the ell_2 and the ell_infinity norm +* in one pass through the vector + RWORK(M+p) = XSC * SCALEM + RWORK(p) = XSC * (SCALEM*SQRT(TEMP1)) + AATMAX = MAX( AATMAX, RWORK(p) ) + IF (RWORK(p) .NE. ZERO) + $ AATMIN = MIN(AATMIN,RWORK(p)) + 1950 CONTINUE + ELSE + DO 1904 p = 1, M + RWORK(M+p) = SCALEM*ABS( A(p,IZAMAX(N,A(p,1),LDA)) ) + AATMAX = MAX( AATMAX, RWORK(M+p) ) + AATMIN = MIN( AATMIN, RWORK(M+p) ) + 1904 CONTINUE + END IF +* + END IF +* +* For square matrix A try to determine whether A^* would be better +* input for the preconditioned Jacobi SVD, with faster convergence. +* The decision is based on an O(N) function of the vector of column +* and row norms of A, based on the Shannon entropy. This should give +* the right choice in most cases when the difference actually matters. +* It may fail and pick the slower converging side. +* + ENTRA = ZERO + ENTRAT = ZERO + IF ( L2TRAN ) THEN +* + XSC = ZERO + TEMP1 = ONE + CALL DLASSQ( N, SVA, 1, XSC, TEMP1 ) + TEMP1 = ONE / TEMP1 +* + ENTRA = ZERO + DO 1113 p = 1, N + BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * DLOG(BIG1) + 1113 CONTINUE + ENTRA = - ENTRA / DLOG(DBLE(N)) +* +* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex. +* It is derived from the diagonal of A^* * A. Do the same with the +* diagonal of A * A^*, compute the entropy of the corresponding +* probability distribution. Note that A * A^* and A^* * A have the +* same trace. +* + ENTRAT = ZERO + DO 1114 p = 1, M + BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * DLOG(BIG1) + 1114 CONTINUE + ENTRAT = - ENTRAT / DLOG(DBLE(M)) +* +* Analyze the entropies and decide A or A^*. Smaller entropy +* usually means better input for the algorithm. +* + TRANSP = ( ENTRAT .LT. ENTRA ) +* +* If A^* is better than A, take the adjoint of A. This is allowed +* only for square matrices, M=N. + IF ( TRANSP ) THEN +* In an optimal implementation, this trivial transpose +* should be replaced with faster transpose. + DO 1115 p = 1, N - 1 + A(p,p) = CONJG(A(p,p)) + DO 1116 q = p + 1, N + CTEMP = CONJG(A(q,p)) + A(q,p) = CONJG(A(p,q)) + A(p,q) = CTEMP + 1116 CONTINUE + 1115 CONTINUE + A(N,N) = CONJG(A(N,N)) + DO 1117 p = 1, N + RWORK(M+p) = SVA(p) + SVA(p) = RWORK(p) +* previously computed row 2-norms are now column 2-norms +* of the transposed matrix + 1117 CONTINUE + TEMP1 = AAPP + AAPP = AATMAX + AATMAX = TEMP1 + TEMP1 = AAQQ + AAQQ = AATMIN + AATMIN = TEMP1 + KILL = LSVEC + LSVEC = RSVEC + RSVEC = KILL + IF ( LSVEC ) N1 = N +* + ROWPIV = .TRUE. + END IF +* + END IF +* END IF L2TRAN +* +* Scale the matrix so that its maximal singular value remains less +* than SQRT(BIG) -- the matrix is scaled so that its maximal column +* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep +* SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and +* BLAS routines that, in some implementations, are not capable of +* working in the full interval [SFMIN,BIG] and that they may provoke +* overflows in the intermediate results. If the singular values spread +* from SFMIN to BIG, then ZGESVJ will compute them. So, in that case, +* one should use ZGESVJ instead of ZGEJSV. +* >> change in the April 2016 update: allow bigger range, i.e. the +* largest column is allowed up to BIG/N and ZGESVJ will do the rest. + BIG1 = SQRT( BIG ) + TEMP1 = SQRT( BIG / DBLE(N) ) +* TEMP1 = BIG/DBLE(N) +* + CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) + IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN + AAQQ = ( AAQQ / AAPP ) * TEMP1 + ELSE + AAQQ = ( AAQQ * TEMP1 ) / AAPP + END IF + TEMP1 = TEMP1 * SCALEM + CALL ZLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) +* +* To undo scaling at the end of this procedure, multiply the +* computed singular values with USCAL2 / USCAL1. +* + USCAL1 = TEMP1 + USCAL2 = AAPP +* + IF ( L2KILL ) THEN +* L2KILL enforces computation of nonzero singular values in +* the restricted range of condition number of the initial A, +* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). + XSC = SQRT( SFMIN ) + ELSE + XSC = SMALL +* +* Now, if the condition number of A is too big, +* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, +* as a precaution measure, the full SVD is computed using ZGESVJ +* with accumulated Jacobi rotations. This provides numerically +* more robust computation, at the cost of slightly increased run +* time. Depending on the concrete implementation of BLAS and LAPACK +* (i.e. how they behave in presence of extreme ill-conditioning) the +* implementor may decide to remove this switch. + IF ( ( AAQQ.LT.SQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN + JRACC = .TRUE. + END IF +* + END IF + IF ( AAQQ .LT. XSC ) THEN + DO 700 p = 1, N + IF ( SVA(p) .LT. XSC ) THEN + CALL ZLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA ) + SVA(p) = ZERO + END IF + 700 CONTINUE + END IF +* +* Preconditioning using QR factorization with pivoting +* + IF ( ROWPIV ) THEN +* Optional row permutation (Bjoerck row pivoting): +* A result by Cox and Higham shows that the Bjoerck's +* row pivoting combined with standard column pivoting +* has similar effect as Powell-Reid complete pivoting. +* The ell-infinity norms of A are made nonincreasing. + IF ( ( LSVEC .AND. RSVEC ) .AND. .NOT.( JRACC ) ) THEN + IWOFF = 2*N + ELSE + IWOFF = N + END IF + DO 1952 p = 1, M - 1 + q = IDAMAX( M-p+1, RWORK(M+p), 1 ) + p - 1 + IWORK(IWOFF+p) = q + IF ( p .NE. q ) THEN + TEMP1 = RWORK(M+p) + RWORK(M+p) = RWORK(M+q) + RWORK(M+q) = TEMP1 + END IF + 1952 CONTINUE + CALL ZLASWP( N, A, LDA, 1, M-1, IWORK(IWOFF+1), 1 ) + END IF +* +* End of the preparation phase (scaling, optional sorting and +* transposing, optional flushing of small columns). +* +* Preconditioning +* +* If the full SVD is needed, the right singular vectors are computed +* from a matrix equation, and for that we need theoretical analysis +* of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF. +* In all other cases the first RR QRF can be chosen by other criteria +* (eg speed by replacing global with restricted window pivoting, such +* as in xGEQPX from TOMS # 782). Good results will be obtained using +* xGEQPX with properly (!) chosen numerical parameters. +* Any improvement of ZGEQP3 improves overal performance of ZGEJSV. +* +* A * P1 = Q1 * [ R1^* 0]^*: + DO 1963 p = 1, N +* .. all columns are free columns + IWORK(p) = 0 + 1963 CONTINUE + CALL ZGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, + $ RWORK, IERR ) +* +* The upper triangular matrix R1 from the first QRF is inspected for +* rank deficiency and possibilities for deflation, or possible +* ill-conditioning. Depending on the user specified flag L2RANK, +* the procedure explores possibilities to reduce the numerical +* rank by inspecting the computed upper triangular factor. If +* L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of +* A + dA, where ||dA|| <= f(M,N)*EPSLN. +* + NR = 1 + IF ( L2ABER ) THEN +* Standard absolute error bound suffices. All sigma_i with +* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an +* agressive enforcement of lower numerical rank by introducing a +* backward error of the order of N*EPSLN*||A||. + TEMP1 = SQRT(DBLE(N))*EPSLN + DO 3001 p = 2, N + IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN + NR = NR + 1 + ELSE + GO TO 3002 + END IF + 3001 CONTINUE + 3002 CONTINUE + ELSE IF ( L2RANK ) THEN +* .. similarly as above, only slightly more gentle (less agressive). +* Sudden drop on the diagonal of R1 is used as the criterion for +* close-to-rank-deficient. + TEMP1 = SQRT(SFMIN) + DO 3401 p = 2, N + IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR. + $ ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 + NR = NR + 1 + 3401 CONTINUE + 3402 CONTINUE +* + ELSE +* The goal is high relative accuracy. However, if the matrix +* has high scaled condition number the relative accuracy is in +* general not feasible. Later on, a condition number estimator +* will be deployed to estimate the scaled condition number. +* Here we just remove the underflowed part of the triangular +* factor. This prevents the situation in which the code is +* working hard to get the accuracy not warranted by the data. + TEMP1 = SQRT(SFMIN) + DO 3301 p = 2, N + IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 + NR = NR + 1 + 3301 CONTINUE + 3302 CONTINUE +* + END IF +* + ALMORT = .FALSE. + IF ( NR .EQ. N ) THEN + MAXPRJ = ONE + DO 3051 p = 2, N + TEMP1 = ABS(A(p,p)) / SVA(IWORK(p)) + MAXPRJ = MIN( MAXPRJ, TEMP1 ) + 3051 CONTINUE + IF ( MAXPRJ**2 .GE. ONE - DBLE(N)*EPSLN ) ALMORT = .TRUE. + END IF +* +* + SCONDA = - ONE + CONDR1 = - ONE + CONDR2 = - ONE +* + IF ( ERREST ) THEN + IF ( N .EQ. NR ) THEN + IF ( RSVEC ) THEN +* .. V is available as workspace + CALL ZLACPY( 'U', N, N, A, LDA, V, LDV ) + DO 3053 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL ZDSCAL( p, ONE/TEMP1, V(1,p), 1 ) + 3053 CONTINUE + IF ( LSVEC )THEN + CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK, RWORK, IERR ) + END IF +* + ELSE IF ( LSVEC ) THEN +* .. U is available as workspace + CALL ZLACPY( 'U', N, N, A, LDA, U, LDU ) + DO 3054 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL ZDSCAL( p, ONE/TEMP1, U(1,p), 1 ) + 3054 CONTINUE + CALL ZPOCON( 'U', N, U, LDU, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL ZLACPY( 'U', N, N, A, LDA, CWORK, N ) +*[] CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) +* Change: here index shifted by N to the left, CWORK(1:N) +* not needed for SIGMA only computation + DO 3052 p = 1, N + TEMP1 = SVA(IWORK(p)) +*[] CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 ) + CALL ZDSCAL( p, ONE/TEMP1, CWORK((p-1)*N+1), 1 ) + 3052 CONTINUE +* .. the columns of R are scaled to have unit Euclidean lengths. +*[] CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1, +*[] $ CWORK(N+N*N+1), RWORK, IERR ) + CALL ZPOCON( 'U', N, CWORK, N, ONE, TEMP1, + $ CWORK(N*N+1), RWORK, IERR ) +* + END IF + IF ( TEMP1 .NE. ZERO ) THEN + SCONDA = ONE / SQRT(TEMP1) + ELSE + SCONDA = - ONE + END IF +* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA + ELSE + SCONDA = - ONE + END IF + END IF +* + L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. SQRT(BIG1) ) +* If there is no violent scaling, artificial perturbation is not needed. +* +* Phase 3: +* + IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN +* +* Singular Values only +* +* .. transpose A(1:NR,1:N) + DO 1946 p = 1, MIN( N-1, NR ) + CALL ZCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL ZLACGV( N-p+1, A(p,p), 1 ) + 1946 CONTINUE + IF ( NR .EQ. N ) A(N,N) = CONJG(A(N,N)) +* +* The following two DO-loops introduce small relative perturbation +* into the strict upper triangle of the lower triangular matrix. +* Small entries below the main diagonal are also changed. +* This modification is useful if the computing environment does not +* provide/allow FLUSH TO ZERO underflow, for it prevents many +* annoying denormalized numbers in case of strongly scaled matrices. +* The perturbation is structured so that it does not introduce any +* new perturbation of the singular values, and it does not destroy +* the job done by the preconditioner. +* The licence for this perturbation is in the variable L2PERT, which +* should be .FALSE. if FLUSH TO ZERO underflow is active. +* + IF ( .NOT. ALMORT ) THEN +* + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / DBLE(N) + DO 4947 q = 1, NR + CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO) + DO 4949 p = 1, N + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 4949 CONTINUE + 4947 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA ) + END IF +* +* .. second preconditioning using the QR factorization +* + CALL ZGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR ) +* +* .. and transpose upper to lower triangular + DO 1948 p = 1, NR - 1 + CALL ZCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL ZLACGV( NR-p+1, A(p,p), 1 ) + 1948 CONTINUE +* + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* +* .. again some perturbation (a "background noise") is added +* to drown denormals + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / DBLE(N) + DO 1947 q = 1, NR + CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO) + DO 1949 p = 1, NR + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 1949 CONTINUE + 1947 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA ) + END IF +* +* .. and one-sided Jacobi rotations are started on a lower +* triangular matrix (plus perturbation which is ignored in +* the part which destroys triangular form (confusing?!)) +* + CALL ZGESVJ( 'L', 'N', 'N', NR, NR, A, LDA, SVA, + $ N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* +* + ELSE IF ( ( RSVEC .AND. ( .NOT. LSVEC ) .AND. ( .NOT. JRACC ) ) + $ .OR. + $ ( JRACC .AND. ( .NOT. LSVEC ) .AND. ( NR .NE. N ) ) ) THEN +* +* -> Singular Values and Right Singular Vectors <- +* + IF ( ALMORT ) THEN +* +* .. in this case NR equals N + DO 1998 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL ZLACGV( N-p+1, V(p,p), 1 ) + 1998 CONTINUE + CALL ZLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) +* + CALL ZGESVJ( 'L','U','N', N, NR, V, LDV, SVA, NR, A, LDA, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + ELSE +* +* .. two more QR factorizations ( one QRF is not enough, two require +* accumulated product of Jacobi rotations, three are perfect ) +* + CALL ZLASET( 'L', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA ) + CALL ZGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR) + CALL ZLACPY( 'L', NR, NR, A, LDA, V, LDV ) + CALL ZLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) + CALL ZGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + DO 8998 p = 1, NR + CALL ZCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) + CALL ZLACGV( NR-p+1, V(p,p), 1 ) + 8998 CONTINUE + CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV) +* + CALL ZGESVJ( 'L', 'U','N', NR, NR, V,LDV, SVA, NR, U, + $ LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1), LDV ) + CALL ZLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1), LDV ) + CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV ) + END IF +* + CALL ZUNMLQ( 'L', 'C', N, N, NR, A, LDA, CWORK, + $ V, LDV, CWORK(N+1), LWORK-N, IERR ) +* + END IF +* .. permute the rows of V +* DO 8991 p = 1, N +* CALL ZCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) +* 8991 CONTINUE +* CALL ZLACPY( 'All', N, N, A, LDA, V, LDV ) + CALL ZLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + IF ( TRANSP ) THEN + CALL ZLACPY( 'A', N, N, V, LDV, U, LDU ) + END IF +* + ELSE IF ( JRACC .AND. (.NOT. LSVEC) .AND. ( NR.EQ. N ) ) THEN +* + CALL ZLASET( 'L', N-1,N-1, CZERO, CZERO, A(2,1), LDA ) +* + CALL ZGESVJ( 'U','N','V', N, N, A, LDA, SVA, N, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + CALL ZLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN +* +* .. Singular Values and Left Singular Vectors .. +* +* .. second preconditioning step to avoid need to accumulate +* Jacobi rotations in the Jacobi iterations. + DO 1965 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) + CALL ZLACGV( N-p+1, U(p,p), 1 ) + 1965 CONTINUE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL ZGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + DO 1967 p = 1, NR - 1 + CALL ZCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) + CALL ZLACGV( N-p+1, U(p,p), 1 ) + 1967 CONTINUE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL ZGESVJ( 'L', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, + $ LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* + IF ( NR .LT. M ) THEN + CALL ZLASET( 'A', M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL ZLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU ) + CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU ) + END IF + END IF +* + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + DO 1974 p = 1, N1 + XSC = ONE / DZNRM2( M, U(1,p), 1 ) + CALL ZDSCAL( M, XSC, U(1,p), 1 ) + 1974 CONTINUE +* + IF ( TRANSP ) THEN + CALL ZLACPY( 'A', N, N, U, LDU, V, LDV ) + END IF +* + ELSE +* +* .. Full SVD .. +* + IF ( .NOT. JRACC ) THEN +* + IF ( .NOT. ALMORT ) THEN +* +* Second Preconditioning Step (QRF [with pivoting]) +* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is +* equivalent to an LQF CALL. Since in many libraries the QRF +* seems to be better optimized than the LQF, we do explicit +* transpose and use the QRF. This is subject to changes in an +* optimized implementation of ZGEJSV. +* + DO 1968 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL ZLACGV( N-p+1, V(p,p), 1 ) + 1968 CONTINUE +* +* .. the following two loops perturb small entries to avoid +* denormals in the second QR factorization, where they are +* as good as zeros. This is done to avoid painfully slow +* computation with denormals. The relative size of the perturbation +* is a parameter that can be changed by the implementer. +* This perturbation device will be obsolete on machines with +* properly implemented arithmetic. +* To switch it off, set L2PERT=.FALSE. To remove it from the +* code, remove the action under L2PERT=.TRUE., leave the ELSE part. +* The following two loops should be blocked and fused with the +* transposed copy above. +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 2969 q = 1, NR + CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 2968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 2968 CONTINUE + 2969 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF +* +* Estimate the row scaled condition number of R1 +* (If R1 is rectangular, N > NR, then the condition number +* of the leading NR x NR submatrix is estimated.) +* + CALL ZLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR ) + DO 3950 p = 1, NR + TEMP1 = DZNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1) + CALL ZDSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1) + 3950 CONTINUE + CALL ZPOCON('L',NR,CWORK(2*N+1),NR,ONE,TEMP1, + $ CWORK(2*N+NR*NR+1),RWORK,IERR) + CONDR1 = ONE / SQRT(TEMP1) +* .. here need a second oppinion on the condition number +* .. then assume worst case scenario +* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) +* more conservative <=> CONDR1 .LT. SQRT(DBLE(N)) +* + COND_OK = SQRT(SQRT(DBLE(NR))) +*[TP] COND_OK is a tuning parameter. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* .. the second QRF without pivoting. Note: in an optimized +* implementation, this QRF should be implemented as the QRF +* of a lower triangular matrix. +* R1^* = Q2 * R2 + CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL)/EPSLN + DO 3959 p = 2, NR + DO 3958 q = 1, p - 1 + CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3958 CONTINUE + 3959 CONTINUE + END IF +* + IF ( NR .NE. N ) + $ CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* .. save ... +* +* .. this transposed copy should be better than naive + DO 1969 p = 1, NR - 1 + CALL ZCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) + CALL ZLACGV(NR-p+1, V(p,p), 1 ) + 1969 CONTINUE + V(NR,NR)=CONJG(V(NR,NR)) +* + CONDR2 = CONDR1 +* + ELSE +* +* .. ill-conditioned case: second QRF with pivoting +* Note that windowed pivoting would be equaly good +* numerically, and more run-time efficient. So, in +* an optimal implementation, the next call to ZGEQP3 +* should be replaced with eg. CALL ZGEQPX (ACM TOMS #782) +* with properly (carefully) chosen parameters. +* +* R1^* * P2 = Q2 * R2 + DO 3003 p = 1, NR + IWORK(N+p) = 0 + 3003 CONTINUE + CALL ZGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1), + $ CWORK(2*N+1), LWORK-2*N, RWORK, IERR ) +** CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), +** $ LWORK-2*N, IERR ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 3969 p = 2, NR + DO 3968 q = 1, p - 1 + CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3968 CONTINUE + 3969 CONTINUE + END IF +* + CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 8970 p = 2, NR + DO 8971 q = 1, p - 1 + CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) +* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) ) + V(p,q) = - CTEMP + 8971 CONTINUE + 8970 CONTINUE + ELSE + CALL ZLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV ) + END IF +* Now, compute R2 = L3 * Q3, the LQ factorization. + CALL ZGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1), + $ CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) +* .. and estimate the condition number + CALL ZLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR ) + DO 4950 p = 1, NR + TEMP1 = DZNRM2( p, CWORK(2*N+N*NR+NR+p), NR ) + CALL ZDSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR ) + 4950 CONTINUE + CALL ZPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, + $ CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) + CONDR2 = ONE / SQRT(TEMP1) +* +* + IF ( CONDR2 .GE. COND_OK ) THEN +* .. save the Householder vectors used for Q3 +* (this overwrittes the copy of R2, as it will not be +* needed in this branch, but it does not overwritte the +* Huseholder vectors of Q2.). + CALL ZLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N ) +* .. and the rest of the information on Q3 is in +* WORK(2*N+N*NR+1:2*N+N*NR+N) + END IF +* + END IF +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 4968 q = 2, NR + CTEMP = XSC * V(q,q) + DO 4969 p = 1, q - 1 +* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) ) + V(p,q) = - CTEMP + 4969 CONTINUE + 4968 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) + END IF +* +* Second preconditioning finished; continue with Jacobi SVD +* The input matrix is lower trinagular. +* +* Recover the right singular vectors as solution of a well +* conditioned triangular matrix equation. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* + CALL ZGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU, + $ CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK, + $ LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3970 p = 1, NR + CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL ZDSCAL( NR, SVA(p), V(1,p), 1 ) + 3970 CONTINUE + +* .. pick the right matrix equation and solve it +* + IF ( NR .EQ. N ) THEN +* :)) .. best case, R1 is inverted. The solution of this matrix +* equation is Q2*V2 = the product of the Jacobi rotations +* used in ZGESVJ, premultiplied with the orthogonal matrix +* from the second QR factorization. + CALL ZTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV) + ELSE +* .. R1 is well conditioned, but non-square. Adjoint of R2 +* is inverted to get the product of the Jacobi rotations +* used in ZGESVJ. The Q-factor from the second QR +* factorization is then built in explicitly. + CALL ZTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1), + $ N,V,LDV) + IF ( NR .LT. N ) THEN + CALL ZLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) + CALL ZLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL ZUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) + END IF +* + ELSE IF ( CONDR2 .LT. COND_OK ) THEN +* +* The matrix R2 is inverted. The solution of the matrix equation +* is Q3^* * V3 = the product of the Jacobi rotations (appplied to +* the lower triangular L3 from the LQ factorization of +* R2=L3*Q3), pre-multiplied with the transposed Q3. + CALL ZGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3870 p = 1, NR + CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL ZDSCAL( NR, SVA(p), U(1,p), 1 ) + 3870 CONTINUE + CALL ZTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N, + $ U,LDU) +* .. apply the permutation from the second QR factorization + DO 873 q = 1, NR + DO 872 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 872 CONTINUE + DO 874 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 874 CONTINUE + 873 CONTINUE + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) + ELSE +* Last line of defense. +* #:( This is a rather pathological case: no scaled condition +* improvement after two pivoted QR factorizations. Other +* possibility is that the rank revealing QR factorization +* or the condition estimator has failed, or the COND_OK +* is set very close to ONE (which is unnecessary). Normally, +* this branch should never be executed, but in rare cases of +* failure of the RRQR or condition estimator, the last line of +* defense ensures that ZGEJSV completes the task. +* Compute the full SVD of L3 using ZGESVJ with explicit +* accumulation of Jacobi rotations. + CALL ZGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* + CALL ZUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N, + $ CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1), + $ LWORK-2*N-N*NR-NR, IERR ) + DO 773 q = 1, NR + DO 772 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 772 CONTINUE + DO 774 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 774 CONTINUE + 773 CONTINUE +* + END IF +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(DBLE(N)) * EPSLN + DO 1972 q = 1, N + DO 972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 972 CONTINUE + DO 973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 973 CONTINUE + XSC = ONE / DZNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( N, XSC, V(1,q), 1 ) + 1972 CONTINUE +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). + IF ( NR .LT. M ) THEN + CALL ZLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU) + IF ( NR .LT. N1 ) THEN + CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL ZLASET('A',M-NR,N1-NR,CZERO,CONE, + $ U(NR+1,NR+1),LDU) + END IF + END IF +* +* The Q matrix from the first QRF is built into the left singular +* matrix U. This applies to all cases. +* + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + +* The columns of U are normalized. The cost is O(M*N) flops. + TEMP1 = SQRT(DBLE(M)) * EPSLN + DO 1973 p = 1, NR + XSC = ONE / DZNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( M, XSC, U(1,p), 1 ) + 1973 CONTINUE +* +* If the initial QRF is computed with row pivoting, the left +* singular vectors must be adjusted. +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + ELSE +* +* .. the initial matrix A has almost orthogonal columns and +* the second QRF is not needed +* + CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 5970 p = 2, N + CTEMP = XSC * CWORK( N + (p-1)*N + p ) + DO 5971 q = 1, p - 1 +* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) / +* $ ABS(CWORK(N+(p-1)*N+q)) ) + CWORK(N+(q-1)*N+p)=-CTEMP + 5971 CONTINUE + 5970 CONTINUE + ELSE + CALL ZLASET( 'L',N-1,N-1,CZERO,CZERO,CWORK(N+2),N ) + END IF +* + CALL ZGESVJ( 'U', 'U', 'N', N, N, CWORK(N+1), N, SVA, + $ N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, + $ INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 6970 p = 1, N + CALL ZCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 ) + CALL ZDSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 ) + 6970 CONTINUE +* + CALL ZTRSM( 'L', 'U', 'N', 'N', N, N, + $ CONE, A, LDA, CWORK(N+1), N ) + DO 6972 p = 1, N + CALL ZCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV ) + 6972 CONTINUE + TEMP1 = SQRT(DBLE(N))*EPSLN + DO 6971 p = 1, N + XSC = ONE / DZNRM2( N, V(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( N, XSC, V(1,p), 1 ) + 6971 CONTINUE +* +* Assemble the left singular vector matrix U (M x N). +* + IF ( N .LT. M ) THEN + CALL ZLASET( 'A', M-N, N, CZERO, CZERO, U(N+1,1), LDU ) + IF ( N .LT. N1 ) THEN + CALL ZLASET('A',N, N1-N, CZERO, CZERO, U(1,N+1),LDU) + CALL ZLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU) + END IF + END IF + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + TEMP1 = SQRT(DBLE(M))*EPSLN + DO 6973 p = 1, N1 + XSC = ONE / DZNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( M, XSC, U(1,p), 1 ) + 6973 CONTINUE +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + END IF +* +* end of the >> almost orthogonal case << in the full SVD +* + ELSE +* +* This branch deploys a preconditioned Jacobi SVD with explicitly +* accumulated rotations. It is included as optional, mainly for +* experimental purposes. It does perfom well, and can also be used. +* In this implementation, this branch will be automatically activated +* if the condition number sigma_max(A) / sigma_min(A) is predicted +* to be greater than the overflow threshold. This is because the +* a posteriori computation of the singular vectors assumes robust +* implementation of BLAS and some LAPACK procedures, capable of working +* in presence of extreme values, e.g. when the singular values spread from +* the underflow to the overflow threshold. +* + DO 7968 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL ZLACGV( N-p+1, V(p,p), 1 ) + 7968 CONTINUE +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 5969 q = 1, NR + CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 5968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 5968 CONTINUE + 5969 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF + + CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + CALL ZLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N ) +* + DO 7969 p = 1, NR + CALL ZCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) + CALL ZLACGV( NR-p+1, U(p,p), 1 ) + 7969 CONTINUE + + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 9970 q = 2, NR + DO 9971 p = 1, q - 1 + CTEMP = DCMPLX(XSC * MIN(ABS(U(p,p)),ABS(U(q,q))), + $ ZERO) +* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) ) + U(p,q) = - CTEMP + 9971 CONTINUE + 9970 CONTINUE + ELSE + CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) + END IF + + CALL ZGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA, + $ N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV ) + END IF + + CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(DBLE(N)) * EPSLN + DO 7972 q = 1, N + DO 8972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 8972 CONTINUE + DO 8973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 8973 CONTINUE + XSC = ONE / DZNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( N, XSC, V(1,q), 1 ) + 7972 CONTINUE +* +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). +* + IF ( NR .LT. M ) THEN + CALL ZLASET( 'A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL ZLASET('A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU) + CALL ZLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU) + END IF + END IF +* + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* +* + END IF + IF ( TRANSP ) THEN +* .. swap U and V because the procedure worked on A^* + DO 6974 p = 1, N + CALL ZSWAP( N, U(1,p), 1, V(1,p), 1 ) + 6974 CONTINUE + END IF +* + END IF +* end of the full SVD +* +* Undo scaling, if necessary (and possible) +* + IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN + CALL DLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) + USCAL1 = ONE + USCAL2 = ONE + END IF +* + IF ( NR .LT. N ) THEN + DO 3004 p = NR+1, N + SVA(p) = ZERO + 3004 CONTINUE + END IF +* + RWORK(1) = USCAL2 * SCALEM + RWORK(2) = USCAL1 + IF ( ERREST ) RWORK(3) = SCONDA + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = CONDR1 + RWORK(5) = CONDR2 + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ENTRA + RWORK(7) = ENTRAT + END IF +* + IWORK(1) = NR + IWORK(2) = NUMRANK + IWORK(3) = WARNING + IF ( TRANSP ) THEN + IWORK(4) = 1 + ELSE + IWORK(4) = -1 + END IF + +* + RETURN +* .. +* .. END OF ZGEJSV +* .. + END +* diff --git a/lapack-netlib/SRC/zgelqt.f b/lapack-netlib/SRC/zgelqt.f index c8afd1c56..015bd3d7d 100644 --- a/lapack-netlib/SRC/zgelqt.f +++ b/lapack-netlib/SRC/zgelqt.f @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleGEcomputational * @@ -117,8 +117,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -127,11 +127,11 @@ *> *> where the vi's represent the vectors which define H(i), which are returned *> in the matrix A. The 1's along the diagonal of V are not stored in A. -*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each -*> block is of order NB except for the last block, which is of order -*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block -*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -139,10 +139,10 @@ * ===================================================================== SUBROUTINE ZGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, MB diff --git a/lapack-netlib/SRC/zgelqt3.f b/lapack-netlib/SRC/zgelqt3.f index 14063544f..45e74f43d 100644 --- a/lapack-netlib/SRC/zgelqt3.f +++ b/lapack-netlib/SRC/zgelqt3.f @@ -100,7 +100,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -109,8 +109,8 @@ *> *> \verbatim *> -*> The matrix V stores the elementary reflectors H(i) in the i-th column -*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is *> *> V = ( 1 v1 v1 v1 v1 ) *> ( 1 v2 v2 v2 ) @@ -131,10 +131,10 @@ * ===================================================================== RECURSIVE SUBROUTINE ZGELQT3( M, N, A, LDA, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, LDT @@ -151,7 +151,7 @@ PARAMETER ( ZERO = (0.0D+00,0.0D+00)) * .. * .. Local Scalars .. - INTEGER I, I1, J, J1, M1, M2, N1, N2, IINFO + INTEGER I, I1, J, J1, M1, M2, IINFO * .. * .. External Subroutines .. EXTERNAL ZLARFG, ZTRMM, ZGEMM, XERBLA diff --git a/lapack-netlib/SRC/zgelsd.f b/lapack-netlib/SRC/zgelsd.f index ce574173a..9463ccc10 100644 --- a/lapack-netlib/SRC/zgelsd.f +++ b/lapack-netlib/SRC/zgelsd.f @@ -90,7 +90,7 @@ *> of the matrices B and X. NRHS >= 0. *> \endverbatim *> -*> \param[in] A +*> \param[in,out] A *> \verbatim *> A is COMPLEX*16 array, dimension (LDA,N) *> On entry, the M-by-N matrix A. @@ -210,7 +210,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEsolve * @@ -225,10 +225,10 @@ SUBROUTINE ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, $ WORK, LWORK, RWORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK diff --git a/lapack-netlib/SRC/zgemlqt.f b/lapack-netlib/SRC/zgemlqt.f index 569713c71..6a4175e3c 100644 --- a/lapack-netlib/SRC/zgemlqt.f +++ b/lapack-netlib/SRC/zgemlqt.f @@ -6,7 +6,7 @@ * http://www.netlib.org/lapack/explore-html/ * *> \htmlonly -*> Download DGEMQRT + dependencies +*> Download DGEMLQT + dependencies *> *> [TGZ] *> @@ -35,16 +35,16 @@ *> *> \verbatim *> -*> ZGEMQRT overwrites the general real M-by-N matrix C with +*> ZGEMLQT overwrites the general real M-by-N matrix C with *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q C C Q -*> TRANS = 'C': Q**C C C Q**C +*> TRANS = 'C': Q**H C C Q**H *> *> where Q is a complex orthogonal matrix defined as the product of K *> elementary reflectors: *> -*> Q = H(1) H(2) . . . H(K) = I - V C V**C +*> Q = H(1) H(2) . . . H(K) = I - V T V**H *> *> generated using the compact WY representation as returned by ZGELQT. *> @@ -57,15 +57,15 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**C from the Left; -*> = 'R': apply Q or Q**C from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'C': Transpose, apply Q**C. +*> = 'C': Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M @@ -99,7 +99,9 @@ *> *> \param[in] V *> \verbatim -*> V is COMPLEX*16 array, dimension (LDV,K) +*> V is COMPLEX*16 array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DGELQT in the first K rows of its array argument A. @@ -108,16 +110,14 @@ *> \param[in] LDV *> \verbatim *> LDV is INTEGER -*> The leading dimension of the array V. -*> If SIDE = 'L', LDA >= max(1,M); -*> if SIDE = 'R', LDA >= max(1,N). +*> The leading dimension of the array V. LDV >= max(1,K). *> \endverbatim *> *> \param[in] T *> \verbatim *> T is COMPLEX*16 array, dimension (LDT,K) *> The upper triangular factors of the block reflectors -*> as returned by DGELQT, stored as a MB-by-M matrix. +*> as returned by DGELQT, stored as a MB-by-K matrix. *> \endverbatim *> *> \param[in] LDT @@ -130,7 +130,7 @@ *> \verbatim *> C is COMPLEX*16 array, dimension (LDC,N) *> On entry, the M-by-N matrix C. -*> On exit, C is overwritten by Q C, Q**C C, C Q**C or C Q. +*> On exit, C is overwritten by Q C, Q**H C, C Q**H or C Q. *> \endverbatim *> *> \param[in] LDC @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup doubleGEcomputational * @@ -168,10 +168,10 @@ SUBROUTINE ZGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, $ C, LDC, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -186,7 +186,7 @@ * .. * .. Local Scalars .. LOGICAL LEFT, RIGHT, TRAN, NOTRAN - INTEGER I, IB, LDWORK, KF, Q + INTEGER I, IB, LDWORK, KF * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/zgeqrt.f b/lapack-netlib/SRC/zgeqrt.f index 4f872c5be..60adfec76 100644 --- a/lapack-netlib/SRC/zgeqrt.f +++ b/lapack-netlib/SRC/zgeqrt.f @@ -108,7 +108,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEcomputational * @@ -133,7 +133,7 @@ *> block is of order NB except for the last block, which is of order *> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block *> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB -*> for the last block) T's are stored in the NB-by-N matrix T as +*> for the last block) T's are stored in the NB-by-K matrix T as *> *> T = (T1 T2 ... TB). *> \endverbatim @@ -141,10 +141,10 @@ * ===================================================================== SUBROUTINE ZGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDT, M, N, NB diff --git a/lapack-netlib/SRC/zgesc2.f b/lapack-netlib/SRC/zgesc2.f index e5bea1430..72ef99dba 100644 --- a/lapack-netlib/SRC/zgesc2.f +++ b/lapack-netlib/SRC/zgesc2.f @@ -102,7 +102,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16GEauxiliary * @@ -115,10 +115,10 @@ * ===================================================================== SUBROUTINE ZGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER LDA, N @@ -141,7 +141,7 @@ COMPLEX*16 TEMP * .. * .. External Subroutines .. - EXTERNAL ZLASWP, ZSCAL + EXTERNAL ZLASWP, ZSCAL, DLABAD * .. * .. External Functions .. INTEGER IZAMAX diff --git a/lapack-netlib/SRC/zgesv.f b/lapack-netlib/SRC/zgesv.f index 316965ac7..40dd14f06 100644 --- a/lapack-netlib/SRC/zgesv.f +++ b/lapack-netlib/SRC/zgesv.f @@ -1,4 +1,4 @@ -*> \brief ZGESV computes the solution to system of linear equations A * X = B for GE matrices (simple driver) +*> \brief ZGESV computes the solution to system of linear equations A * X = B for GE matrices (simple driver) * * =========== DOCUMENTATION =========== * @@ -115,17 +115,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEsolve * * ===================================================================== SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, N, NRHS diff --git a/lapack-netlib/SRC/zgesvdx.f b/lapack-netlib/SRC/zgesvdx.f index 1643b109e..56b5cd4f2 100644 --- a/lapack-netlib/SRC/zgesvdx.f +++ b/lapack-netlib/SRC/zgesvdx.f @@ -270,7 +270,7 @@ $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, $ LWORK, RWORK, IWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -308,8 +308,8 @@ DOUBLE PRECISION DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL ZGEBRD, ZGELQF, ZGEQRF, ZLASCL, ZLASET, - $ DLASCL, XERBLA + EXTERNAL ZGEBRD, ZGELQF, ZGEQRF, ZLASCL, ZLASET, ZLACPY, + $ ZUNMLQ, ZUNMBR, ZUNMQR, DBDSVDX, DLASCL, XERBLA * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/zgesvj.f b/lapack-netlib/SRC/zgesvj.f index d08cfa555..fd32f92d8 100644 --- a/lapack-netlib/SRC/zgesvj.f +++ b/lapack-netlib/SRC/zgesvj.f @@ -52,7 +52,7 @@ * *> \param[in] JOBA *> \verbatim -*> JOBA is CHARACTER* 1 +*> JOBA is CHARACTER*1 *> Specifies the structure of A. *> = 'L': The input matrix A is lower triangular; *> = 'U': The input matrix A is upper triangular; @@ -206,7 +206,7 @@ *> *> \param[in,out] CWORK *> \verbatim -*> CWORK is COMPLEX*16 array, dimension max(1,LWORK). +*> CWORK is COMPLEX*16 array, dimension (max(1,LWORK)) *> Used as workspace. *> If on entry LWORK .EQ. -1, then a workspace query is assumed and *> no computation is done; CWORK(1) is set to the minial (and optimal) @@ -221,7 +221,7 @@ *> *> \param[in,out] RWORK *> \verbatim -*> RWORK is DOUBLE PRECISION array, dimension max(6,LRWORK). +*> RWORK is DOUBLE PRECISION array, dimension (max(6,LRWORK)) *> On entry, *> If JOBU .EQ. 'C' : *> RWORK(1) = CTOL, where CTOL defines the threshold for convergence. @@ -318,6 +318,8 @@ *> \par References: * ================ *> +*> \verbatim +*> *> [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the *> singular value decomposition on a vector computer. *> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. @@ -349,7 +351,7 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, $ LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -407,7 +409,7 @@ * .. External Subroutines .. * .. * from BLAS - EXTERNAL ZCOPY, ZROT, ZDSCAL, ZSWAP + EXTERNAL ZCOPY, ZROT, ZDSCAL, ZSWAP, ZAXPY * from LAPACK EXTERNAL DLASCL, ZLASCL, ZLASET, ZLASSQ, XERBLA EXTERNAL ZGSVJ0, ZGSVJ1 diff --git a/lapack-netlib/SRC/zgetc2.f b/lapack-netlib/SRC/zgetc2.f index 40e4d9150..b790e34d4 100644 --- a/lapack-netlib/SRC/zgetc2.f +++ b/lapack-netlib/SRC/zgetc2.f @@ -111,7 +111,7 @@ * ===================================================================== SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -135,7 +135,7 @@ DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX * .. * .. External Subroutines .. - EXTERNAL ZGERU, ZSWAP + EXTERNAL ZGERU, ZSWAP, DLABAD * .. * .. External Functions .. DOUBLE PRECISION DLAMCH diff --git a/lapack-netlib/SRC/zgetsls.f b/lapack-netlib/SRC/zgetsls.f index 40ad10e86..5ce11efef 100644 --- a/lapack-netlib/SRC/zgetsls.f +++ b/lapack-netlib/SRC/zgetsls.f @@ -53,7 +53,7 @@ *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': the linear system involves A; -*> = 'C': the linear system involves A**C. +*> = 'C': the linear system involves A**H. *> \endverbatim *> *> \param[in] M @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEsolve * @@ -160,10 +160,10 @@ SUBROUTINE ZGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -187,8 +187,8 @@ INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, $ WSIZEO, WSIZEM, INFO2 - DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM - COMPLEX*16 TQ( 5 ), WORKQ + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM, DUM( 1 ) + COMPLEX*16 TQ( 5 ), WORKQ( 1 ) * .. * .. External Functions .. LOGICAL LSAME @@ -236,31 +236,31 @@ IF( M.GE.N ) THEN CALL ZGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL ZGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL ZGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL ZGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, $ TSZM, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM ELSE CALL ZGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) TSZO = INT( TQ( 1 ) ) - LWO = INT( WORKQ ) + LWO = INT( WORKQ( 1 ) ) CALL ZGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWO = MAX( LWO, INT( WORKQ ) ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) CALL ZGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) TSZM = INT( TQ( 1 ) ) - LWM = INT( WORKQ ) + LWM = INT( WORKQ( 1 ) ) CALL ZGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, $ TSZO, B, LDB, WORKQ, -1, INFO2 ) - LWM = MAX( LWM, INT( WORKQ ) ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) WSIZEO = TSZO + LWO WSIZEM = TSZM + LWM END IF @@ -305,7 +305,7 @@ * * Scale A, B if max element outside range [SMLNUM,BIGNUM] * - ANRM = ZLANGE( 'M', M, N, A, LDA, WORK ) + ANRM = ZLANGE( 'M', M, N, A, LDA, DUM ) IASCL = 0 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN * @@ -331,7 +331,7 @@ IF ( TRAN ) THEN BROW = N END IF - BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, WORK ) + BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, DUM ) IBSCL = 0 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN * diff --git a/lapack-netlib/SRC/zggesx.f b/lapack-netlib/SRC/zggesx.f index ac9138451..661523465 100644 --- a/lapack-netlib/SRC/zggesx.f +++ b/lapack-netlib/SRC/zggesx.f @@ -104,7 +104,7 @@ *> *> \param[in] SELCTG *> \verbatim -*> SELCTG is procedure) LOGICAL FUNCTION of two COMPLEX*16 arguments +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX*16 arguments *> SELCTG must be declared EXTERNAL in the calling subroutine. *> If SORT = 'N', SELCTG is not referenced. *> If SORT = 'S', SELCTG is used to select eigenvalues to sort @@ -320,7 +320,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEeigen * @@ -330,10 +330,10 @@ $ LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, $ IWORK, LIWORK, BWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBVSL, JOBVSR, SENSE, SORT diff --git a/lapack-netlib/SRC/zgghd3.f b/lapack-netlib/SRC/zgghd3.f index 94ae93b98..4b0e7826a 100644 --- a/lapack-netlib/SRC/zgghd3.f +++ b/lapack-netlib/SRC/zgghd3.f @@ -227,7 +227,7 @@ SUBROUTINE ZGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, $ LDQ, Z, LDZ, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.1) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 @@ -266,7 +266,8 @@ EXTERNAL ILAENV, LSAME * .. * .. External Subroutines .. - EXTERNAL ZGGHRD, ZLARTG, ZLASET, ZUNM22, ZROT, XERBLA + EXTERNAL ZGGHRD, ZLARTG, ZLASET, ZUNM22, ZROT, ZGEMM, + $ ZGEMV, ZTRMV, ZLACPY, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX diff --git a/lapack-netlib/SRC/zgsvj0.f b/lapack-netlib/SRC/zgsvj0.f index ea2b54222..c4a6bd38a 100644 --- a/lapack-netlib/SRC/zgsvj0.f +++ b/lapack-netlib/SRC/zgsvj0.f @@ -169,7 +169,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension LWORK. +*> WORK is COMPLEX*16 array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -218,7 +218,7 @@ SUBROUTINE ZGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -268,7 +268,7 @@ * .. External Subroutines .. * .. * from BLAS - EXTERNAL ZCOPY, ZROT, ZSWAP + EXTERNAL ZCOPY, ZROT, ZSWAP, ZAXPY * from LAPACK EXTERNAL ZLASCL, ZLASSQ, XERBLA * .. diff --git a/lapack-netlib/SRC/zgsvj1.f b/lapack-netlib/SRC/zgsvj1.f index aba6ea237..91e39ca8a 100644 --- a/lapack-netlib/SRC/zgsvj1.f +++ b/lapack-netlib/SRC/zgsvj1.f @@ -236,7 +236,7 @@ SUBROUTINE ZGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -282,7 +282,7 @@ * .. * .. External Subroutines .. * .. from BLAS - EXTERNAL ZCOPY, ZROT, ZSWAP + EXTERNAL ZCOPY, ZROT, ZSWAP, ZAXPY * .. from LAPACK EXTERNAL ZLASCL, ZLASSQ, XERBLA * .. diff --git a/lapack-netlib/SRC/zhb2st_kernels.f b/lapack-netlib/SRC/zhb2st_kernels.f index e4114b5f4..a440b5c0d 100644 --- a/lapack-netlib/SRC/zhb2st_kernels.f +++ b/lapack-netlib/SRC/zhb2st_kernels.f @@ -47,45 +47,87 @@ * Arguments: * ========== * -*> @param[in] n -*> The order of the matrix A. -*> -*> @param[in] nb -*> The size of the band. -*> -*> @param[in, out] A -*> A pointer to the matrix A. -*> -*> @param[in] lda -*> The leading dimension of the matrix A. +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim *> -*> @param[out] V -*> COMPLEX*16 array, dimension 2*n if eigenvalues only are -*> requested or to be queried for vectors. +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim *> -*> @param[out] TAU -*> COMPLEX*16 array, dimension (2*n). -*> The scalar factors of the Householder reflectors are stored -*> in this array. +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim *> -*> @param[in] st +*> \param[in] ST +*> \verbatim +*> ST is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] ed +*> \param[in] ED +*> \verbatim +*> ED is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] sweep +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER *> internal parameter for indices. +*> \endverbatim *> -*> @param[in] Vblksiz -*> internal parameter for indices. +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim *> -*> @param[in] wantz -*> logical which indicate if Eigenvalue are requested or both -*> Eigenvalue/Eigenvectors. +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is COMPLEX*16 array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim *> -*> @param[in] work -*> Workspace of size nb. +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array. Workspace of size nb. +*> \endverbatim *> *> \par Further Details: * ===================== @@ -128,10 +170,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zhbev_2stage.f b/lapack-netlib/SRC/zhbev_2stage.f index bb0faefd2..583e55ce0 100644 --- a/lapack-netlib/SRC/zhbev_2stage.f +++ b/lapack-netlib/SRC/zhbev_2stage.f @@ -171,7 +171,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHEReigen * @@ -213,10 +213,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -242,13 +242,13 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANHB - EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR, - $ ZHETRD_2STAGE + $ ZHETRD_2STAGE, ZHETRD_HB2ST * .. * .. Intrinsic Functions .. INTRINSIC DBLE, SQRT @@ -281,9 +281,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/zhbevd_2stage.f b/lapack-netlib/SRC/zhbevd_2stage.f index 94863c708..be493a1b9 100644 --- a/lapack-netlib/SRC/zhbevd_2stage.f +++ b/lapack-netlib/SRC/zhbevd_2stage.f @@ -219,7 +219,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHEReigen * @@ -262,10 +262,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -296,9 +296,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANHB - EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZLACPY, @@ -321,9 +321,9 @@ LRWMIN = 1 LIWMIN = 1 ELSE - IB = ILAENV( 18, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) IF( WANTZ ) THEN LWMIN = 2*N**2 LRWMIN = 1 + 5*N + 2*N**2 diff --git a/lapack-netlib/SRC/zhbevx_2stage.f b/lapack-netlib/SRC/zhbevx_2stage.f index 8473c4a40..9b1f5fe92 100644 --- a/lapack-netlib/SRC/zhbevx_2stage.f +++ b/lapack-netlib/SRC/zhbevx_2stage.f @@ -329,7 +329,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -369,9 +369,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANHB - EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZCOPY, @@ -429,9 +429,12 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - IB = ILAENV( 18, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) LWMIN = LHTRD + LWTRD WORK( 1 ) = LWMIN ENDIF diff --git a/lapack-netlib/SRC/zhecon_3.f b/lapack-netlib/SRC/zhecon_3.f index db93f6d93..8c3a9f32b 100644 --- a/lapack-netlib/SRC/zhecon_3.f +++ b/lapack-netlib/SRC/zhecon_3.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16HEcomputational * @@ -157,7 +157,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -171,10 +171,10 @@ SUBROUTINE ZHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, $ WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -189,7 +189,7 @@ * ===================================================================== * * .. Parameters .. - REAL ONE, ZERO + DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. diff --git a/lapack-netlib/SRC/zhecon_rook.f b/lapack-netlib/SRC/zhecon_rook.f index 3daf53523..6dabdcff8 100644 --- a/lapack-netlib/SRC/zhecon_rook.f +++ b/lapack-netlib/SRC/zhecon_rook.f @@ -117,7 +117,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16HEcomputational * @@ -125,7 +125,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -139,10 +139,10 @@ SUBROUTINE ZHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -157,7 +157,7 @@ * ===================================================================== * * .. Parameters .. - REAL ONE, ZERO + DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. diff --git a/lapack-netlib/SRC/zheequb.f b/lapack-netlib/SRC/zheequb.f index ec6d095ad..d698232e8 100644 --- a/lapack-netlib/SRC/zheequb.f +++ b/lapack-netlib/SRC/zheequb.f @@ -132,7 +132,7 @@ * ===================================================================== SUBROUTINE ZHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -168,7 +168,7 @@ EXTERNAL DLAMCH, LSAME * .. * .. External Subroutines .. - EXTERNAL ZLASSQ + EXTERNAL ZLASSQ, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, INT, LOG, MAX, MIN, SQRT diff --git a/lapack-netlib/SRC/zheev_2stage.f b/lapack-netlib/SRC/zheev_2stage.f index 0cf57e904..d80a80d63 100644 --- a/lapack-netlib/SRC/zheev_2stage.f +++ b/lapack-netlib/SRC/zheev_2stage.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEeigen * @@ -191,10 +191,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -222,9 +222,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANHE - EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR, @@ -253,10 +253,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/zheevd_2stage.f b/lapack-netlib/SRC/zheevd_2stage.f index 7a8c1593f..1bf5fec37 100644 --- a/lapack-netlib/SRC/zheevd_2stage.f +++ b/lapack-netlib/SRC/zheevd_2stage.f @@ -202,7 +202,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEeigen * @@ -255,10 +255,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -291,9 +291,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANHE - EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DSCAL, DSTERF, XERBLA, ZLACPY, ZLASCL, @@ -327,10 +327,14 @@ LRWMIN = 1 LIWMIN = 1 ELSE - KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) IF( WANTZ ) THEN LWMIN = 2*N + N*N LRWMIN = 1 + 5*N + 2*N**2 diff --git a/lapack-netlib/SRC/zheevr_2stage.f b/lapack-netlib/SRC/zheevr_2stage.f index 545785361..ab7f3374e 100644 --- a/lapack-netlib/SRC/zheevr_2stage.f +++ b/lapack-netlib/SRC/zheevr_2stage.f @@ -408,7 +408,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -445,9 +445,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV, ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANSY - EXTERNAL LSAME, ILAENV, DLAMCH, ZLANSY + EXTERNAL LSAME, DLAMCH, ZLANSY, ILAENV, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, @@ -471,10 +471,10 @@ LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. $ ( LIWORK.EQ.-1 ) ) * - KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD LRWMIN = MAX( 1, 24*N ) LIWMIN = MAX( 1, 10*N ) diff --git a/lapack-netlib/SRC/zheevx_2stage.f b/lapack-netlib/SRC/zheevx_2stage.f index 9def33c6d..a70c870fd 100644 --- a/lapack-netlib/SRC/zheevx_2stage.f +++ b/lapack-netlib/SRC/zheevx_2stage.f @@ -308,7 +308,7 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -345,9 +345,9 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV + INTEGER ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANHE - EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, @@ -402,10 +402,14 @@ LWMIN = 1 WORK( 1 ) = LWMIN ELSE - KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN END IF diff --git a/lapack-netlib/SRC/zhegv_2stage.f b/lapack-netlib/SRC/zhegv_2stage.f index 1afd2e187..53f1b8d8e 100644 --- a/lapack-netlib/SRC/zhegv_2stage.f +++ b/lapack-netlib/SRC/zhegv_2stage.f @@ -47,7 +47,7 @@ *> positive definite. *> This routine use the 2stage technique for the reduction to tridiagonal *> which showed higher performance on recent architecture and for large -* sizes N>2000. +*> sizes N>2000. *> \endverbatim * * Arguments: @@ -192,7 +192,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEeigen * @@ -234,10 +234,10 @@ * IMPLICIT NONE * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO @@ -261,8 +261,8 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. External Subroutines .. EXTERNAL XERBLA, ZHEGST, ZPOTRF, ZTRMM, ZTRSM, @@ -295,10 +295,10 @@ END IF * IF( INFO.EQ.0 ) THEN - KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) LWMIN = N + LHTRD + LWTRD WORK( 1 ) = LWMIN * diff --git a/lapack-netlib/SRC/zhesv_aa.f b/lapack-netlib/SRC/zhesv_aa.f index 56a3086af..bbd0fdff4 100644 --- a/lapack-netlib/SRC/zhesv_aa.f +++ b/lapack-netlib/SRC/zhesv_aa.f @@ -129,8 +129,6 @@ *> The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for best *> performance LWORK >= max(1,N*NB), where NB is the optimal *> blocksize for ZHETRF. -*> for LWORK < N, TRS will be done with Level BLAS 2 -*> for LWORK >= N, TRS will be done with Level BLAS 3 *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -156,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEsolve * @@ -164,10 +162,10 @@ SUBROUTINE ZHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, $ LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -190,7 +188,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZHETRF, ZHETRS, ZHETRS2 + EXTERNAL XERBLA, ZHETRF_AA, ZHETRS_AA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/zhesv_aa_2stage.f b/lapack-netlib/SRC/zhesv_aa_2stage.f new file mode 100644 index 000000000..a34440029 --- /dev/null +++ b/lapack-netlib/SRC/zhesv_aa_2stage.f @@ -0,0 +1,284 @@ +*> \brief ZHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is Hermitian and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEsolve +* +* ===================================================================== + SUBROUTINE ZHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, LWKOPT, NB, KB, NT, IINFO + COMPLEX PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRF_AA_2STAGE, ZHETRS_AA_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESV_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/zhetrd_2stage.f b/lapack-netlib/SRC/zhetrd_2stage.f index 4245b3205..9d6a426a3 100644 --- a/lapack-netlib/SRC/zhetrd_2stage.f +++ b/lapack-netlib/SRC/zhetrd_2stage.f @@ -144,7 +144,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension LWORK. +*> WORK is COMPLEX*16 array, dimension (LWORK) *> \endverbatim *> *> \param[in] LWORK @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEcomputational * @@ -227,10 +227,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER VECT, UPLO @@ -253,8 +253,8 @@ * .. * .. External Functions .. LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE * .. * .. Executable Statements .. * @@ -267,10 +267,10 @@ * * Determine the block size, the workspace size and the hous size. * - KD = ILAENV( 17, 'ZHETRD_2STAGE', VECT, N, -1, -1, -1 ) - IB = ILAENV( 18, 'ZHETRD_2STAGE', VECT, N, KD, -1, -1 ) - LHMIN = ILAENV( 19, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) - LWMIN = ILAENV( 20, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) * WRITE(*,*),'ZHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, * $ LHMIN, LWMIN * diff --git a/lapack-netlib/SRC/zhetrd_hb2st.F b/lapack-netlib/SRC/zhetrd_hb2st.F index d963d8c90..07390623a 100644 --- a/lapack-netlib/SRC/zhetrd_hb2st.F +++ b/lapack-netlib/SRC/zhetrd_hb2st.F @@ -188,7 +188,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERcomputational * @@ -237,10 +237,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER STAGE1, UPLO, VECT @@ -273,7 +273,7 @@ COMPLEX*16 TMP * .. * .. External Subroutines .. - EXTERNAL ZHB2ST_KERNELS, ZLACPY, ZLASET + EXTERNAL ZHB2ST_KERNELS, ZLACPY, ZLASET, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, CEILING, DBLE, REAL diff --git a/lapack-netlib/SRC/zhetrd_he2hb.f b/lapack-netlib/SRC/zhetrd_he2hb.f index 89fb1b8a5..e35578b42 100644 --- a/lapack-netlib/SRC/zhetrd_he2hb.f +++ b/lapack-netlib/SRC/zhetrd_he2hb.f @@ -123,7 +123,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension LWORK. +*> WORK is COMPLEX*16 array, dimension (LWORK) *> On exit, if INFO = 0, or if LWORK=-1, *> WORK(1) returns the size of LWORK. *> \endverbatim @@ -132,7 +132,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK which should be calculated -* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns *> this value as the first entry of the WORK array, and no error @@ -158,7 +158,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEcomputational * @@ -222,7 +222,7 @@ *> *> where tau is a complex scalar, and v is a complex vector with *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in -* A(i+kd+2:n,i), and tau in TAU(i). +*> A(i+kd+2:n,i), and tau in TAU(i). *> *> The contents of A on exit are illustrated by the following examples *> with n = 5: @@ -245,10 +245,10 @@ * IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -277,7 +277,7 @@ $ TPOS, WPOS, S2POS, S1POS * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZHER2K, ZHEMM, ZGEMM, + EXTERNAL XERBLA, ZHER2K, ZHEMM, ZGEMM, ZCOPY, $ ZLARFT, ZGELQF, ZGEQRF, ZLASET * .. * .. Intrinsic Functions .. diff --git a/lapack-netlib/SRC/zhetrf_aa.f b/lapack-netlib/SRC/zhetrf_aa.f index 05844bb52..e355aed14 100644 --- a/lapack-netlib/SRC/zhetrf_aa.f +++ b/lapack-netlib/SRC/zhetrf_aa.f @@ -114,11 +114,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. +*> < 0: if INFO = -i, the i-th argument had an illegal value. *> \endverbatim * * Authors: @@ -129,17 +125,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEcomputational * * ===================================================================== SUBROUTINE ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +155,7 @@ * * .. Local Scalars .. LOGICAL LQUERY, UPPER - INTEGER J, LWKOPT, IINFO + INTEGER J, LWKOPT INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB COMPLEX*16 ALPHA * .. @@ -169,7 +165,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL ZLAHEF_AA, ZGEMM, ZGEMV, ZCOPY, ZSCAL, ZSWAP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCONJG, MAX @@ -178,7 +174,7 @@ * * Determine the block size * - NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + NB = ILAENV( 1, 'ZHETRF_AA', UPLO, N, -1, -1, -1 ) * * Test the input parameters. * @@ -215,13 +211,10 @@ IPIV( 1 ) = 1 IF ( N.EQ.1 ) THEN A( 1, 1 ) = DBLE( A( 1, 1 ) ) - IF ( A( 1, 1 ).EQ.ZERO ) THEN - INFO = 1 - END IF RETURN END IF * -* Adjubst block size based on the workspace size +* Adjust block size based on the workspace size * IF( LWORK.LT.((1+NB)*N) ) THEN NB = ( LWORK-N ) / N @@ -261,11 +254,7 @@ * CALL ZLAHEF_AA( UPLO, 2-K1, N-J, JB, $ A( MAX(1, J), J+1 ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), - $ IINFO ) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * @@ -385,10 +374,7 @@ * CALL ZLAHEF_AA( UPLO, 2-K1, N-J, JB, $ A( J+1, MAX(1, J) ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * diff --git a/lapack-netlib/SRC/zhetrf_aa_2stage.f b/lapack-netlib/SRC/zhetrf_aa_2stage.f new file mode 100644 index 000000000..4d62198d6 --- /dev/null +++ b/lapack-netlib/SRC/zhetrf_aa_2stage.f @@ -0,0 +1,663 @@ +*> \brief \b ZHETRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRF_AA_2STAGE computes the factorization of a double hermitian matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a hermitian band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX*16 PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACGV, ZLACPY, + $ ZLASET, ZGBTRF, ZGEMM, ZGETRF, + $ ZHEGST, ZSWAP, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'ZHETRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL ZGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL ZGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL ZHEGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = DCONJG( TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'Conjugate transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call ZGETRF +* + DO K = 1, NB + CALL ZCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB +* +* Copy only L-factor +* + CALL ZCOPY( N-K-(J+1)*NB, + $ WORK( K+1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+K+1 ), LDA ) +* +* Transpose U-factor to be copied back into T(J+1, J) +* + CALL ZLACGV( K, WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB) , LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = DCONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL ZLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) + CALL ZLACGV( I2-I1, A( I1, I1+1 ), LDA ) + CALL ZLACGV( I2-I1-1, A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL ZHEGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = DCONJG( TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB) , LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'L', 'C', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = DCONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL ZLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) + CALL ZLACGV( I2-I1, A( I1+1, I1 ), 1 ) + CALL ZLACGV( I2-I1-1, A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL ZLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of ZHETRF_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/zhetri2.f b/lapack-netlib/SRC/zhetri2.f index 7e743a126..a7acff49f 100644 --- a/lapack-netlib/SRC/zhetri2.f +++ b/lapack-netlib/SRC/zhetri2.f @@ -120,17 +120,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEcomputational * * ===================================================================== SUBROUTINE ZHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -153,7 +153,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL ZHETRI2X + EXTERNAL ZHETRI2X, ZHETRI, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/zhetri2x.f b/lapack-netlib/SRC/zhetri2x.f index 169017221..ab35e700a 100644 --- a/lapack-netlib/SRC/zhetri2x.f +++ b/lapack-netlib/SRC/zhetri2x.f @@ -113,17 +113,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16HEcomputational * * ===================================================================== SUBROUTINE ZHETRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -137,7 +137,7 @@ * ===================================================================== * * .. Parameters .. - REAL ONE + DOUBLE PRECISION ONE COMPLEX*16 CONE, ZERO PARAMETER ( ONE = 1.0D+0, $ CONE = ( 1.0D+0, 0.0D+0 ), diff --git a/lapack-netlib/SRC/zhetri_3.f b/lapack-netlib/SRC/zhetri_3.f index 69d6e0b80..14be660ea 100644 --- a/lapack-netlib/SRC/zhetri_3.f +++ b/lapack-netlib/SRC/zhetri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEcomputational * @@ -160,7 +160,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -170,10 +170,10 @@ SUBROUTINE ZHETRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -196,7 +196,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL ZHETRI_3X + EXTERNAL ZHETRI_3X, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/zhetri_3x.f b/lapack-netlib/SRC/zhetri_3x.f index 8be104cb9..d7c00c81b 100644 --- a/lapack-netlib/SRC/zhetri_3x.f +++ b/lapack-netlib/SRC/zhetri_3x.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16HEcomputational * @@ -150,7 +150,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -159,10 +159,10 @@ * ===================================================================== SUBROUTINE ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zhetrs_3.f b/lapack-netlib/SRC/zhetrs_3.f index a73f51b41..937544e8f 100644 --- a/lapack-netlib/SRC/zhetrs_3.f +++ b/lapack-netlib/SRC/zhetrs_3.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16HEcomputational * @@ -151,7 +151,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -165,10 +165,10 @@ SUBROUTINE ZHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zhetrs_aa.f b/lapack-netlib/SRC/zhetrs_aa.f index 044bf4cfa..9d302b9cd 100644 --- a/lapack-netlib/SRC/zhetrs_aa.f +++ b/lapack-netlib/SRC/zhetrs_aa.f @@ -67,7 +67,7 @@ *> of the matrix B. NRHS >= 0. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is COMPLEX*16 array, dimension (LDA,N) *> Details of factors computed by ZHETRF_AA. @@ -122,7 +122,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEcomputational * @@ -130,10 +130,10 @@ SUBROUTINE ZHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -160,7 +160,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL ZGTSV, ZSWAP, ZTRSM, XERBLA + EXTERNAL ZGTSV, ZSWAP, ZTRSM, ZLACGV, ZLACPY, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/zhetrs_aa_2stage.f b/lapack-netlib/SRC/zhetrs_aa_2stage.f new file mode 100644 index 000000000..02e17476f --- /dev/null +++ b/lapack-netlib/SRC/zhetrs_aa_2stage.f @@ -0,0 +1,283 @@ +*> \brief \b ZHETRS_AA_2STAGE +* +* @generated from SRC/dsytrs_aa_2stage.f, fortran d -> c, Mon Oct 30 11:59:02 2017 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRS_AA_2STAGE solves a system of linear equations A*X = B with a +*> hermitian matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by ZHETRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16array, dimension (LDA,N) +*> Details of factors computed by ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16array, dimension (LTB) +*> Details of factors computed by ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGBTRS, ZLASWP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL ZTRSM( 'L', 'U', 'C', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'L', 'C', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of ZHETRS_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/zhfrk.f b/lapack-netlib/SRC/zhfrk.f index cfc3e111a..6440542fe 100644 --- a/lapack-netlib/SRC/zhfrk.f +++ b/lapack-netlib/SRC/zhfrk.f @@ -117,7 +117,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION (LDA,ka) +*> A is COMPLEX*16 array, dimension (LDA,ka) *> where KA *> is K when TRANS = 'N' or 'n', and is N otherwise. Before *> entry with TRANS = 'N' or 'n', the leading N--by--K part of @@ -160,7 +160,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERcomputational * @@ -168,10 +168,10 @@ SUBROUTINE ZHFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, $ C ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/zhpevd.f b/lapack-netlib/SRC/zhpevd.f index 10b59c937..83983ee85 100644 --- a/lapack-netlib/SRC/zhpevd.f +++ b/lapack-netlib/SRC/zhpevd.f @@ -134,8 +134,7 @@ *> *> \param[out] RWORK *> \verbatim -*> RWORK is DOUBLE PRECISION array, -*> dimension (LRWORK) +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) *> On exit, if INFO = 0, RWORK(1) returns the required LRWORK. *> \endverbatim *> @@ -193,7 +192,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHEReigen * @@ -201,10 +200,10 @@ SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, $ RWORK, LRWORK, IWORK, LIWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO diff --git a/lapack-netlib/SRC/zla_gbamv.f b/lapack-netlib/SRC/zla_gbamv.f index 9d5291b88..6ffaf8f7d 100644 --- a/lapack-netlib/SRC/zla_gbamv.f +++ b/lapack-netlib/SRC/zla_gbamv.f @@ -107,7 +107,7 @@ *> *> \param[in] AB *> \verbatim -*> AB is COMPLEX*16 array of DIMENSION ( LDAB, n ) +*> AB is COMPLEX*16 array, dimension ( LDAB, n ) *> Before entry, the leading m by n part of the array AB must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -178,7 +178,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GBcomputational * @@ -186,10 +186,10 @@ SUBROUTINE ZLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, $ INCX, BETA, Y, INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/zla_gbrfsx_extended.f b/lapack-netlib/SRC/zla_gbrfsx_extended.f index c95e48ba0..7a850f1aa 100644 --- a/lapack-netlib/SRC/zla_gbrfsx_extended.f +++ b/lapack-netlib/SRC/zla_gbrfsx_extended.f @@ -208,8 +208,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -255,8 +254,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -399,7 +397,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GBcomputational * @@ -412,10 +410,10 @@ $ Y_TAIL, RCOND, ITHRESH, RTHRESH, $ DZ_UB, IGNORE_CWISE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, diff --git a/lapack-netlib/SRC/zla_geamv.f b/lapack-netlib/SRC/zla_geamv.f index 8d221691b..221e0c3fd 100644 --- a/lapack-netlib/SRC/zla_geamv.f +++ b/lapack-netlib/SRC/zla_geamv.f @@ -96,7 +96,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array of DIMENSION ( LDA, n ) +*> A is COMPLEX*16 array, dimension ( LDA, n ) *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -113,7 +113,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array of DIMENSION at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' *> and at least *> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -167,7 +167,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEcomputational * @@ -175,10 +175,10 @@ SUBROUTINE ZLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, $ Y, INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/zla_gerfsx_extended.f b/lapack-netlib/SRC/zla_gerfsx_extended.f index 2382d6044..2e93e265e 100644 --- a/lapack-netlib/SRC/zla_gerfsx_extended.f +++ b/lapack-netlib/SRC/zla_gerfsx_extended.f @@ -195,8 +195,7 @@ *> *> \param[in,out] ERRS_N *> \verbatim -*> ERRS_N is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERRS_N is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -242,8 +241,7 @@ *> *> \param[in,out] ERRS_C *> \verbatim -*> ERRS_C is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERRS_C is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -386,7 +384,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEcomputational * @@ -398,10 +396,10 @@ $ Y_TAIL, RCOND, ITHRESH, RTHRESH, $ DZ_UB, IGNORE_CWISE, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/zla_heamv.f b/lapack-netlib/SRC/zla_heamv.f index 1fa0e0a81..cd536efb2 100644 --- a/lapack-netlib/SRC/zla_heamv.f +++ b/lapack-netlib/SRC/zla_heamv.f @@ -89,7 +89,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array, DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, n ). *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -106,7 +106,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array, DIMENSION at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) *> Before entry, the incremented array X must contain the *> vector x. @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16HEcomputational * @@ -178,10 +178,10 @@ SUBROUTINE ZLA_HEAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/zla_herfsx_extended.f b/lapack-netlib/SRC/zla_herfsx_extended.f index e80a5910a..5b43a58b9 100644 --- a/lapack-netlib/SRC/zla_herfsx_extended.f +++ b/lapack-netlib/SRC/zla_herfsx_extended.f @@ -161,8 +161,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array, dimension -*> (LDY,NRHS) +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by ZHETRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -194,8 +193,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -241,8 +239,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -385,7 +382,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16HEcomputational * @@ -398,10 +395,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/zla_porfsx_extended.f b/lapack-netlib/SRC/zla_porfsx_extended.f index 34a0ac58c..85dd42780 100644 --- a/lapack-netlib/SRC/zla_porfsx_extended.f +++ b/lapack-netlib/SRC/zla_porfsx_extended.f @@ -153,8 +153,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array, dimension -*> (LDY,NRHS) +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by ZPOTRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -186,8 +185,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -233,8 +231,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -377,7 +374,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16POcomputational * @@ -390,10 +387,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/zla_syamv.f b/lapack-netlib/SRC/zla_syamv.f index 01854a9ba..02958bef3 100644 --- a/lapack-netlib/SRC/zla_syamv.f +++ b/lapack-netlib/SRC/zla_syamv.f @@ -90,7 +90,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16 array, DIMENSION ( LDA, n ). +*> A is COMPLEX*16 array, dimension ( LDA, n ). *> Before entry, the leading m by n part of the array A must *> contain the matrix of coefficients. *> Unchanged on exit. @@ -107,7 +107,7 @@ *> *> \param[in] X *> \verbatim -*> X is COMPLEX*16 array, DIMENSION at least +*> X is COMPLEX*16 array, dimension at least *> ( 1 + ( n - 1 )*abs( INCX ) ) *> Before entry, the incremented array X must contain the *> vector x. @@ -155,7 +155,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16SYcomputational * @@ -179,10 +179,10 @@ SUBROUTINE ZLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA diff --git a/lapack-netlib/SRC/zla_syrfsx_extended.f b/lapack-netlib/SRC/zla_syrfsx_extended.f index 0091e8133..a9716fd23 100644 --- a/lapack-netlib/SRC/zla_syrfsx_extended.f +++ b/lapack-netlib/SRC/zla_syrfsx_extended.f @@ -161,8 +161,7 @@ *> *> \param[in,out] Y *> \verbatim -*> Y is COMPLEX*16 array, dimension -*> (LDY,NRHS) +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) *> On entry, the solution matrix X, as computed by ZSYTRS. *> On exit, the improved solution matrix Y. *> \endverbatim @@ -194,8 +193,7 @@ *> *> \param[in,out] ERR_BNDS_NORM *> \verbatim -*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> normwise relative error, which is defined as follows: @@ -241,8 +239,7 @@ *> *> \param[in,out] ERR_BNDS_COMP *> \verbatim -*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension -*> (NRHS, N_ERR_BNDS) +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) *> For each right-hand side, this array contains information about *> various error bounds and condition numbers corresponding to the *> componentwise relative error, which is defined as follows: @@ -385,7 +382,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16SYcomputational * @@ -398,10 +395,10 @@ $ RTHRESH, DZ_UB, IGNORE_CWISE, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, diff --git a/lapack-netlib/SRC/zlabrd.f b/lapack-netlib/SRC/zlabrd.f index 07b5e9fcf..907840967 100644 --- a/lapack-netlib/SRC/zlabrd.f +++ b/lapack-netlib/SRC/zlabrd.f @@ -111,7 +111,7 @@ *> *> \param[out] TAUQ *> \verbatim -*> TAUQ is COMPLEX*16 array dimension (NB) +*> TAUQ is COMPLEX*16 array, dimension (NB) *> The scalar factors of the elementary reflectors which *> represent the unitary matrix Q. See Further Details. *> \endverbatim @@ -157,7 +157,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERauxiliary * @@ -212,10 +212,10 @@ SUBROUTINE ZLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, $ LDY ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, LDX, LDY, M, N, NB diff --git a/lapack-netlib/SRC/zlahef_aa.f b/lapack-netlib/SRC/zlahef_aa.f index 5698ba057..8bad4aba9 100644 --- a/lapack-netlib/SRC/zlahef_aa.f +++ b/lapack-netlib/SRC/zlahef_aa.f @@ -19,11 +19,11 @@ * =========== * * SUBROUTINE ZLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, -* H, LDH, WORK, INFO ) +* H, LDH, WORK ) * * .. Scalar Arguments .. * CHARACTER UPLO -* INTEGER J1, M, NB, LDA, LDH, INFO +* INTEGER J1, M, NB, LDA, LDH * .. * .. Array Arguments .. * INTEGER IPIV( * ) @@ -127,16 +127,6 @@ *> WORK is COMPLEX*16 workspace, dimension (M). *> \endverbatim *> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. -*> \endverbatim * * Authors: * ======== @@ -146,24 +136,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16HEcomputational * * ===================================================================== SUBROUTINE ZLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, - $ H, LDH, WORK, INFO ) + $ H, LDH, WORK ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * * .. Scalar Arguments .. CHARACTER UPLO - INTEGER M, NB, J1, LDA, LDH, INFO + INTEGER M, NB, J1, LDA, LDH * .. * .. Array Arguments .. INTEGER IPIV( * ) @@ -176,7 +166,7 @@ PARAMETER ( ZERO = (0.0D+0, 0.0D+0), ONE = (1.0D+0, 0.0D+0) ) * * .. Local Scalars .. - INTEGER J, K, K1, I1, I2 + INTEGER J, K, K1, I1, I2, MJ COMPLEX*16 PIV, ALPHA * .. * .. External Functions .. @@ -185,14 +175,14 @@ EXTERNAL LSAME, ILAENV, IZAMAX * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL ZGEMM, ZGEMV, ZAXPY, ZLACGV, ZCOPY, ZSCAL, ZSWAP, + $ ZLASET, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCONJG, MAX * .. * .. Executable Statements .. * - INFO = 0 J = 1 * * K1 is the first column of the panel to be factorized @@ -216,6 +206,14 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * * H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), * where H(J:N, J) has been initialized to be A(J, J:N) @@ -229,7 +227,7 @@ * first column * CALL ZLACGV( J-K1, A( 1, J ), 1 ) - CALL ZGEMV( 'No transpose', M-J+1, J-K1, + CALL ZGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( 1, J ), 1, $ ONE, H( J, J ), 1 ) @@ -238,7 +236,7 @@ * * Copy H(i:n, i) into WORK * - CALL ZCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * @@ -246,7 +244,7 @@ * where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) * ALPHA = -DCONJG( A( K-1, J ) ) - CALL ZAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + CALL ZAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -319,12 +317,6 @@ * Set A(J, J+1) = T(J, J+1) * A( K, J+1 ) = WORK( 2 ) - IF( (A( K, J ).EQ.ZERO ) .AND. - $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN - IF(INFO .EQ. 0) THEN - INFO = J - END IF - END IF * IF( J.LT.NB ) THEN * @@ -345,10 +337,6 @@ CALL ZLASET( 'Full', 1, M-J-1, ZERO, ZERO, $ A( K, J+2 ), LDA) END IF - ELSE - IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 10 @@ -370,6 +358,14 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * * H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, * where H(J:N, J) has been initialized to be A(J:N, J) @@ -383,7 +379,7 @@ * first column * CALL ZLACGV( J-K1, A( J, 1 ), LDA ) - CALL ZGEMV( 'No transpose', M-J+1, J-K1, + CALL ZGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( J, 1 ), LDA, $ ONE, H( J, J ), 1 ) @@ -392,7 +388,7 @@ * * Copy H(J:N, J) into WORK * - CALL ZCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * @@ -400,7 +396,7 @@ * where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) * ALPHA = -DCONJG( A( J, K-1 ) ) - CALL ZAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + CALL ZAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -473,11 +469,6 @@ * Set A(J+1, J) = T(J+1, J) * A( J+1, K ) = WORK( 2 ) - IF( (A( J, K ).EQ.ZERO) .AND. - $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN - IF (INFO .EQ. 0) - $ INFO = J - END IF * IF( J.LT.NB ) THEN * @@ -498,9 +489,6 @@ CALL ZLASET( 'Full', M-J-1, 1, ZERO, ZERO, $ A( J+2, K ), LDA ) END IF - ELSE - IF( (A( J, K ).EQ.ZERO) .AND. (J.EQ.M) - $ .AND. (INFO.EQ.0) ) INFO = J END IF J = J + 1 GO TO 30 diff --git a/lapack-netlib/SRC/zlalsa.f b/lapack-netlib/SRC/zlalsa.f index 6ad6cc5ef..ed5845ba2 100644 --- a/lapack-netlib/SRC/zlalsa.f +++ b/lapack-netlib/SRC/zlalsa.f @@ -232,8 +232,7 @@ *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array. -*> The dimension must be at least 3 * N +*> IWORK is INTEGER array, dimension (3*N) *> \endverbatim *> *> \param[out] INFO @@ -251,7 +250,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERcomputational * @@ -268,10 +267,10 @@ $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, $ IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, diff --git a/lapack-netlib/SRC/zlalsd.f b/lapack-netlib/SRC/zlalsd.f index 372b38223..409ebe3ac 100644 --- a/lapack-netlib/SRC/zlalsd.f +++ b/lapack-netlib/SRC/zlalsd.f @@ -136,8 +136,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension at least -*> (N * NRHS). +*> WORK is COMPLEX*16 array, dimension (N * NRHS) *> \endverbatim *> *> \param[out] RWORK @@ -173,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERcomputational * @@ -188,10 +187,10 @@ SUBROUTINE ZLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, $ RANK, WORK, RWORK, IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zlamswlq.f b/lapack-netlib/SRC/zlamswlq.f index 8068114b8..0e0b0a1da 100644 --- a/lapack-netlib/SRC/zlamswlq.f +++ b/lapack-netlib/SRC/zlamswlq.f @@ -23,7 +23,7 @@ *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q * C C * Q -*> TRANS = 'T': Q**T * C C * Q**T +*> TRANS = 'C': Q**H * C C * Q**H *> where Q is a real orthogonal matrix defined as the product of blocked *> elementary reflectors computed by short wide LQ *> factorization (ZLASWLQ) @@ -35,21 +35,21 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**T from the Left; -*> = 'R': apply Q or Q**T from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'T': Transpose, apply Q**T. +*> = 'C': Conjugate Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER -*> The number of rows of the matrix A. M >=0. +*> The number of rows of the matrix C. M >=0. *> \endverbatim *> *> \param[in] N @@ -88,12 +88,14 @@ *> *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim -*> A is COMPLEX*16 array, dimension (LDA,K) +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' *> The i-th row must contain the vector which defines the blocked *> elementary reflector H(i), for i = 1,2,...,k, as returned by -*> DLASWLQ in the first k rows of its array argument A. +*> ZLASWLQ in the first k rows of its array argument A. *> \endverbatim *> *> \param[in] LDA @@ -123,7 +125,7 @@ *> \verbatim *> C is COMPLEX*16 array, dimension (LDC,N) *> On entry, the M-by-N matrix C. -*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. *> \endverbatim *> *> \param[in] LDC @@ -200,14 +202,14 @@ SUBROUTINE ZLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS - INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC, LW + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), WORK( * ), C(LDC, * ), @@ -219,7 +221,7 @@ * .. * .. Local Scalars .. LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY - INTEGER I, II, KK, CTR + INTEGER I, II, KK, LW, CTR * .. * .. External Functions .. LOGICAL LSAME diff --git a/lapack-netlib/SRC/zlamtsqr.f b/lapack-netlib/SRC/zlamtsqr.f index 855083a6f..1ee732425 100644 --- a/lapack-netlib/SRC/zlamtsqr.f +++ b/lapack-netlib/SRC/zlamtsqr.f @@ -23,7 +23,7 @@ *> *> SIDE = 'L' SIDE = 'R' *> TRANS = 'N': Q * C C * Q -*> TRANS = 'C': Q**C * C C * Q**C +*> TRANS = 'C': Q**H * C C * Q**H *> where Q is a real orthogonal matrix defined as the product *> of blocked elementary reflectors computed by tall skinny *> QR factorization (ZLATSQR) @@ -35,15 +35,15 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**T from the Left; -*> = 'R': apply Q or Q**T from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'C': Conjugate Transpose, apply Q**C. +*> = 'C': Conjugate Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M @@ -81,7 +81,7 @@ *> N >= NB >= 1. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is COMPLEX*16 array, dimension (LDA,K) *> The i-th column must contain the vector which defines the @@ -117,7 +117,7 @@ *> \verbatim *> C is COMPLEX*16 array, dimension (LDC,N) *> On entry, the M-by-N matrix C. -*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. *> \endverbatim *> *> \param[in] LDC @@ -195,10 +195,10 @@ SUBROUTINE ZLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, $ LDT, C, LDC, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS diff --git a/lapack-netlib/SRC/zlaqr1.f b/lapack-netlib/SRC/zlaqr1.f index f945672ca..03afb87aa 100644 --- a/lapack-netlib/SRC/zlaqr1.f +++ b/lapack-netlib/SRC/zlaqr1.f @@ -50,19 +50,19 @@ * *> \param[in] N *> \verbatim -*> N is integer +*> N is INTEGER *> Order of the matrix H. N must be either 2 or 3. *> \endverbatim *> *> \param[in] H *> \verbatim -*> H is COMPLEX*16 array of dimension (LDH,N) +*> H is COMPLEX*16 array, dimension (LDH,N) *> The 2-by-2 or 3-by-3 matrix H in (*). *> \endverbatim *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> The leading dimension of H as declared in *> the calling procedure. LDH.GE.N *> \endverbatim @@ -81,7 +81,7 @@ *> *> \param[out] V *> \verbatim -*> V is COMPLEX*16 array of dimension N +*> V is COMPLEX*16 array, dimension (N) *> A scalar multiple of the first column of the *> matrix K in (*). *> \endverbatim @@ -94,7 +94,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERauxiliary * @@ -107,10 +107,10 @@ * ===================================================================== SUBROUTINE ZLAQR1( N, H, LDH, S1, S2, V ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. COMPLEX*16 S1, S2 diff --git a/lapack-netlib/SRC/zlaqr2.f b/lapack-netlib/SRC/zlaqr2.f index d8396d231..e6e2ea48c 100644 --- a/lapack-netlib/SRC/zlaqr2.f +++ b/lapack-netlib/SRC/zlaqr2.f @@ -119,7 +119,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -147,14 +147,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -162,14 +162,14 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim *> *> \param[out] SH *> \verbatim -*> SH is COMPLEX*16 array, dimension KBOT +*> SH is COMPLEX*16 array, dimension (KBOT) *> On output, approximate eigenvalues that may *> be used for shifts are stored in SH(KBOT-ND-NS+1) *> through SR(KBOT-ND). Converged eigenvalues are @@ -184,14 +184,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -202,14 +202,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -221,21 +221,21 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension LWORK. +*> WORK is COMPLEX*16 array, dimension (LWORK) *> On exit, WORK(1) is set to an estimate of the optimal value *> of LWORK for the given values of N, NW, KTOP and KBOT. *> \endverbatim *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -255,7 +255,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERauxiliary * @@ -270,10 +270,10 @@ $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, diff --git a/lapack-netlib/SRC/zlaqr3.f b/lapack-netlib/SRC/zlaqr3.f index 402644fd0..64ab59f31 100644 --- a/lapack-netlib/SRC/zlaqr3.f +++ b/lapack-netlib/SRC/zlaqr3.f @@ -116,7 +116,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer +*> LDH is INTEGER *> Leading dimension of H just as declared in the calling *> subroutine. N .LE. LDH *> \endverbatim @@ -144,14 +144,14 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer +*> LDZ is INTEGER *> The leading dimension of Z just as declared in the *> calling subroutine. 1 .LE. LDZ. *> \endverbatim *> *> \param[out] NS *> \verbatim -*> NS is integer +*> NS is INTEGER *> The number of unconverged (ie approximate) eigenvalues *> returned in SR and SI that may be used as shifts by the *> calling subroutine. @@ -159,14 +159,14 @@ *> *> \param[out] ND *> \verbatim -*> ND is integer +*> ND is INTEGER *> The number of converged eigenvalues uncovered by this *> subroutine. *> \endverbatim *> *> \param[out] SH *> \verbatim -*> SH is COMPLEX*16 array, dimension KBOT +*> SH is COMPLEX*16 array, dimension (KBOT) *> On output, approximate eigenvalues that may *> be used for shifts are stored in SH(KBOT-ND-NS+1) *> through SR(KBOT-ND). Converged eigenvalues are @@ -181,14 +181,14 @@ *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> The leading dimension of V just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> The number of columns of T. NH.GE.NW. *> \endverbatim *> @@ -199,14 +199,14 @@ *> *> \param[in] LDT *> \verbatim -*> LDT is integer +*> LDT is INTEGER *> The leading dimension of T just as declared in the *> calling subroutine. NW .LE. LDT *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer +*> NV is INTEGER *> The number of rows of work array WV available for *> workspace. NV.GE.NW. *> \endverbatim @@ -218,21 +218,21 @@ *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer +*> LDWV is INTEGER *> The leading dimension of W just as declared in the *> calling subroutine. NW .LE. LDV *> \endverbatim *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension LWORK. +*> WORK is COMPLEX*16 array, dimension (LWORK) *> On exit, WORK(1) is set to an estimate of the optimal value *> of LWORK for the given values of N, NW, KTOP and KBOT. *> \endverbatim *> *> \param[in] LWORK *> \verbatim -*> LWORK is integer +*> LWORK is INTEGER *> The dimension of the work array WORK. LWORK = 2*NW *> suffices, but greater efficiency may result from larger *> values of LWORK. @@ -267,7 +267,7 @@ $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/zlaqr5.f b/lapack-netlib/SRC/zlaqr5.f index 3e0392cb4..0dfbce82c 100644 --- a/lapack-netlib/SRC/zlaqr5.f +++ b/lapack-netlib/SRC/zlaqr5.f @@ -47,21 +47,21 @@ * *> \param[in] WANTT *> \verbatim -*> WANTT is logical scalar +*> WANTT is LOGICAL *> WANTT = .true. if the triangular Schur factor *> is being computed. WANTT is set to .false. otherwise. *> \endverbatim *> *> \param[in] WANTZ *> \verbatim -*> WANTZ is logical scalar +*> WANTZ is LOGICAL *> WANTZ = .true. if the unitary Schur factor is being *> computed. WANTZ is set to .false. otherwise. *> \endverbatim *> *> \param[in] KACC22 *> \verbatim -*> KACC22 is integer with value 0, 1, or 2. +*> KACC22 is INTEGER with value 0, 1, or 2. *> Specifies the computation mode of far-from-diagonal *> orthogonal updates. *> = 0: ZLAQR5 does not accumulate reflections and does not @@ -77,19 +77,19 @@ *> *> \param[in] N *> \verbatim -*> N is integer scalar +*> N is INTEGER *> N is the order of the Hessenberg matrix H upon which this *> subroutine operates. *> \endverbatim *> *> \param[in] KTOP *> \verbatim -*> KTOP is integer scalar +*> KTOP is INTEGER *> \endverbatim *> *> \param[in] KBOT *> \verbatim -*> KBOT is integer scalar +*> KBOT is INTEGER *> These are the first and last rows and columns of an *> isolated diagonal block upon which the QR sweep is to be *> applied. It is assumed without a check that @@ -100,21 +100,21 @@ *> *> \param[in] NSHFTS *> \verbatim -*> NSHFTS is integer scalar +*> NSHFTS is INTEGER *> NSHFTS gives the number of simultaneous shifts. NSHFTS *> must be positive and even. *> \endverbatim *> *> \param[in,out] S *> \verbatim -*> S is COMPLEX*16 array of size (NSHFTS) +*> S is COMPLEX*16 array, dimension (NSHFTS) *> S contains the shifts of origin that define the multi- *> shift QR sweep. On output S may be reordered. *> \endverbatim *> *> \param[in,out] H *> \verbatim -*> H is COMPLEX*16 array of size (LDH,N) +*> H is COMPLEX*16 array, dimension (LDH,N) *> On input H contains a Hessenberg matrix. On output a *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied *> to the isolated diagonal block in rows and columns KTOP @@ -123,7 +123,7 @@ *> *> \param[in] LDH *> \verbatim -*> LDH is integer scalar +*> LDH is INTEGER *> LDH is the leading dimension of H just as declared in the *> calling procedure. LDH.GE.MAX(1,N). *> \endverbatim @@ -142,7 +142,7 @@ *> *> \param[in,out] Z *> \verbatim -*> Z is COMPLEX*16 array of size (LDZ,IHIZ) +*> Z is COMPLEX*16 array, dimension (LDZ,IHIZ) *> If WANTZ = .TRUE., then the QR Sweep unitary *> similarity transformation is accumulated into *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. @@ -151,71 +151,69 @@ *> *> \param[in] LDZ *> \verbatim -*> LDZ is integer scalar +*> LDZ is INTEGER *> LDA is the leading dimension of Z just as declared in *> the calling procedure. LDZ.GE.N. *> \endverbatim *> *> \param[out] V *> \verbatim -*> V is COMPLEX*16 array of size (LDV,NSHFTS/2) +*> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2) *> \endverbatim *> *> \param[in] LDV *> \verbatim -*> LDV is integer scalar +*> LDV is INTEGER *> LDV is the leading dimension of V as declared in the *> calling procedure. LDV.GE.3. *> \endverbatim *> *> \param[out] U *> \verbatim -*> U is COMPLEX*16 array of size -*> (LDU,3*NSHFTS-3) +*> U is COMPLEX*16 array, dimension (LDU,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDU *> \verbatim -*> LDU is integer scalar +*> LDU is INTEGER *> LDU is the leading dimension of U just as declared in the *> in the calling subroutine. LDU.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NH *> \verbatim -*> NH is integer scalar +*> NH is INTEGER *> NH is the number of columns in array WH available for *> workspace. NH.GE.1. *> \endverbatim *> *> \param[out] WH *> \verbatim -*> WH is COMPLEX*16 array of size (LDWH,NH) +*> WH is COMPLEX*16 array, dimension (LDWH,NH) *> \endverbatim *> *> \param[in] LDWH *> \verbatim -*> LDWH is integer scalar +*> LDWH is INTEGER *> Leading dimension of WH just as declared in the *> calling procedure. LDWH.GE.3*NSHFTS-3. *> \endverbatim *> *> \param[in] NV *> \verbatim -*> NV is integer scalar +*> NV is INTEGER *> NV is the number of rows in WV agailable for workspace. *> NV.GE.1. *> \endverbatim *> *> \param[out] WV *> \verbatim -*> WV is COMPLEX*16 array of size -*> (LDWV,3*NSHFTS-3) +*> WV is COMPLEX*16 array, dimension (LDWV,3*NSHFTS-3) *> \endverbatim *> *> \param[in] LDWV *> \verbatim -*> LDWV is integer scalar +*> LDWV is INTEGER *> LDWV is the leading dimension of WV as declared in the *> in the calling subroutine. LDWV.GE.NV. *> \endverbatim @@ -251,7 +249,7 @@ $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, $ WV, LDWV, NH, WH, LDWH ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/zlarfg.f b/lapack-netlib/SRC/zlarfg.f index 76ca452f6..081d391d7 100644 --- a/lapack-netlib/SRC/zlarfg.f +++ b/lapack-netlib/SRC/zlarfg.f @@ -99,17 +99,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERauxiliary * * ===================================================================== SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -175,7 +175,7 @@ BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN - IF( ABS( BETA ).LT.SAFMIN .AND. KNT .LT. 1000) + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN diff --git a/lapack-netlib/SRC/zlarfgp.f b/lapack-netlib/SRC/zlarfgp.f index 32e55ea6c..c9e55a5a0 100644 --- a/lapack-netlib/SRC/zlarfgp.f +++ b/lapack-netlib/SRC/zlarfgp.f @@ -97,17 +97,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERauxiliary * * ===================================================================== SUBROUTINE ZLARFGP( N, ALPHA, X, INCX, TAU ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INCX, N @@ -197,7 +197,7 @@ BETA = BETA*BIGNUM ALPHI = ALPHI*BIGNUM ALPHR = ALPHR*BIGNUM - IF( ABS( BETA ).LT.SMLNUM .AND. KNT .LT. 1000) + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) $ GO TO 10 * * New BETA is at most 1, at least SMLNUM diff --git a/lapack-netlib/SRC/zlarrv.f b/lapack-netlib/SRC/zlarrv.f index c7656811d..67a67584c 100644 --- a/lapack-netlib/SRC/zlarrv.f +++ b/lapack-netlib/SRC/zlarrv.f @@ -286,7 +286,7 @@ $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, $ WORK, IWORK, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 @@ -348,6 +348,13 @@ * .. INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* * The first N entries of WORK are reserved for the eigenvalues INDLD = N+1 INDLLD= 2*N+1 diff --git a/lapack-netlib/SRC/zlaswlq.f b/lapack-netlib/SRC/zlaswlq.f index cd7bcc3a2..24dd41d79 100644 --- a/lapack-netlib/SRC/zlaswlq.f +++ b/lapack-netlib/SRC/zlaswlq.f @@ -55,7 +55,7 @@ *> \verbatim *> A is COMPLEX*16 array, dimension (LDA,N) *> On entry, the M-by-N matrix A. -*> On exit, the elements on and bleow the diagonal +*> On exit, the elements on and below the diagonal *> of the array contain the N-by-N lower triangular matrix L; *> the elements above the diagonal represent Q by the rows *> of blocked V (see Further Details). @@ -150,10 +150,10 @@ SUBROUTINE ZLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, $ INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT diff --git a/lapack-netlib/SRC/zlaswp.f b/lapack-netlib/SRC/zlaswp.f index 81ceba2cc..13c0f9b45 100644 --- a/lapack-netlib/SRC/zlaswp.f +++ b/lapack-netlib/SRC/zlaswp.f @@ -79,14 +79,15 @@ *> \verbatim *> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) *> The vector of pivot indices. Only the elements in positions -*> K1 through K1+(K2-K1)*INCX of IPIV are accessed. -*> IPIV(K) = L implies rows K and L are to be interchanged. +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. *> \endverbatim *> *> \param[in] INCX *> \verbatim *> INCX is INTEGER -*> The increment between successive values of IPIV. If IPIV +*> The increment between successive values of IPIV. If INCX *> is negative, the pivots are applied in reverse order. *> \endverbatim * @@ -98,7 +99,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERauxiliary * @@ -114,10 +115,10 @@ * ===================================================================== SUBROUTINE ZLASWP( N, A, LDA, K1, K2, IPIV, INCX ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INCX, K1, K2, LDA, N @@ -135,7 +136,8 @@ * .. * .. Executable Statements .. * -* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. * IF( INCX.GT.0 ) THEN IX0 = K1 diff --git a/lapack-netlib/SRC/zlasyf_aa.f b/lapack-netlib/SRC/zlasyf_aa.f index 7ac4ff3dc..f321b72de 100644 --- a/lapack-netlib/SRC/zlasyf_aa.f +++ b/lapack-netlib/SRC/zlasyf_aa.f @@ -19,11 +19,11 @@ * =========== * * SUBROUTINE ZLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, -* H, LDH, WORK, INFO ) +* H, LDH, WORK ) * * .. Scalar Arguments .. * CHARACTER UPLO -* INTEGER J1, M, NB, LDA, LDH, INFO +* INTEGER J1, M, NB, LDA, LDH * .. * .. Array Arguments .. * INTEGER IPIV( * ) @@ -99,12 +99,12 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[out] IPIV *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV is INTEGER array, dimension (M) *> Details of the row and column interchanges, *> the row and column k were interchanged with the row and *> column IPIV(k). @@ -127,16 +127,6 @@ *> WORK is COMPLEX*16 workspace, dimension (M). *> \endverbatim *> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. -*> \endverbatim * * Authors: * ======== @@ -146,24 +136,24 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * * ===================================================================== SUBROUTINE ZLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, - $ H, LDH, WORK, INFO ) + $ H, LDH, WORK ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * * .. Scalar Arguments .. CHARACTER UPLO - INTEGER M, NB, J1, LDA, LDH, INFO + INTEGER M, NB, J1, LDA, LDH * .. * .. Array Arguments .. INTEGER IPIV( * ) @@ -176,7 +166,7 @@ PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * * .. Local Scalars .. - INTEGER J, K, K1, I1, I2 + INTEGER J, K, K1, I1, I2, MJ COMPLEX*16 PIV, ALPHA * .. * .. External Functions .. @@ -185,14 +175,14 @@ EXTERNAL LSAME, ILAENV, IZAMAX * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL ZGEMV, ZAXPY, ZSCAL, ZCOPY, ZSWAP, ZLASET, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * - INFO = 0 J = 1 * * K1 is the first column of the panel to be factorized @@ -216,9 +206,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), -* where H(J:N, J) has been initialized to be A(J, J:N) +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) * IF( K.GT.2 ) THEN * @@ -228,23 +226,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL ZGEMV( 'No transpose', M-J+1, J-K1, + CALL ZGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( 1, J ), 1, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(i:n, i) into WORK +* Copy H(i:M, i) into WORK * - CALL ZCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), -* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) * ALPHA = -A( K-1, J ) - CALL ZAXPY( M-J+1, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + CALL ZAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -253,8 +251,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) -* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) * IF( K.GT.1 ) THEN ALPHA = -A( K, J ) @@ -262,7 +260,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -277,14 +275,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) * I1 = I1+J-1 I2 = I2+J-1 CALL ZSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, $ A( J1+I1, I2 ), 1 ) * -* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) * CALL ZSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, $ A( J1+I2-1, I2+1 ), LDA ) @@ -315,23 +313,17 @@ * Set A(J, J+1) = T(J, J+1) * A( K, J+1 ) = WORK( 2 ) - IF( (A( K, J ).EQ.ZERO ) .AND. - $ ( (J.EQ.M) .OR. (A( K, J+1 ).EQ.ZERO))) THEN - IF(INFO .EQ. 0) THEN - INFO = J - ENDIF - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J:N, J), +* Copy A(J+1:M, J+1) into H(J:M, J), * CALL ZCOPY( M-J, A( K+1, J+1 ), LDA, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( K, J+1 ).NE.ZERO ) THEN ALPHA = ONE / A( K, J+1 ) @@ -341,10 +333,6 @@ CALL ZLASET( 'Full', 1, M-J-1, ZERO, ZERO, $ A( K, J+2 ), LDA) END IF - ELSE - IF( (A( K, J ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 10 @@ -366,9 +354,17 @@ * > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, * K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF * -* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, -* where H(J:N, J) has been initialized to be A(J:N, J) +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) * IF( K.GT.2 ) THEN * @@ -378,23 +374,23 @@ * > for the rest of the columns, K is J+1, skipping only the * first column * - CALL ZGEMV( 'No transpose', M-J+1, J-K1, + CALL ZGEMV( 'No transpose', MJ, J-K1, $ -ONE, H( J, K1 ), LDH, $ A( J, 1 ), LDA, $ ONE, H( J, J ), 1 ) END IF * -* Copy H(J:N, J) into WORK +* Copy H(J:M, J) into WORK * - CALL ZCOPY( M-J+1, H( J, J ), 1, WORK( 1 ), 1 ) + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) * IF( J.GT.K1 ) THEN * -* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), * where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) * ALPHA = -A( J, K-1 ) - CALL ZAXPY( M-J+1, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + CALL ZAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) END IF * * Set A(J, J) = T(J, J) @@ -403,8 +399,8 @@ * IF( J.LT.M ) THEN * -* Compute WORK(2:N) = T(J, J) L((J+1):N, J) -* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) * IF( K.GT.1 ) THEN ALPHA = -A( J, K ) @@ -412,7 +408,7 @@ $ WORK( 2 ), 1 ) ENDIF * -* Find max(|WORK(2:n)|) +* Find max(|WORK(2:M)|) * I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 PIV = WORK( I2 ) @@ -427,14 +423,14 @@ WORK( I2 ) = WORK( I1 ) WORK( I1 ) = PIV * -* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) * I1 = I1+J-1 I2 = I2+J-1 CALL ZSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, $ A( I2, J1+I1 ), LDA ) * -* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) * CALL ZSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, $ A( I2+1, J1+I2-1 ), 1 ) @@ -465,22 +461,17 @@ * Set A(J+1, J) = T(J+1, J) * A( J+1, K ) = WORK( 2 ) - IF( (A( J, K ).EQ.ZERO) .AND. - $ ( (J.EQ.M) .OR. (A( J+1, K ).EQ.ZERO)) ) THEN - IF (INFO .EQ. 0) - $ INFO = J - END IF * IF( J.LT.NB ) THEN * -* Copy A(J+1:N, J+1) into H(J+1:N, J), +* Copy A(J+1:M, J+1) into H(J+1:M, J), * CALL ZCOPY( M-J, A( J+1, K+1 ), 1, $ H( J+1, J+1 ), 1 ) END IF * -* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), -* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) * IF( A( J+1, K ).NE.ZERO ) THEN ALPHA = ONE / A( J+1, K ) @@ -490,10 +481,6 @@ CALL ZLASET( 'Full', M-J-1, 1, ZERO, ZERO, $ A( J+2, K ), LDA ) END IF - ELSE - IF( (A( J, K ).EQ.ZERO) .AND. (INFO.EQ.0) ) THEN - INFO = J - END IF END IF J = J + 1 GO TO 30 diff --git a/lapack-netlib/SRC/zlatbs.f b/lapack-netlib/SRC/zlatbs.f index ef2d67c9c..c9d672a34 100644 --- a/lapack-netlib/SRC/zlatbs.f +++ b/lapack-netlib/SRC/zlatbs.f @@ -163,7 +163,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERauxiliary * @@ -243,10 +243,10 @@ SUBROUTINE ZLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, $ SCALE, CNORM, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO @@ -281,7 +281,7 @@ $ ZDOTU, ZLADIV * .. * .. External Subroutines .. - EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTBSV + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTBSV, DLABAD * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN diff --git a/lapack-netlib/SRC/zlatps.f b/lapack-netlib/SRC/zlatps.f index d6523382c..ac148cab0 100644 --- a/lapack-netlib/SRC/zlatps.f +++ b/lapack-netlib/SRC/zlatps.f @@ -151,7 +151,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERauxiliary * @@ -231,10 +231,10 @@ SUBROUTINE ZLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, $ CNORM, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO @@ -269,7 +269,7 @@ $ ZDOTU, ZLADIV * .. * .. External Subroutines .. - EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTPSV + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTPSV, DLABAD * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN diff --git a/lapack-netlib/SRC/zlatrs.f b/lapack-netlib/SRC/zlatrs.f index 36ddba970..f06ac4ad0 100644 --- a/lapack-netlib/SRC/zlatrs.f +++ b/lapack-netlib/SRC/zlatrs.f @@ -159,7 +159,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERauxiliary * @@ -239,10 +239,10 @@ SUBROUTINE ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, $ CNORM, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO @@ -277,7 +277,7 @@ $ ZDOTU, ZLADIV * .. * .. External Subroutines .. - EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTRSV + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTRSV, DLABAD * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN diff --git a/lapack-netlib/SRC/zstedc.f b/lapack-netlib/SRC/zstedc.f index 7809372bc..b6be431b0 100644 --- a/lapack-netlib/SRC/zstedc.f +++ b/lapack-netlib/SRC/zstedc.f @@ -128,8 +128,7 @@ *> *> \param[out] RWORK *> \verbatim -*> RWORK is DOUBLE PRECISION array, -*> dimension (LRWORK) +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) *> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. *> \endverbatim *> @@ -199,7 +198,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERcomputational * @@ -213,10 +212,10 @@ SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, $ LRWORK, IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER COMPZ diff --git a/lapack-netlib/SRC/zstegr.f b/lapack-netlib/SRC/zstegr.f index 71f304c1c..5ad981096 100644 --- a/lapack-netlib/SRC/zstegr.f +++ b/lapack-netlib/SRC/zstegr.f @@ -184,7 +184,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -265,7 +265,7 @@ $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, $ LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/zstemr.f b/lapack-netlib/SRC/zstemr.f index 681c87adc..ac7552a6a 100644 --- a/lapack-netlib/SRC/zstemr.f +++ b/lapack-netlib/SRC/zstemr.f @@ -239,7 +239,7 @@ *> *> \param[out] ISUPPZ *> \verbatim -*> ISUPPZ is INTEGER ARRAY, dimension ( 2*max(1,M) ) +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) *> The support of the eigenvectors in Z, i.e., the indices *> indicating the nonzero elements in Z. The i-th computed eigenvector *> is nonzero only in elements ISUPPZ( 2*i-1 ) through @@ -338,7 +338,7 @@ $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, $ IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/zsycon_3.f b/lapack-netlib/SRC/zsycon_3.f index f279f3c60..856845960 100644 --- a/lapack-netlib/SRC/zsycon_3.f +++ b/lapack-netlib/SRC/zsycon_3.f @@ -149,7 +149,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16SYcomputational * @@ -157,7 +157,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -171,10 +171,10 @@ SUBROUTINE ZSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, $ WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zsyconvf.f b/lapack-netlib/SRC/zsyconvf.f index 5bd93199d..b26bfd63b 100644 --- a/lapack-netlib/SRC/zsyconvf.f +++ b/lapack-netlib/SRC/zsyconvf.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE ZSYCONVF( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE ZSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -192,7 +192,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * @@ -201,7 +201,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -209,10 +209,10 @@ * ===================================================================== SUBROUTINE ZSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/zsyconvf_rook.f b/lapack-netlib/SRC/zsyconvf_rook.f index daddd2601..5c36f4bcd 100644 --- a/lapack-netlib/SRC/zsyconvf_rook.f +++ b/lapack-netlib/SRC/zsyconvf_rook.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE ZSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* SUBROUTINE ZSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * * .. Scalar Arguments .. * CHARACTER UPLO, WAY @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * @@ -192,7 +192,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -200,10 +200,10 @@ * ===================================================================== SUBROUTINE ZSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO, WAY diff --git a/lapack-netlib/SRC/zsyequb.f b/lapack-netlib/SRC/zsyequb.f index 715f32b33..1cf411968 100644 --- a/lapack-netlib/SRC/zsyequb.f +++ b/lapack-netlib/SRC/zsyequb.f @@ -117,7 +117,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * @@ -132,10 +132,10 @@ * ===================================================================== SUBROUTINE ZSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, N @@ -168,7 +168,7 @@ EXTERNAL DLAMCH, LSAME * .. * .. External Subroutines .. - EXTERNAL ZLASSQ + EXTERNAL ZLASSQ, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, INT, LOG, MAX, MIN, SQRT diff --git a/lapack-netlib/SRC/zsysv_aa.f b/lapack-netlib/SRC/zsysv_aa.f index c650bed23..10693c731 100644 --- a/lapack-netlib/SRC/zsysv_aa.f +++ b/lapack-netlib/SRC/zsysv_aa.f @@ -154,7 +154,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYsolve * @@ -162,10 +162,10 @@ SUBROUTINE ZSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, $ LWORK, INFO ) * -* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK driver routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -188,7 +188,7 @@ EXTERNAL ILAENV, LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZSYTRF, ZSYTRS, ZSYTRS2 + EXTERNAL XERBLA, ZSYTRF_AA, ZSYTRS_AA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/zsysv_aa_2stage.f b/lapack-netlib/SRC/zsysv_aa_2stage.f new file mode 100644 index 000000000..fcf9bc870 --- /dev/null +++ b/lapack-netlib/SRC/zsysv_aa_2stage.f @@ -0,0 +1,276 @@ +*> \brief ZSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF_AA_2STAGE, + $ ZSYTRS_AA_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* +* End of ZSYSV_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/zsytrf_aa.f b/lapack-netlib/SRC/zsytrf_aa.f index 02f8cdda9..b25b1fbce 100644 --- a/lapack-netlib/SRC/zsytrf_aa.f +++ b/lapack-netlib/SRC/zsytrf_aa.f @@ -114,11 +114,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization -*> has been completed, but the block diagonal matrix D is -*> exactly singular, and division by zero will occur if it -*> is used to solve a system of equations. +*> < 0: if INFO = -i, the i-th argument had an illegal value. *> \endverbatim * * Authors: @@ -129,17 +125,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * * ===================================================================== SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +155,7 @@ * * .. Local Scalars .. LOGICAL LQUERY, UPPER - INTEGER J, LWKOPT, IINFO + INTEGER J, LWKOPT INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB COMPLEX*16 ALPHA * .. @@ -169,7 +165,8 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL ZLASYF_AA, ZGEMM, ZGEMV, ZSCAL, ZCOPY, + $ ZSWAP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -178,7 +175,7 @@ * * Determine the block size * - NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + NB = ILAENV( 1, 'ZSYTRF_AA', UPLO, N, -1, -1, -1 ) * * Test the input parameters. * @@ -214,13 +211,10 @@ ENDIF IPIV( 1 ) = 1 IF ( N.EQ.1 ) THEN - IF ( A( 1, 1 ).EQ.ZERO ) THEN - INFO = 1 - END IF RETURN END IF * -* Adjubst block size based on the workspace size +* Adjust block size based on the workspace size * IF( LWORK.LT.((1+NB)*N) ) THEN NB = ( LWORK-N ) / N @@ -260,11 +254,7 @@ * CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( MAX(1, J), J+1 ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), - $ IINFO ) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * @@ -383,10 +373,7 @@ * CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB, $ A( J+1, MAX(1, J) ), LDA, - $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO) - IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN - INFO = IINFO+J - ENDIF + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) * * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) * diff --git a/lapack-netlib/SRC/zsytrf_aa_2stage.f b/lapack-netlib/SRC/zsytrf_aa_2stage.f new file mode 100644 index 000000000..1f916726e --- /dev/null +++ b/lapack-netlib/SRC/zsytrf_aa_2stage.f @@ -0,0 +1,668 @@ +*> \brief \b ZSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRF_AA_2STAGE computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX*16 PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGBTRF, ZGEMM, ZGETRF, + $ ZLACPY, ZLASET, ZLASWP, ZTRSM, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'ZSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL ZGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL ZGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Upper', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + CALL ZTRSM( 'L', 'U', 'T', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL ZTRSM( 'R', 'U', 'N', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -CONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ CONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call ZGETRF +* + DO K = 1, NB + CALL ZCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL ZCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'U', 'N', 'U', KB, NB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL ZLASET( 'Lower', KB, NB, CZERO, CONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Lower', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + CALL ZTRSM( 'L', 'L', 'N', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL ZTRSM( 'R', 'L', 'T', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Symmetrize T(J,J) +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -CONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'L', 'T', 'U', KB, NB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) = + $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL ZLASET( 'Upper', KB, NB, CZERO, CONE, + $ A( (J+1)*NB+1, J*NB+1 ), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL ZLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of ZSYTRF_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/zsytri2.f b/lapack-netlib/SRC/zsytri2.f index 69ae38930..d5aabd43a 100644 --- a/lapack-netlib/SRC/zsytri2.f +++ b/lapack-netlib/SRC/zsytri2.f @@ -120,17 +120,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * * ===================================================================== SUBROUTINE ZSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -153,7 +153,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL ZSYTRI, ZSYTRI2X + EXTERNAL ZSYTRI, ZSYTRI2X, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/zsytri2x.f b/lapack-netlib/SRC/zsytri2x.f index a8e068db0..4feb4d563 100644 --- a/lapack-netlib/SRC/zsytri2x.f +++ b/lapack-netlib/SRC/zsytri2x.f @@ -87,7 +87,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is COMPLEX*16 array, dimension (N+NNB+1,NNB+3) +*> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3) *> \endverbatim *> *> \param[in] NB @@ -113,17 +113,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16SYcomputational * * ===================================================================== SUBROUTINE ZSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zsytri_3.f b/lapack-netlib/SRC/zsytri_3.f index 99d771a83..dc38e8547 100644 --- a/lapack-netlib/SRC/zsytri_3.f +++ b/lapack-netlib/SRC/zsytri_3.f @@ -152,7 +152,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * @@ -160,7 +160,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> November 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -170,10 +170,10 @@ SUBROUTINE ZSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -196,7 +196,7 @@ EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL ZSYTRI_3X + EXTERNAL ZSYTRI_3X, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/zsytri_3x.f b/lapack-netlib/SRC/zsytri_3x.f index 7f999e061..a94392923 100644 --- a/lapack-netlib/SRC/zsytri_3x.f +++ b/lapack-netlib/SRC/zsytri_3x.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16SYcomputational * @@ -150,7 +150,7 @@ * ================== *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -159,10 +159,10 @@ * ===================================================================== SUBROUTINE ZSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zsytrs_3.f b/lapack-netlib/SRC/zsytrs_3.f index dab361353..673687532 100644 --- a/lapack-netlib/SRC/zsytrs_3.f +++ b/lapack-netlib/SRC/zsytrs_3.f @@ -142,7 +142,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16SYcomputational * @@ -151,7 +151,7 @@ *> *> \verbatim *> -*> December 2016, Igor Kozachenko, +*> June 2017, Igor Kozachenko, *> Computer Science Division, *> University of California, Berkeley *> @@ -165,10 +165,10 @@ SUBROUTINE ZSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/SRC/zsytrs_aa.f b/lapack-netlib/SRC/zsytrs_aa.f index b3c9b9ecd..e62e9e486 100644 --- a/lapack-netlib/SRC/zsytrs_aa.f +++ b/lapack-netlib/SRC/zsytrs_aa.f @@ -66,7 +66,7 @@ *> of the matrix B. NRHS >= 0. *> \endverbatim *> -*> \param[in,out] A +*> \param[in] A *> \verbatim *> A is COMPLEX*16 array, dimension (LDA,N) *> Details of factors computed by ZSYTRF_AA. @@ -121,7 +121,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16SYcomputational * @@ -129,10 +129,10 @@ SUBROUTINE ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -159,7 +159,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL ZGTSV, ZSWAP, ZTRSM, XERBLA + EXTERNAL ZGTSV, ZSWAP, ZLACPY, ZTRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX diff --git a/lapack-netlib/SRC/zsytrs_aa_2stage.f b/lapack-netlib/SRC/zsytrs_aa_2stage.f new file mode 100644 index 000000000..c5d894753 --- /dev/null +++ b/lapack-netlib/SRC/zsytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b ZSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by ZSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Details of factors computed by ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> Details of factors computed by ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGBTRS, ZLASWP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL ZTRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of ZSYTRS_AA_2STAGE +* + END diff --git a/lapack-netlib/SRC/ztgex2.f b/lapack-netlib/SRC/ztgex2.f index 26b83dec3..ba80d7162 100644 --- a/lapack-netlib/SRC/ztgex2.f +++ b/lapack-netlib/SRC/ztgex2.f @@ -76,7 +76,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX*16 arrays, dimensions (LDA,N) +*> A is COMPLEX*16 array, dimensions (LDA,N) *> On entry, the matrix A in the pair (A, B). *> On exit, the updated matrix A. *> \endverbatim @@ -89,7 +89,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is COMPLEX*16 arrays, dimensions (LDB,N) +*> B is COMPLEX*16 array, dimensions (LDB,N) *> On entry, the matrix B in the pair (A, B). *> On exit, the updated matrix B. *> \endverbatim @@ -102,7 +102,7 @@ *> *> \param[in,out] Q *> \verbatim -*> Q is COMPLEX*16 array, dimension (LDZ,N) +*> Q is COMPLEX*16 array, dimension (LDQ,N) *> If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit, *> the updated matrix Q. *> Not referenced if WANTQ = .FALSE.. @@ -153,7 +153,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEauxiliary * @@ -190,10 +190,10 @@ SUBROUTINE ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, $ LDZ, J1, INFO ) * -* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK auxiliary routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL WANTQ, WANTZ diff --git a/lapack-netlib/SRC/ztgexc.f b/lapack-netlib/SRC/ztgexc.f index cb7b5229a..7eac54cab 100644 --- a/lapack-netlib/SRC/ztgexc.f +++ b/lapack-netlib/SRC/ztgexc.f @@ -102,7 +102,7 @@ *> *> \param[in,out] Q *> \verbatim -*> Q is COMPLEX*16 array, dimension (LDZ,N) +*> Q is COMPLEX*16 array, dimension (LDQ,N) *> On entry, if WANTQ = .TRUE., the unitary matrix Q. *> On exit, the updated matrix Q. *> If WANTQ = .FALSE., Q is not referenced. @@ -163,7 +163,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16GEcomputational * @@ -200,10 +200,10 @@ SUBROUTINE ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, $ LDZ, IFST, ILST, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL WANTQ, WANTZ diff --git a/lapack-netlib/SRC/ztgsen.f b/lapack-netlib/SRC/ztgsen.f index 8561f23ae..cb4afd5e8 100644 --- a/lapack-netlib/SRC/ztgsen.f +++ b/lapack-netlib/SRC/ztgsen.f @@ -72,7 +72,7 @@ * *> \param[in] IJOB *> \verbatim -*> IJOB is integer +*> IJOB is INTEGER *> Specifies whether condition numbers are required for the *> cluster of eigenvalues (PL and PR) or the deflating subspaces *> (Difu and Difl): @@ -433,7 +433,7 @@ $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, $ WORK, LWORK, IWORK, LIWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 diff --git a/lapack-netlib/SRC/ztplqt.f b/lapack-netlib/SRC/ztplqt.f index 28740208f..bbf001044 100644 --- a/lapack-netlib/SRC/ztplqt.f +++ b/lapack-netlib/SRC/ztplqt.f @@ -73,8 +73,8 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX*16 array, dimension (LDA,N) -*> On entry, the lower triangular N-by-N matrix A. +*> A is COMPLEX*16 array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. *> \endverbatim @@ -82,7 +82,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -146,26 +146,26 @@ *> C = [ A ] [ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L *> upper trapezoidal matrix B2: *> [ B ] = [ B1 ] [ B2 ] *> [ B1 ] <- M-by-(N-L) rectangular -*> [ B2 ] <- M-by-L upper trapezoidal. +*> [ B2 ] <- M-by-L lower trapezoidal. *> *> The lower trapezoidal matrix B2 consists of the first L columns of a -*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, *> B is rectangular M-by-N; if M=L=N, B is lower triangular. *> *> The matrix W stores the elementary reflectors H(i) in the i-th row *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> [ C ] = [ A ] [ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> [ W ] = [ I ] [ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -189,10 +189,10 @@ SUBROUTINE ZTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L, MB diff --git a/lapack-netlib/SRC/ztplqt2.f b/lapack-netlib/SRC/ztplqt2.f index 733f9dccc..9fecfddfd 100644 --- a/lapack-netlib/SRC/ztplqt2.f +++ b/lapack-netlib/SRC/ztplqt2.f @@ -65,7 +65,7 @@ *> *> \param[in,out] A *> \verbatim -*> A is COMPLEX*16 array, dimension (LDA,N) +*> A is COMPLEX*16 array, dimension (LDA,M) *> On entry, the lower triangular M-by-M matrix A. *> On exit, the elements on and below the diagonal of the array *> contain the lower triangular matrix L. @@ -74,7 +74,7 @@ *> \param[in] LDA *> \verbatim *> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). +*> The leading dimension of the array A. LDA >= max(1,M). *> \endverbatim *> *> \param[in,out] B @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -133,7 +133,7 @@ *> C = [ A ][ B ] *> *> -*> where A is an lower triangular N-by-N matrix, and B is M-by-N pentagonal +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal *> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L *> upper trapezoidal matrix B2: *> @@ -149,13 +149,13 @@ *> above the diagonal (of A) in the M-by-(M+N) input matrix C *> *> C = [ A ][ B ] -*> [ A ] <- lower triangular N-by-N +*> [ A ] <- lower triangular M-by-M *> [ B ] <- M-by-N pentagonal *> *> so that W can be represented as *> *> W = [ I ][ V ] -*> [ I ] <- identity, N-by-N +*> [ I ] <- identity, M-by-M *> [ V ] <- M-by-N, same form as B. *> *> Thus, all of information needed for W is contained on exit in B, which @@ -177,10 +177,10 @@ * ===================================================================== SUBROUTINE ZTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDB, LDT, N, M, L diff --git a/lapack-netlib/SRC/ztpmlqt.f b/lapack-netlib/SRC/ztpmlqt.f index f9540e112..6a67e4443 100644 --- a/lapack-netlib/SRC/ztpmlqt.f +++ b/lapack-netlib/SRC/ztpmlqt.f @@ -6,7 +6,7 @@ * http://www.netlib.org/lapack/explore-html/ * *> \htmlonly -*> Download DTPMQRT + dependencies +*> Download ZTPMLQT + dependencies *> *> [TGZ] *> @@ -36,9 +36,9 @@ *> *> \verbatim *> -*> ZTPMQRT applies a complex orthogonal matrix Q obtained from a -*> "triangular-pentagonal" real block reflector H to a general -*> real matrix C, which consists of two blocks A and B. +*> ZTPMLQT applies a complex orthogonal matrix Q obtained from a +*> "triangular-pentagonal" complex block reflector H to a general +*> complex matrix C, which consists of two blocks A and B. *> \endverbatim * * Arguments: @@ -47,15 +47,15 @@ *> \param[in] SIDE *> \verbatim *> SIDE is CHARACTER*1 -*> = 'L': apply Q or Q**C from the Left; -*> = 'R': apply Q or Q**C from the Right. +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. *> \endverbatim *> *> \param[in] TRANS *> \verbatim *> TRANS is CHARACTER*1 *> = 'N': No transpose, apply Q; -*> = 'C': Transpose, apply Q**C. +*> = 'C': Transpose, apply Q**H. *> \endverbatim *> *> \param[in] M @@ -128,7 +128,7 @@ *> (LDA,K) if SIDE = 'R' *> On entry, the K-by-N or M-by-K matrix A. *> On exit, A is overwritten by the corresponding block of -*> Q*C or Q**C*C or C*Q or C*Q**C. See Further Details. +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. *> \endverbatim *> *> \param[in] LDA @@ -144,7 +144,7 @@ *> B is COMPLEX*16 array, dimension (LDB,N) *> On entry, the M-by-N matrix B. *> On exit, B is overwritten by the corresponding block of -*> Q*C or Q**C*C or C*Q or C*Q**C. See Further Details. +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. *> \endverbatim *> *> \param[in] LDB @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup doubleOTHERcomputational * @@ -205,21 +205,21 @@ *> *> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. *> -*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**C * C. +*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**H * C. *> *> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. *> -*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**C. +*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**H. *> \endverbatim *> * ===================================================================== SUBROUTINE ZTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS diff --git a/lapack-netlib/SRC/ztpmqrt.f b/lapack-netlib/SRC/ztpmqrt.f index a1b53a3c7..aca7ff00f 100644 --- a/lapack-netlib/SRC/ztpmqrt.f +++ b/lapack-netlib/SRC/ztpmqrt.f @@ -175,7 +175,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERcomputational * @@ -216,10 +216,10 @@ SUBROUTINE ZTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, $ A, LDA, B, LDB, WORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS @@ -242,7 +242,7 @@ EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL ZTPRFB, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/SRC/ztrevc.f b/lapack-netlib/SRC/ztrevc.f index 678cf94e7..ea925dda8 100644 --- a/lapack-netlib/SRC/ztrevc.f +++ b/lapack-netlib/SRC/ztrevc.f @@ -196,7 +196,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERcomputational * @@ -218,10 +218,10 @@ SUBROUTINE ZTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, $ LDVR, MM, M, WORK, RWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE @@ -256,7 +256,7 @@ EXTERNAL LSAME, IZAMAX, DLAMCH, DZASUM * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLATRS + EXTERNAL XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLATRS, DLABAD * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX diff --git a/lapack-netlib/SRC/ztrevc3.f b/lapack-netlib/SRC/ztrevc3.f index ef8331174..36f7d8274 100644 --- a/lapack-netlib/SRC/ztrevc3.f +++ b/lapack-netlib/SRC/ztrevc3.f @@ -222,7 +222,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @precisions fortran z -> c * @@ -247,10 +247,10 @@ $ LDVR, MM, M, WORK, LWORK, RWORK, LRWORK, INFO) IMPLICIT NONE * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE @@ -288,7 +288,7 @@ * .. * .. External Subroutines .. EXTERNAL XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLATRS, - $ ZGEMM, DLABAD, ZLASET + $ ZGEMM, DLABAD, ZLASET, ZLACPY * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, CONJG, AIMAG, MAX diff --git a/lapack-netlib/SRC/ztrsna.f b/lapack-netlib/SRC/ztrsna.f index 07a76c138..97a42a516 100644 --- a/lapack-netlib/SRC/ztrsna.f +++ b/lapack-netlib/SRC/ztrsna.f @@ -197,7 +197,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16OTHERcomputational * @@ -249,10 +249,10 @@ $ LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER HOWMNY, JOB @@ -291,7 +291,8 @@ EXTERNAL LSAME, IZAMAX, DLAMCH, DZNRM2, ZDOTC * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLACPY, ZLATRS, ZTREXC + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLACPY, ZLATRS, ZTREXC, + $ DLABAD * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX diff --git a/lapack-netlib/SRC/zunbdb1.f b/lapack-netlib/SRC/zunbdb1.f index 9ec097728..039d160c1 100644 --- a/lapack-netlib/SRC/zunbdb1.f +++ b/lapack-netlib/SRC/zunbdb1.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -203,7 +203,7 @@ SUBROUTINE ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/zunbdb2.f b/lapack-netlib/SRC/zunbdb2.f index 8fa730829..8bfca46f6 100644 --- a/lapack-netlib/SRC/zunbdb2.f +++ b/lapack-netlib/SRC/zunbdb2.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -201,7 +201,7 @@ SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 @@ -229,7 +229,8 @@ LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, XERBLA + EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, ZLACGV, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DZNRM2 diff --git a/lapack-netlib/SRC/zunbdb3.f b/lapack-netlib/SRC/zunbdb3.f index 737c9a76e..523aee55d 100644 --- a/lapack-netlib/SRC/zunbdb3.f +++ b/lapack-netlib/SRC/zunbdb3.f @@ -32,7 +32,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -201,7 +201,7 @@ SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 @@ -228,7 +228,7 @@ LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, XERBLA + EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZLACGV, XERBLA * .. * .. External Functions .. DOUBLE PRECISION DZNRM2 diff --git a/lapack-netlib/SRC/zunbdb4.f b/lapack-netlib/SRC/zunbdb4.f index 9bb0c9bd9..026f5a558 100644 --- a/lapack-netlib/SRC/zunbdb4.f +++ b/lapack-netlib/SRC/zunbdb4.f @@ -33,7 +33,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -213,7 +213,7 @@ $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 @@ -241,7 +241,8 @@ LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, XERBLA + EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, ZLACGV, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DZNRM2 diff --git a/lapack-netlib/SRC/zunbdb5.f b/lapack-netlib/SRC/zunbdb5.f index 0e2678bfc..be708b794 100644 --- a/lapack-netlib/SRC/zunbdb5.f +++ b/lapack-netlib/SRC/zunbdb5.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -156,7 +156,7 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/zunbdb6.f b/lapack-netlib/SRC/zunbdb6.f index 42a3cf801..c4ccbfb10 100644 --- a/lapack-netlib/SRC/zunbdb6.f +++ b/lapack-netlib/SRC/zunbdb6.f @@ -31,7 +31,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -154,7 +154,7 @@ SUBROUTINE ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/zuncsd.f b/lapack-netlib/SRC/zuncsd.f index 77a83c095..db53f93d8 100644 --- a/lapack-netlib/SRC/zuncsd.f +++ b/lapack-netlib/SRC/zuncsd.f @@ -188,7 +188,7 @@ *> *> \param[out] U1 *> \verbatim -*> U1 is COMPLEX*16 array, dimension (P) +*> U1 is COMPLEX*16 array, dimension (LDU1,P) *> If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1. *> \endverbatim *> @@ -201,7 +201,7 @@ *> *> \param[out] U2 *> \verbatim -*> U2 is COMPLEX*16 array, dimension (M-P) +*> U2 is COMPLEX*16 array, dimension (LDU2,M-P) *> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary *> matrix U2. *> \endverbatim @@ -215,7 +215,7 @@ *> *> \param[out] V1T *> \verbatim -*> V1T is COMPLEX*16 array, dimension (Q) +*> V1T is COMPLEX*16 array, dimension (LDV1T,Q) *> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary *> matrix V1**H. *> \endverbatim @@ -229,7 +229,7 @@ *> *> \param[out] V2T *> \verbatim -*> V2T is COMPLEX*16 array, dimension (M-Q) +*> V2T is COMPLEX*16 array, dimension (LDV2T,M-Q) *> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) unitary *> matrix V2**H. *> \endverbatim @@ -308,7 +308,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16OTHERcomputational * @@ -320,10 +320,10 @@ $ LDV2T, WORK, LWORK, RWORK, LRWORK, $ IWORK, INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS diff --git a/lapack-netlib/SRC/zuncsd2by1.f b/lapack-netlib/SRC/zuncsd2by1.f index 630a59380..56285a191 100644 --- a/lapack-netlib/SRC/zuncsd2by1.f +++ b/lapack-netlib/SRC/zuncsd2by1.f @@ -39,7 +39,7 @@ * * *> \par Purpose: -*> ============= +* ============= *> *>\verbatim *> @@ -254,7 +254,7 @@ $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, $ INFO ) * -* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * July 2012 diff --git a/lapack-netlib/SRC/zunm22.f b/lapack-netlib/SRC/zunm22.f index 468d7d8c1..f612e8fac 100644 --- a/lapack-netlib/SRC/zunm22.f +++ b/lapack-netlib/SRC/zunm22.f @@ -52,8 +52,8 @@ *> N2-by-N2 upper triangular matrix. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] SIDE *> \verbatim @@ -162,7 +162,7 @@ SUBROUTINE ZUNM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, $ WORK, LWORK, INFO ) * -* -- LAPACK computational routine (version 3.6.0) -- +* -- LAPACK computational routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * January 2015 diff --git a/lapack-netlib/TESTING/EIG/CMakeLists.txt b/lapack-netlib/TESTING/EIG/CMakeLists.txt index 19fffcd44..20fd25b4a 100644 --- a/lapack-netlib/TESTING/EIG/CMakeLists.txt +++ b/lapack-netlib/TESTING/EIG/CMakeLists.txt @@ -3,34 +3,13 @@ # The test files are organized as follows: # # AEIGTST -- Auxiliary test routines used in all precisions -# SCIGTST -- Auxiliary test routines used in REAL and COMPLEX -# DZIGTST -- Auxiliary test routines used in DOUBLE PRECISION and -# COMPLEX*16 +# SCIGTST -- Auxiliary test routines used in single precision +# DZIGTST -- Auxiliary test routines used in double precision # SEIGTST -- Single precision real test routines # CEIGTST -- Single precision complex test routines # DEIGTST -- Double precision real test routines # ZEIGTST -- Double precision complex test routines # -# Test programs can be generated for all or some of the four different -# precisions. Enter make followed by one or more of the data types -# desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates all four test programs. -# The executable files are called -# xeigtsts, xeigtstd, xeigtstc, and xeigtstz -# and are created in the next higher directory level. -# -# To remove the object files after the executable files have been -# created, enter -# make clean -# On some systems, you can force the source files to be recompiled by -# entering (for example) -# make single FRC=FRC -# ######################################################################## set(AEIGTST @@ -119,25 +98,21 @@ set(ZEIGTST zchkee.f macro(add_eig_executable name) add_executable(${name} ${ARGN}) - target_link_libraries(${name} tmglib ${LAPACK_LIBRARIES}) + target_link_libraries(${name} tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) endmacro() if(BUILD_SINGLE) -add_eig_executable(xeigtsts ${SEIGTST} ${SCIGTST} ${AEIGTST} - ${SECOND_SRC}) +add_eig_executable(xeigtsts ${SEIGTST} ${SCIGTST} ${AEIGTST}) endif() if(BUILD_COMPLEX) -add_eig_executable(xeigtstc ${CEIGTST} ${SCIGTST} ${AEIGTST} - ${SECOND_SRC}) +add_eig_executable(xeigtstc ${CEIGTST} ${SCIGTST} ${AEIGTST}) endif() if(BUILD_DOUBLE) -add_eig_executable(xeigtstd ${DEIGTST} ${DZIGTST} ${AEIGTST} - ${DSECOND_SRC}) +add_eig_executable(xeigtstd ${DEIGTST} ${DZIGTST} ${AEIGTST}) endif() if(BUILD_COMPLEX16) -add_eig_executable(xeigtstz ${ZEIGTST} ${DZIGTST} ${AEIGTST} - ${DSECOND_SRC}) +add_eig_executable(xeigtstz ${ZEIGTST} ${DZIGTST} ${AEIGTST}) endif() diff --git a/lapack-netlib/TESTING/EIG/Makefile b/lapack-netlib/TESTING/EIG/Makefile index eef087d9c..78046125a 100644 --- a/lapack-netlib/TESTING/EIG/Makefile +++ b/lapack-netlib/TESTING/EIG/Makefile @@ -5,9 +5,8 @@ include ../../make.inc # The test files are organized as follows: # # AEIGTST -- Auxiliary test routines used in all precisions -# SCIGTST -- Auxiliary test routines used in REAL and COMPLEX -# DZIGTST -- Auxiliary test routines used in DOUBLE PRECISION and -# COMPLEX*16 +# SCIGTST -- Auxiliary test routines used in single precision +# DZIGTST -- Auxiliary test routines used in double precision # SEIGTST -- Single precision real test routines # CEIGTST -- Single precision complex test routines # DEIGTST -- Double precision real test routines @@ -24,11 +23,10 @@ include ../../make.inc # without any arguments creates all four test programs. # The executable files are called # xeigtsts, xeigtstd, xeigtstc, and xeigtstz -# and are created in the next higher directory level. # # To remove the object files after the executable files have been # created, enter -# make clean +# make cleanobj # On some systems, you can force the source files to be recompiled by # entering (for example) # make single FRC=FRC @@ -121,30 +119,22 @@ ZEIGTST = zchkee.o \ all: single complex double complex16 -single: ../xeigtsts -complex: ../xeigtstc -double: ../xeigtstd -complex16: ../xeigtstz +single: xeigtsts +complex: xeigtstc +double: xeigtstd +complex16: xeigtstz -../xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ \ - $(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \ - ../../$(LAPACKLIB) $(BLASLIB) +xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xeigtstc: $(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ \ - $(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) \ - ../../$(LAPACKLIB) $(BLASLIB) +xeigtstc: $(CEIGTST) $(SCIGTST) $(AEIGTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ \ - $(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \ - ../../$(LAPACKLIB) $(BLASLIB) +xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ \ - $(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) \ - ../../$(LAPACKLIB) $(BLASLIB) +xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ $(AEIGTST): $(FRC) $(SCIGTST): $(FRC) @@ -157,8 +147,11 @@ $(ZEIGTST): $(FRC) FRC: @FRC=$(FRC) -clean: +clean: cleanobj cleanexe +cleanobj: rm -f *.o +cleanexe: + rm -f xeigtst* schkee.o: schkee.f $(FORTRAN) $(DRVOPTS) -c -o $@ $< diff --git a/lapack-netlib/TESTING/EIG/cchkhb2stg.f b/lapack-netlib/TESTING/EIG/cchkhb2stg.f index 975217fa5..61537f44b 100644 --- a/lapack-netlib/TESTING/EIG/cchkhb2stg.f +++ b/lapack-netlib/TESTING/EIG/cchkhb2stg.f @@ -313,7 +313,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_eig * @@ -323,10 +323,10 @@ $ D2, D3, U, LDU, WORK, LWORK, RWORK, RESULT, $ INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, @@ -373,7 +373,7 @@ * .. * .. External Subroutines .. EXTERNAL SLASUM, XERBLA, CHBT21, CHBTRD, CLACPY, CLASET, - $ CLATMR, CLATMS, CHBTRD_HB2ST, CSTEQR + $ CLATMR, CLATMS, CHETRD_HB2ST, CSTEQR * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CONJG, MAX, MIN, SQRT diff --git a/lapack-netlib/TESTING/EIG/cdrvst2stg.f b/lapack-netlib/TESTING/EIG/cdrvst2stg.f index 12a27e5d9..095bd7885 100644 --- a/lapack-netlib/TESTING/EIG/cdrvst2stg.f +++ b/lapack-netlib/TESTING/EIG/cdrvst2stg.f @@ -328,7 +328,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_eig * @@ -338,10 +338,10 @@ $ LDU, V, TAU, Z, WORK, LWORK, RWORK, LRWORK, $ IWORK, LIWORK, RESULT, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, @@ -399,8 +399,7 @@ $ CHET22, CHPEV, CHPEVD, CHPEVX, CLACPY, CLASET, $ CHEEVD_2STAGE, CHEEVR_2STAGE, CHEEVX_2STAGE, $ CHEEV_2STAGE, CHBEV_2STAGE, CHBEVD_2STAGE, - $ CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_SY2SB, - $ CHETRD_SB2ST, CLATMR, CLATMS + $ CHBEVX_2STAGE, CLATMR, CLATMS * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, INT, LOG, MAX, MIN, SQRT diff --git a/lapack-netlib/TESTING/EIG/cerrst.f b/lapack-netlib/TESTING/EIG/cerrst.f index 10b2de432..4aa5aebe3 100644 --- a/lapack-netlib/TESTING/EIG/cerrst.f +++ b/lapack-netlib/TESTING/EIG/cerrst.f @@ -27,8 +27,8 @@ *> CHPEV, CHPEVX, CHPEVD, and CSTEDC. *> CHEEVD_2STAGE, CHEEVR_2STAGE, CHEEVX_2STAGE, *> CHEEV_2STAGE, CHBEV_2STAGE, CHBEVD_2STAGE, -*> CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_SY2SB, -*> CHETRD_SB2ST +*> CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_HE2HB, +*> CHETRD_HB2ST *> \endverbatim * * Arguments: @@ -54,17 +54,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_eig * * ===================================================================== SUBROUTINE CERRST( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -100,8 +100,8 @@ $ CUNGTR, CUNMTR, CUPGTR, CUPMTR, $ CHEEVD_2STAGE, CHEEVR_2STAGE, CHEEVX_2STAGE, $ CHEEV_2STAGE, CHBEV_2STAGE, CHBEVD_2STAGE, - $ CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_SY2SB, - $ CHETRD_SB2ST + $ CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_HE2HB, + $ CHETRD_HB2ST * .. * .. Scalars in Common .. LOGICAL LERR, OK diff --git a/lapack-netlib/TESTING/EIG/cget02.f b/lapack-netlib/TESTING/EIG/cget02.f index f79585db4..1b7f6b384 100644 --- a/lapack-netlib/TESTING/EIG/cget02.f +++ b/lapack-netlib/TESTING/EIG/cget02.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_eig * @@ -133,10 +133,10 @@ SUBROUTINE CGET02( TRANS, M, N, NRHS, A, LDA, X, LDX, B, LDB, $ RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -154,7 +154,7 @@ REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CONE - PARAMETER ( CONE = 1.0E+0 ) + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER J, N1, N2 @@ -191,7 +191,7 @@ * Exit with RESID = 1/EPS if ANORM = 0. * EPS = SLAMCH( 'Epsilon' ) - ANORM = CLANGE( '1', N1, N2, A, LDA, RWORK ) + ANORM = CLANGE( '1', M, N, A, LDA, RWORK ) IF( ANORM.LE.ZERO ) THEN RESID = ONE / EPS RETURN diff --git a/lapack-netlib/TESTING/EIG/clarhs.f b/lapack-netlib/TESTING/EIG/clarhs.f index 6cb654713..1143be85d 100644 --- a/lapack-netlib/TESTING/EIG/clarhs.f +++ b/lapack-netlib/TESTING/EIG/clarhs.f @@ -189,8 +189,8 @@ *> \param[out] INFO *> \verbatim *> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value *> \endverbatim * * Authors: @@ -201,7 +201,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_eig * @@ -209,10 +209,10 @@ SUBROUTINE CLARHS( PATH, XTYPE, UPLO, TRANS, M, N, KL, KU, NRHS, $ A, LDA, X, LDX, B, LDB, ISEED, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO, XTYPE diff --git a/lapack-netlib/TESTING/EIG/dchksb2stg.f b/lapack-netlib/TESTING/EIG/dchksb2stg.f index 8cd194424..ee66f7ebb 100644 --- a/lapack-netlib/TESTING/EIG/dchksb2stg.f +++ b/lapack-netlib/TESTING/EIG/dchksb2stg.f @@ -307,7 +307,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_eig * @@ -316,10 +316,10 @@ $ ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, $ D2, D3, U, LDU, WORK, LWORK, RESULT, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, @@ -363,7 +363,7 @@ * .. * .. External Subroutines .. EXTERNAL DLACPY, DLASET, DLASUM, DLATMR, DLATMS, DSBT21, - $ DSBTRD, XERBLA, DSBTRD_SB2ST, DSTEQR + $ DSBTRD, XERBLA, DSYTRD_SB2ST, DSTEQR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, SQRT diff --git a/lapack-netlib/TESTING/EIG/dget02.f b/lapack-netlib/TESTING/EIG/dget02.f index c992b524c..a0337eeec 100644 --- a/lapack-netlib/TESTING/EIG/dget02.f +++ b/lapack-netlib/TESTING/EIG/dget02.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_eig * @@ -133,10 +133,10 @@ SUBROUTINE DGET02( TRANS, M, N, NRHS, A, LDA, X, LDX, B, LDB, $ RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -189,7 +189,7 @@ * Exit with RESID = 1/EPS if ANORM = 0. * EPS = DLAMCH( 'Epsilon' ) - ANORM = DLANGE( '1', N1, N2, A, LDA, RWORK ) + ANORM = DLANGE( '1', M, N, A, LDA, RWORK ) IF( ANORM.LE.ZERO ) THEN RESID = ONE / EPS RETURN diff --git a/lapack-netlib/TESTING/EIG/dlarhs.f b/lapack-netlib/TESTING/EIG/dlarhs.f index 435f7d6a2..74b22792a 100644 --- a/lapack-netlib/TESTING/EIG/dlarhs.f +++ b/lapack-netlib/TESTING/EIG/dlarhs.f @@ -184,8 +184,8 @@ *> \param[out] INFO *> \verbatim *> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value *> \endverbatim * * Authors: @@ -196,7 +196,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_eig * @@ -204,10 +204,10 @@ SUBROUTINE DLARHS( PATH, XTYPE, UPLO, TRANS, M, N, KL, KU, NRHS, $ A, LDA, X, LDX, B, LDB, ISEED, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO, XTYPE diff --git a/lapack-netlib/TESTING/EIG/ilaenv.f b/lapack-netlib/TESTING/EIG/ilaenv.f index 8b741a283..d99f52ea9 100644 --- a/lapack-netlib/TESTING/EIG/ilaenv.f +++ b/lapack-netlib/TESTING/EIG/ilaenv.f @@ -122,7 +122,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup OTHERauxiliary * @@ -153,10 +153,10 @@ INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, $ N4 ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*( * ) NAME, OPTS @@ -250,6 +250,53 @@ C ILAENV = 0 * * End of ILAENV * + END + INTEGER FUNCTION ILAENV2STAGE( ISPEC, NAME, OPTS, N1, N2, + $ N3, N4 ) +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* ===================================================================== +* +* .. Local variables .. + INTEGER IISPEC +* .. External Functions .. + INTEGER IPARAM2STAGE + EXTERNAL IPARAM2STAGE +* .. +* .. Arrays in Common .. + INTEGER IPARMS( 100 ) +* .. +* .. Common blocks .. + COMMON / CLAENV / IPARMS +* .. +* .. Save statement .. + SAVE / CLAENV / +* .. +* .. Executable Statements .. +* + IF(( ISPEC.GE.1 ) .AND. (ISPEC.LE.5)) THEN +* +* 1 <= ISPEC <= 5: 2stage eigenvalues SVD routines. +* + IF( ISPEC.EQ.1 ) THEN + ILAENV2STAGE = IPARMS( 1 ) + ELSE + IISPEC = 16 + ISPEC + ILAENV2STAGE = IPARAM2STAGE( IISPEC, NAME, OPTS, + $ N1, N2, N3, N4 ) + ENDIF +* + ELSE +* +* Invalid value for ISPEC +* + ILAENV2STAGE = -1 + END IF +* + RETURN END INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) * diff --git a/lapack-netlib/TESTING/EIG/schksb2stg.f b/lapack-netlib/TESTING/EIG/schksb2stg.f index 07dfc83a4..07b6fa95c 100644 --- a/lapack-netlib/TESTING/EIG/schksb2stg.f +++ b/lapack-netlib/TESTING/EIG/schksb2stg.f @@ -307,7 +307,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup single_eig * @@ -316,10 +316,10 @@ $ ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, $ D2, D3, U, LDU, WORK, LWORK, RESULT, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, @@ -363,7 +363,7 @@ * .. * .. External Subroutines .. EXTERNAL SLACPY, SLASET, SLASUM, SLATMR, SLATMS, SSBT21, - $ SSBTRD, XERBLA, SSBTRD_SB2ST, SSTEQR + $ SSBTRD, XERBLA, SSYTRD_SB2ST, SSTEQR * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, MAX, MIN, SQRT diff --git a/lapack-netlib/TESTING/EIG/sget02.f b/lapack-netlib/TESTING/EIG/sget02.f index 50546869d..a47b901d3 100644 --- a/lapack-netlib/TESTING/EIG/sget02.f +++ b/lapack-netlib/TESTING/EIG/sget02.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup single_eig * @@ -133,10 +133,10 @@ SUBROUTINE SGET02( TRANS, M, N, NRHS, A, LDA, X, LDX, B, LDB, $ RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -189,7 +189,7 @@ * Exit with RESID = 1/EPS if ANORM = 0. * EPS = SLAMCH( 'Epsilon' ) - ANORM = SLANGE( '1', N1, N2, A, LDA, RWORK ) + ANORM = SLANGE( '1', M, N, A, LDA, RWORK ) IF( ANORM.LE.ZERO ) THEN RESID = ONE / EPS RETURN diff --git a/lapack-netlib/TESTING/EIG/slarhs.f b/lapack-netlib/TESTING/EIG/slarhs.f index e4a8159f7..1e73842e3 100644 --- a/lapack-netlib/TESTING/EIG/slarhs.f +++ b/lapack-netlib/TESTING/EIG/slarhs.f @@ -184,8 +184,8 @@ *> \param[out] INFO *> \verbatim *> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value *> \endverbatim * * Authors: @@ -196,7 +196,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup single_eig * @@ -204,10 +204,10 @@ SUBROUTINE SLARHS( PATH, XTYPE, UPLO, TRANS, M, N, KL, KU, NRHS, $ A, LDA, X, LDX, B, LDB, ISEED, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO, XTYPE diff --git a/lapack-netlib/TESTING/EIG/zchkhb2stg.f b/lapack-netlib/TESTING/EIG/zchkhb2stg.f index 88c049919..dbbb84348 100644 --- a/lapack-netlib/TESTING/EIG/zchkhb2stg.f +++ b/lapack-netlib/TESTING/EIG/zchkhb2stg.f @@ -313,7 +313,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_eig * @@ -323,10 +323,10 @@ $ D2, D3, U, LDU, WORK, LWORK, RWORK, RESULT, $ INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, diff --git a/lapack-netlib/TESTING/EIG/zdrvst2stg.f b/lapack-netlib/TESTING/EIG/zdrvst2stg.f index dbf8d8037..de072c9a3 100644 --- a/lapack-netlib/TESTING/EIG/zdrvst2stg.f +++ b/lapack-netlib/TESTING/EIG/zdrvst2stg.f @@ -328,7 +328,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_eig * @@ -338,10 +338,10 @@ $ LDU, V, TAU, Z, WORK, LWORK, RWORK, LRWORK, $ IWORK, LIWORK, RESULT, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, diff --git a/lapack-netlib/TESTING/EIG/zerrst.f b/lapack-netlib/TESTING/EIG/zerrst.f index 38fc85793..b906c96df 100644 --- a/lapack-netlib/TESTING/EIG/zerrst.f +++ b/lapack-netlib/TESTING/EIG/zerrst.f @@ -55,17 +55,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_eig * * ===================================================================== SUBROUTINE ZERRST( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH diff --git a/lapack-netlib/TESTING/EIG/zget02.f b/lapack-netlib/TESTING/EIG/zget02.f index b157379a8..efea82567 100644 --- a/lapack-netlib/TESTING/EIG/zget02.f +++ b/lapack-netlib/TESTING/EIG/zget02.f @@ -125,7 +125,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_eig * @@ -133,10 +133,10 @@ SUBROUTINE ZGET02( TRANS, M, N, NRHS, A, LDA, X, LDX, B, LDB, $ RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS @@ -154,7 +154,7 @@ DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 CONE - PARAMETER ( CONE = 1.0D+0 ) + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER J, N1, N2 @@ -191,7 +191,7 @@ * Exit with RESID = 1/EPS if ANORM = 0. * EPS = DLAMCH( 'Epsilon' ) - ANORM = ZLANGE( '1', N1, N2, A, LDA, RWORK ) + ANORM = ZLANGE( '1', M, N, A, LDA, RWORK ) IF( ANORM.LE.ZERO ) THEN RESID = ONE / EPS RETURN diff --git a/lapack-netlib/TESTING/EIG/zlarhs.f b/lapack-netlib/TESTING/EIG/zlarhs.f index 76bd2cb2a..f2fe47e9b 100644 --- a/lapack-netlib/TESTING/EIG/zlarhs.f +++ b/lapack-netlib/TESTING/EIG/zlarhs.f @@ -189,8 +189,8 @@ *> \param[out] INFO *> \verbatim *> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value *> \endverbatim * * Authors: @@ -201,7 +201,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_eig * @@ -209,10 +209,10 @@ SUBROUTINE ZLARHS( PATH, XTYPE, UPLO, TRANS, M, N, KL, KU, NRHS, $ A, LDA, X, LDX, B, LDB, ISEED, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO, XTYPE diff --git a/lapack-netlib/TESTING/LIN/CMakeLists.txt b/lapack-netlib/TESTING/LIN/CMakeLists.txt index 715f32ec2..50ba8fc28 100644 --- a/lapack-netlib/TESTING/LIN/CMakeLists.txt +++ b/lapack-netlib/TESTING/LIN/CMakeLists.txt @@ -10,13 +10,16 @@ set(SLINTST schkaa.f schkeq.f schkgb.f schkge.f schkgt.f schklq.f schkpb.f schkpo.f schkps.f schkpp.f schkpt.f schkq3.f schkql.f schkqr.f schkrq.f - schksp.f schksy.f schksy_rook.f schksy_rk.f schksy_aa.f schktb.f schktp.f schktr.f + schksp.f schksy.f schksy_rook.f schksy_rk.f + schksy_aa.f schksy_aa_2stage.f + schktb.f schktp.f schktr.f schktz.f sdrvgt.f sdrvls.f sdrvpb.f - sdrvpp.f sdrvpt.f sdrvsp.f sdrvsy.f sdrvsy_rook.f sdrvsy_rk.f sdrvsy_aa.f + sdrvpp.f sdrvpt.f sdrvsp.f sdrvsy_rook.f sdrvsy_rk.f + sdrvsy_aa.f sdrvsy_aa_2stage.f serrgt.f serrlq.f serrls.f - serrpo.f serrps.f serrql.f serrqp.f serrqr.f - serrrq.f serrsy.f serrtr.f serrtz.f serrvx.f + serrps.f serrql.f serrqp.f serrqr.f + serrrq.f serrtr.f serrtz.f sgbt01.f sgbt02.f sgbt05.f sgelqs.f sgeqls.f sgeqrs.f sgerqs.f sget01.f sget02.f sget03.f sget04.f sget06.f sget07.f sgtt01.f sgtt02.f @@ -29,38 +32,48 @@ set(SLINTST schkaa.f sqrt01.f sqrt01p.f sqrt02.f sqrt03.f sqrt11.f sqrt12.f sqrt13.f sqrt14.f sqrt15.f sqrt16.f sqrt17.f srqt01.f srqt02.f srqt03.f srzt01.f srzt02.f - sspt01.f ssyt01.f ssyt01_rook.f ssyt01_3.f ssyt01_aa.f + sspt01.f ssyt01.f ssyt01_rook.f ssyt01_3.f + ssyt01_aa.f stbt02.f stbt03.f stbt05.f stbt06.f stpt01.f stpt02.f stpt03.f stpt05.f stpt06.f strt01.f strt02.f strt03.f strt05.f strt06.f - sgennd.f - sqrt04.f sqrt05.f schkqrt.f serrqrt.f schkqrtp.f serrqrtp.f + sgennd.f sqrt04.f sqrt05.f schkqrt.f serrqrt.f schkqrtp.f serrqrtp.f schklqt.f schklqtp.f schktsqr.f serrlqt.f serrlqtp.f serrtsqr.f stsqr01.f slqt04.f slqt05.f) -if(USEXBLAS) - list(APPEND SLINTST sdrvgex.f serrgex.f sdrvgbx.f sdrvpox.f sebchvxx.f) +if(USE_XBLAS) + list(APPEND SLINTST sdrvgbx.f sdrvgex.f sdrvsyx.f sdrvpox.f + serrvxx.f serrgex.f serrsyx.f serrpox.f + sebchvxx.f) else() - list(APPEND SLINTST sdrvge.f serrge.f sdrvgb.f sdrvpo.f) + list(APPEND SLINTST sdrvgb.f sdrvge.f sdrvsy.f sdrvpo.f + serrvx.f serrge.f serrsy.f serrpo.f) endif() set(CLINTST cchkaa.f cchkeq.f cchkgb.f cchkge.f cchkgt.f - cchkhe.f cchkhe_rook.f cchkhe_rk.f cchkhe_aa.f cchkhp.f cchklq.f cchkpb.f + cchkhe.f cchkhe_rook.f cchkhe_rk.f + cchkhe_aa.f cchkhe_aa_2stage.f + cchkhp.f cchklq.f cchkpb.f cchkpo.f cchkps.f cchkpp.f cchkpt.f cchkq3.f cchkql.f - cchkqr.f cchkrq.f cchksp.f cchksy.f cchksy_rook.f cchksy_rk.f cchksy_aa.f cchktb.f + cchkqr.f cchkrq.f cchksp.f cchksy.f cchksy_rook.f cchksy_rk.f + cchksy_aa.f cchksy_aa_2stage.f + cchktb.f cchktp.f cchktr.f cchktz.f - cdrvgt.f cdrvhe.f cdrvhe_rook.f cdrvhe_rk.f cdrvhe_aa.f cdrvhp.f + cdrvgt.f cdrvhe_rook.f cdrvhe_rk.f + cdrvhe_aa.f cdrvhe_aa_2stage.f cdrvsy_aa_2stage.f + cdrvhp.f cdrvls.f cdrvpb.f cdrvpp.f cdrvpt.f - cdrvsp.f cdrvsy.f cdrvsy_rook.f cdrvsy_rk.f cdrvsy_aa.f - cerrgt.f cerrhe.f cerrlq.f + cdrvsp.f cdrvsy_rook.f cdrvsy_rk.f + cdrvsy_aa.f + cerrgt.f cerrlq.f cerrls.f cerrps.f cerrql.f cerrqp.f - cerrqr.f cerrrq.f cerrsy.f cerrtr.f cerrtz.f - cerrvx.f + cerrqr.f cerrrq.f cerrtr.f cerrtz.f cgbt01.f cgbt02.f cgbt05.f cgelqs.f cgeqls.f cgeqrs.f cgerqs.f cget01.f cget02.f cget03.f cget04.f cget07.f cgtt01.f cgtt02.f - cgtt05.f chet01.f chet01_rook.f chet01_3.f chet01_aa.f + cgtt05.f chet01.f chet01_rook.f chet01_3.f + chet01_aa.f chpt01.f claipd.f claptm.f clarhs.f clatb4.f clatb5.f clatsp.f clatsy.f clattb.f clattp.f clattr.f clavhe.f clavhe_rook.f clavhp.f clavsp.f clavsy.f clavsy_rook.f clqt01.f @@ -72,7 +85,9 @@ set(CLINTST cchkaa.f cqrt12.f cqrt13.f cqrt14.f cqrt15.f cqrt16.f cqrt17.f crqt01.f crqt02.f crqt03.f crzt01.f crzt02.f csbmv.f cspt01.f - cspt02.f cspt03.f csyt01.f csyt01_rook.f csyt01_3.f csyt01_aa.f csyt02.f csyt03.f + cspt02.f cspt03.f csyt01.f csyt01_rook.f csyt01_3.f + csyt01_aa.f + csyt02.f csyt03.f ctbt02.f ctbt03.f ctbt05.f ctbt06.f ctpt01.f ctpt02.f ctpt03.f ctpt05.f ctpt06.f ctrt01.f ctrt02.f ctrt03.f ctrt05.f ctrt06.f @@ -81,24 +96,29 @@ set(CLINTST cchkaa.f cchklqt.f cchklqtp.f cchktsqr.f cerrlqt.f cerrlqtp.f cerrtsqr.f ctsqr01.f clqt04.f clqt05.f) -if(USEXBLAS) - list(APPEND - CLINTST cdrvgex.f cdrvgbx.f cerrgex.f cdrvpox.f cerrpox.f cebchvxx.f) +if(USE_XBLAS) + list(APPEND CLINTST cdrvgbx.f cdrvgex.f cdrvhex.f cdrvsyx.f cdrvpox.f + cerrvxx.f cerrgex.f cerrhex.f cerrsyx.f cerrpox.f + cebchvxx.f) else() - list(APPEND CLINTST cdrvge.f cdrvgb.f cerrge.f cdrvpo.f cerrpo.f) + list(APPEND CLINTST cdrvgb.f cdrvge.f cdrvhe.f cdrvsy.f cdrvpo.f + cerrvx.f cerrge.f cerrhe.f cerrsy.f cerrpo.f) endif() set(DLINTST dchkaa.f dchkeq.f dchkgb.f dchkge.f dchkgt.f dchklq.f dchkpb.f dchkpo.f dchkps.f dchkpp.f dchkpt.f dchkq3.f dchkql.f dchkqr.f dchkrq.f - dchksp.f dchksy.f dchksy_rook.f dchksy_rk.f dchksy_aa.f dchktb.f dchktp.f dchktr.f + dchksp.f dchksy.f dchksy_rook.f dchksy_rk.f + dchksy_aa.f dchksy_aa_2stage.f + dchktb.f dchktp.f dchktr.f dchktz.f ddrvgt.f ddrvls.f ddrvpb.f - ddrvpp.f ddrvpt.f ddrvsp.f ddrvsy.f ddrvsy_rook.f ddrvsy_rk.f ddrvsy_aa.f + ddrvpp.f ddrvpt.f ddrvsp.f ddrvsy_rook.f ddrvsy_rk.f + ddrvsy_aa.f ddrvsy_aa_2stage.f derrgt.f derrlq.f derrls.f derrps.f derrql.f derrqp.f derrqr.f - derrrq.f derrsy.f derrtr.f derrtz.f derrvx.f + derrrq.f derrtr.f derrtz.f dgbt01.f dgbt02.f dgbt05.f dgelqs.f dgeqls.f dgeqrs.f dgerqs.f dget01.f dget02.f dget03.f dget04.f dget06.f dget07.f dgtt01.f dgtt02.f @@ -111,7 +131,8 @@ set(DLINTST dchkaa.f dqrt01.f dqrt01p.f dqrt02.f dqrt03.f dqrt11.f dqrt12.f dqrt13.f dqrt14.f dqrt15.f dqrt16.f dqrt17.f drqt01.f drqt02.f drqt03.f drzt01.f drzt02.f - dspt01.f dsyt01.f dsyt01_rook.f dsyt01_3.f dsyt01_aa.f + dspt01.f dsyt01.f dsyt01_rook.f dsyt01_3.f + dsyt01_aa.f dtbt02.f dtbt03.f dtbt05.f dtbt06.f dtpt01.f dtpt02.f dtpt03.f dtpt05.f dtpt06.f dtrt01.f dtrt02.f dtrt03.f dtrt05.f dtrt06.f @@ -120,31 +141,39 @@ set(DLINTST dchkaa.f dchklq.f dchklqt.f dchklqtp.f dchktsqr.f derrlqt.f derrlqtp.f derrtsqr.f dtsqr01.f dlqt04.f dlqt05.f) -if(USEXBLAS) - list(APPEND - DLINTST ddrvgex.f ddrvgbx.f derrgex.f ddrvpox.f derrpox.f debchvxx.f) +if(USE_XBLAS) + list(APPEND DLINTST ddrvgbx.f ddrvgex.f ddrvsyx.f ddrvpox.f + derrvxx.f derrgex.f derrsyx.f derrpox.f + debchvxx.f) else() - list(APPEND - DLINTST ddrvge.f ddrvgb.f derrge.f ddrvpo.f derrpo.f) + list(APPEND DLINTST ddrvgb.f ddrvge.f ddrvsy.f ddrvpo.f + derrvx.f derrge.f derrsy.f derrpo.f) endif() set(ZLINTST zchkaa.f zchkeq.f zchkgb.f zchkge.f zchkgt.f - zchkhe.f zchkhe_rook.f zchkhe_rk.f zchkhe_aa.f zchkhp.f zchklq.f zchkpb.f + zchkhe.f zchkhe_rook.f zchkhe_rk.f + zchkhe_aa.f zchkhe_aa_2stage.f + zchkhp.f zchklq.f zchkpb.f zchkpo.f zchkps.f zchkpp.f zchkpt.f zchkq3.f zchkql.f - zchkqr.f zchkrq.f zchksp.f zchksy.f zchksy_rook.f zchksy_rk.f zchksy_aa.f zchktb.f + zchkqr.f zchkrq.f zchksp.f zchksy.f zchksy_rook.f zchksy_rk.f + zchksy_aa.f zchksy_aa_2stage.f + zchktb.f zchktp.f zchktr.f zchktz.f - zdrvgt.f zdrvhe.f zdrvhe_rook.f zdrvhe_rk.f zdrvhe_aa.f zdrvhp.f + zdrvgt.f zdrvhe_rook.f zdrvhe_rk.f + zdrvhe_aa.f zdrvhe_aa_2stage.f + zdrvhp.f zdrvls.f zdrvpb.f zdrvpp.f zdrvpt.f - zdrvsp.f zdrvsy.f zdrvsy_rook.f zdrvsy_rk.f zdrvsy_aa.f - zerrgt.f zerrhe.f zerrlq.f + zdrvsp.f zdrvsy_rook.f zdrvsy_rk.f + zdrvsy_aa.f zdrvsy_aa_2stage.f + zerrgt.f zerrlq.f zerrls.f zerrps.f zerrql.f zerrqp.f - zerrqr.f zerrrq.f zerrsy.f zerrtr.f zerrtz.f - zerrvx.f + zerrqr.f zerrrq.f zerrtr.f zerrtz.f zgbt01.f zgbt02.f zgbt05.f zgelqs.f zgeqls.f zgeqrs.f zgerqs.f zget01.f zget02.f zget03.f zget04.f zget07.f zgtt01.f zgtt02.f - zgtt05.f zhet01.f zhet01_rook.f zhet01_3.f zhet01_aa.f + zgtt05.f zhet01.f zhet01_rook.f zhet01_3.f + zhet01_aa.f zhpt01.f zlaipd.f zlaptm.f zlarhs.f zlatb4.f zlatb5.f zlatsp.f zlatsy.f zlattb.f zlattp.f zlattr.f zlavhe.f zlavhe_rook.f zlavhp.f zlavsp.f zlavsy.f zlavsy_rook.f zlqt01.f @@ -156,7 +185,9 @@ set(ZLINTST zchkaa.f zqrt12.f zqrt13.f zqrt14.f zqrt15.f zqrt16.f zqrt17.f zrqt01.f zrqt02.f zrqt03.f zrzt01.f zrzt02.f zsbmv.f zspt01.f - zspt02.f zspt03.f zsyt01.f zsyt01_rook.f zsyt01_3.f zsyt01_aa.f zsyt02.f zsyt03.f + zspt02.f zspt03.f zsyt01.f zsyt01_rook.f zsyt01_3.f + zsyt01_aa.f + zsyt02.f zsyt03.f ztbt02.f ztbt03.f ztbt05.f ztbt06.f ztpt01.f ztpt02.f ztpt03.f ztpt05.f ztpt06.f ztrt01.f ztrt02.f ztrt03.f ztrt05.f ztrt06.f @@ -165,12 +196,13 @@ set(ZLINTST zchkaa.f zchklqt.f zchklqtp.f zchktsqr.f zerrlqt.f zerrlqtp.f zerrtsqr.f ztsqr01.f zlqt04.f zlqt05.f) -if(USEXBLAS) - list(APPEND - ZLINTST zdrvgex.f zdrvgbx.f zerrgex.f zdrvpox.f zerrpox.f zebchvxx.f) +if(USE_XBLAS) + list(APPEND ZLINTST zdrvgbx.f zdrvgex.f zdrvhex.f zdrvsyx.f zdrvpox.f + zerrvxx.f zerrgex.f zerrhex.f zerrsyx.f zerrpox.f + zebchvxx.f) else() - list(APPEND - ZLINTST zdrvge.f zdrvgb.f zerrge.f zdrvpo.f zerrpo.f) + list(APPEND ZLINTST zdrvgb.f zdrvge.f zdrvhe.f zdrvsy.f zdrvpo.f + zerrvx.f zerrge.f zerrhe.f zerrsy.f zerrpo.f) endif() set(DSLINTST dchkab.f @@ -203,33 +235,33 @@ set(ZLINTSTRFP zchkrfp.f zdrvrfp.f zdrvrf1.f zdrvrf2.f zdrvrf3.f zdrvrf4.f zerrr macro(add_lin_executable name) add_executable(${name} ${ARGN}) - target_link_libraries(${name} tmglib ${LAPACK_LIBRARIES}) + target_link_libraries(${name} tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) endmacro() if(BUILD_SINGLE) - add_lin_executable(xlintsts ${ALINTST} ${SCLNTST} ${SLINTST} ${SECOND_SRC}) - add_lin_executable(xlintstrfs ${SLINTSTRFP} ${SECOND_SRC}) + add_lin_executable(xlintsts ${ALINTST} ${SLINTST} ${SCLNTST}) + add_lin_executable(xlintstrfs ${SLINTSTRFP}) endif() if(BUILD_DOUBLE) - add_lin_executable(xlintstd ${ALINTST} ${DLINTST} ${DZLNTST} ${DSECOND_SRC}) - add_lin_executable(xlintstrfd ${DLINTSTRFP} ${DSECOND_SRC}) + add_lin_executable(xlintstd ${ALINTST} ${DLINTST} ${DZLNTST}) + add_lin_executable(xlintstrfd ${DLINTSTRFP}) endif() if(BUILD_SINGLE AND BUILD_DOUBLE) - add_lin_executable(xlintstds ${DSLINTST} ${SECOND_SRC} ${DSECOND_SRC}) + add_lin_executable(xlintstds ${DSLINTST}) endif() if(BUILD_COMPLEX) - add_lin_executable(xlintstc ${ALINTST} ${CLINTST} ${SCLNTST} ${SECOND_SRC}) - add_lin_executable(xlintstrfc ${CLINTSTRFP} ${SECOND_SRC}) + add_lin_executable(xlintstc ${ALINTST} ${CLINTST} ${SCLNTST}) + add_lin_executable(xlintstrfc ${CLINTSTRFP}) endif() if(BUILD_COMPLEX16) - add_lin_executable(xlintstz ${ALINTST} ${ZLINTST} ${DZLNTST} ${DSECOND_SRC}) - add_lin_executable(xlintstrfz ${ZLINTSTRFP} ${DSECOND_SRC}) + add_lin_executable(xlintstz ${ALINTST} ${ZLINTST} ${DZLNTST}) + add_lin_executable(xlintstrfz ${ZLINTSTRFP}) endif() if(BUILD_COMPLEX AND BUILD_COMPLEX16) - add_lin_executable(xlintstzc ${ZCLINTST} ${SECOND_SRC} ${DSECOND_SRC}) + add_lin_executable(xlintstzc ${ZCLINTST}) endif() diff --git a/lapack-netlib/TESTING/LIN/Makefile b/lapack-netlib/TESTING/LIN/Makefile index bd188b20b..1a332f70b 100644 --- a/lapack-netlib/TESTING/LIN/Makefile +++ b/lapack-netlib/TESTING/LIN/Makefile @@ -7,10 +7,10 @@ include ../../make.inc # ALINTST -- Auxiliary test routines # SLINTST -- Single precision real test routines # CLINTST -- Single precision complex test routines -# SCLNTST -- Single and Complex routines in common +# SCLNTST -- Single precision real and complex routines in common # DLINTST -- Double precision real test routines # ZLINTST -- Double precision complex test routines -# DZLNTST -- Double and Double Complex routines in common +# DZLNTST -- Double precision real and complex routines in common # # Test programs can be generated for all or some of the four different # precisions. Enter make followed by one or more of the data types @@ -22,12 +22,11 @@ include ../../make.inc # make # without any arguments creates all four test programs. # The executable files are called -# xlintims, xlintimd, xlintimc, and xlintimz -# and are created in the next higher directory level. +# xlintsts, xlintstd, xlintstc, and xlintstz # # To remove the object files after the executable files have been # created, enter -# make clean +# make cleanobj # On some systems, you can force the source files to be recompiled by # entering (for example) # make single FRC=FRC @@ -51,10 +50,12 @@ SLINTST = schkaa.o \ schkeq.o schkgb.o schkge.o schkgt.o \ schklq.o schkpb.o schkpo.o schkps.o schkpp.o \ schkpt.o schkq3.o schkql.o schkqr.o schkrq.o \ - schksp.o schksy.o schksy_rook.o schksy_rk.o schksy_aa.o schktb.o schktp.o schktr.o \ + schksp.o schksy.o schksy_rook.o schksy_rk.o \ + schksy_aa.o schksy_aa_2stage.o schktb.o schktp.o schktr.o \ schktz.o \ sdrvgt.o sdrvls.o sdrvpb.o \ - sdrvpp.o sdrvpt.o sdrvsp.o sdrvsy_rook.o sdrvsy_rk.o sdrvsy_aa.o \ + sdrvpp.o sdrvpt.o sdrvsp.o sdrvsy_rook.o sdrvsy_rk.o \ + sdrvsy_aa.o sdrvsy_aa_2stage.o \ serrgt.o serrlq.o serrls.o \ serrps.o serrql.o serrqp.o serrqr.o \ serrrq.o serrtr.o serrtz.o \ @@ -79,30 +80,34 @@ SLINTST = schkaa.o \ serrlqt.o serrlqtp.o serrtsqr.o stsqr01.o slqt04.o slqt05.o ifdef USEXBLAS -SLINTST += serrvxx.o sdrvgex.o sdrvsyx.o serrgex.o sdrvgbx.o sdrvpox.o \ - sebchvxx.o serrsyx.o serrpox.o +SLINTST += sdrvgbx.o sdrvgex.o sdrvsyx.o sdrvpox.o \ + serrvxx.o serrgex.o serrsyx.o serrpox.o \ + sebchvxx.o else -SLINTST += serrvx.o sdrvge.o sdrvsy.o serrge.o sdrvgb.o sdrvpo.o \ - serrsy.o serrpo.o +SLINTST += sdrvgb.o sdrvge.o sdrvsy.o sdrvpo.o \ + serrvx.o serrge.o serrsy.o serrpo.o endif CLINTST = cchkaa.o \ cchkeq.o cchkgb.o cchkge.o cchkgt.o \ - cchkhe.o cchkhe_rook.o cchkhe_rk.o cchkhe_aa.o cchkhp.o cchklq.o cchkpb.o \ + cchkhe.o cchkhe_rook.o cchkhe_rk.o \ + cchkhe_aa.o cchkhe_aa_2stage.o cchkhp.o cchklq.o cchkpb.o \ cchkpo.o cchkps.o cchkpp.o cchkpt.o cchkq3.o cchkql.o \ - cchkqr.o cchkrq.o cchksp.o cchksy.o cchksy_rook.o cchksy_rk.o cchksy_aa.o cchktb.o \ + cchkqr.o cchkrq.o cchksp.o cchksy.o cchksy_rook.o cchksy_rk.o \ + cchksy_aa.o cchksy_aa_2stage.o cchktb.o \ cchktp.o cchktr.o cchktz.o \ cdrvgt.o cdrvhe_rook.o cdrvhe_rk.o cdrvhe_aa.o cdrvhp.o \ + cdrvhe_aa_2stage.o \ cdrvls.o cdrvpb.o cdrvpp.o cdrvpt.o \ - cdrvsp.o cdrvsy_rook.o cdrvsy_rk.o cdrvsy_aa.o \ + cdrvsp.o cdrvsy_rook.o cdrvsy_rk.o cdrvsy_aa.o cdrvsy_aa_2stage.o \ cerrgt.o cerrlq.o \ cerrls.o cerrps.o cerrql.o cerrqp.o \ cerrqr.o cerrrq.o cerrtr.o cerrtz.o \ cgbt01.o cgbt02.o cgbt05.o cgelqs.o cgeqls.o cgeqrs.o \ cgerqs.o cget01.o cget02.o \ cget03.o cget04.o cget07.o cgtt01.o cgtt02.o \ - cgtt05.o chet01.o chet01_rook.o chet01_3.o \ - chet01_aa.o chpt01.o claipd.o claptm.o clarhs.o clatb4.o clatb5.o \ + cgtt05.o chet01.o chet01_rook.o chet01_3.o chet01_aa.o \ + chpt01.o claipd.o claptm.o clarhs.o clatb4.o clatb5.o \ clatsp.o clatsy.o clattb.o clattp.o clattr.o \ clavhe.o clavhe_rook.o clavhp.o clavsp.o clavsy.o clavsy_rook.o clqt01.o \ clqt02.o clqt03.o cpbt01.o cpbt02.o cpbt05.o \ @@ -123,21 +128,24 @@ CLINTST = cchkaa.o \ cerrlqt.o cerrlqtp.o cerrtsqr.o ctsqr01.o clqt04.o clqt05.o ifdef USEXBLAS -CLINTST += cerrvxx.o cdrvgex.o cdrvsyx.o cdrvgbx.o cerrgex.o cdrvpox.o \ - cdrvhex.o cerrpox.o cebchvxx.o cerrsyx.o cerrhex.o +CLINTST += cdrvgbx.o cdrvgex.o cdrvhex.o cdrvsyx.o cdrvpox.o \ + cerrvxx.o cerrgex.o cerrhex.o cerrsyx.o cerrpox.o \ + cebchvxx.o else -CLINTST += cerrvx.o cdrvge.o cdrvsy.o cdrvgb.o cerrge.o cdrvpo.o \ - cdrvhe.o cerrpo.o cerrsy.o cerrhe.o +CLINTST += cdrvgb.o cdrvge.o cdrvhe.o cdrvsy.o cdrvpo.o \ + cerrvx.o cerrge.o cerrhe.o cerrsy.o cerrpo.o endif DLINTST = dchkaa.o \ dchkeq.o dchkgb.o dchkge.o dchkgt.o \ dchklq.o dchkpb.o dchkpo.o dchkps.o dchkpp.o \ dchkpt.o dchkq3.o dchkql.o dchkqr.o dchkrq.o \ - dchksp.o dchksy.o dchksy_rook.o dchksy_rk.o dchksy_aa.o dchktb.o dchktp.o dchktr.o \ + dchksp.o dchksy.o dchksy_rook.o dchksy_rk.o \ + dchksy_aa.o dchksy_aa_2stage.o dchktb.o dchktp.o dchktr.o \ dchktz.o \ ddrvgt.o ddrvls.o ddrvpb.o \ - ddrvpp.o ddrvpt.o ddrvsp.o ddrvsy_rook.o ddrvsy_rk.o ddrvsy_aa.o \ + ddrvpp.o ddrvpt.o ddrvsp.o ddrvsy_rook.o ddrvsy_rk.o \ + ddrvsy_aa.o ddrvsy_aa_2stage.o \ derrgt.o derrlq.o derrls.o \ derrps.o derrql.o derrqp.o derrqr.o \ derrrq.o derrtr.o derrtz.o \ @@ -163,30 +171,33 @@ DLINTST = dchkaa.o \ derrlqt.o derrlqtp.o derrtsqr.o dtsqr01.o dlqt04.o dlqt05.o ifdef USEXBLAS -DLINTST += derrvxx.o ddrvgex.o ddrvsyx.o ddrvgbx.o derrgex.o ddrvpox.o derrpox.o \ - debchvxx.o derrsyx.o +DLINTST += ddrvgbx.o ddrvgex.o ddrvsyx.o ddrvpox.o \ + derrvxx.o derrgex.o derrsyx.o derrpox.o \ + debchvxx.o else -DLINTST += derrvx.o ddrvge.o ddrvsy.o ddrvgb.o derrge.o ddrvpo.o derrpo.o \ - derrsy.o +DLINTST += ddrvgb.o ddrvge.o ddrvsy.o ddrvpo.o \ + derrvx.o derrge.o derrsy.o derrpo.o endif ZLINTST = zchkaa.o \ zchkeq.o zchkgb.o zchkge.o zchkgt.o \ - zchkhe.o zchkhe_rook.o zchkhe_rk.o zchkhe_aa.o zchkhp.o zchklq.o zchkpb.o \ + zchkhe.o zchkhe_rook.o zchkhe_rk.o zchkhe_aa.o zchkhe_aa_2stage.o \ + zchkhp.o zchklq.o zchkpb.o \ zchkpo.o zchkps.o zchkpp.o zchkpt.o zchkq3.o zchkql.o \ - zchkqr.o zchkrq.o zchksp.o zchksy.o zchksy_rook.o zchksy_rk.o zchksy_aa.o zchktb.o \ + zchkqr.o zchkrq.o zchksp.o zchksy.o zchksy_rook.o zchksy_rk.o \ + zchksy_aa.o zchksy_aa_2stage.o zchktb.o \ zchktp.o zchktr.o zchktz.o \ - zdrvgt.o zdrvhe_rook.o zdrvhe_rk.o zdrvhe_aa.o zdrvhp.o \ + zdrvgt.o zdrvhe_rook.o zdrvhe_rk.o zdrvhe_aa.o zdrvhe_aa_2stage.o zdrvhp.o \ zdrvls.o zdrvpb.o zdrvpp.o zdrvpt.o \ - zdrvsp.o zdrvsy_rook.o zdrvsy_rk.o zdrvsy_aa.o \ + zdrvsp.o zdrvsy_rook.o zdrvsy_rk.o zdrvsy_aa.o zdrvsy_aa_2stage.o \ zerrgt.o zerrlq.o \ zerrls.o zerrps.o zerrql.o zerrqp.o \ zerrqr.o zerrrq.o zerrtr.o zerrtz.o \ zgbt01.o zgbt02.o zgbt05.o zgelqs.o zgeqls.o zgeqrs.o \ zgerqs.o zget01.o zget02.o \ zget03.o zget04.o zget07.o zgtt01.o zgtt02.o \ - zgtt05.o zhet01.o zhet01_rook.o zhet01_3.o \ - zhet01_aa.o zhpt01.o zlaipd.o zlaptm.o zlarhs.o zlatb4.o zlatb5.o \ + zgtt05.o zhet01.o zhet01_rook.o zhet01_3.o zhet01_aa.o \ + zhpt01.o zlaipd.o zlaptm.o zlarhs.o zlatb4.o zlatb5.o \ zlatsp.o zlatsy.o zlattb.o zlattp.o zlattr.o \ zlavhe.o zlavhe_rook.o zlavhp.o zlavsp.o zlavsy.o zlavsy_rook.o zlqt01.o \ zlqt02.o zlqt03.o zpbt01.o zpbt02.o zpbt05.o \ @@ -207,11 +218,12 @@ ZLINTST = zchkaa.o \ zerrlqt.o zerrlqtp.o zerrtsqr.o ztsqr01.o zlqt04.o zlqt05.o ifdef USEXBLAS -ZLINTST += zerrvxx.o zdrvgex.o zdrvsyx.o zdrvgbx.o zerrgex.o zdrvpox.o zdrvhex.o \ - zerrpox.o zebchvxx.o zerrsyx.o zerrhex.o +ZLINTST += zdrvgbx.o zdrvgex.o zdrvhex.o zdrvsyx.o zdrvpox.o \ + zerrvxx.o zerrgex.o zerrhex.o zerrsyx.o zerrpox.o \ + zebchvxx.o else -ZLINTST += zerrvx.o zdrvge.o zdrvsy.o zdrvgb.o zerrge.o zdrvpo.o \ - zdrvhe.o zerrpo.o zerrsy.o zerrhe.o +ZLINTST += zdrvgb.o zdrvge.o zdrvhe.o zdrvsy.o zdrvpo.o \ + zerrvx.o zerrge.o zerrhe.o zerrsy.o zerrpo.o endif DSLINTST = dchkab.o \ @@ -244,55 +256,45 @@ ZLINTSTRFP = zchkrfp.o zdrvrfp.o zdrvrf1.o zdrvrf2.o zdrvrf3.o zdrvrf4.o zerrrfp all: single double complex complex16 proto-single proto-double proto-complex proto-complex16 -single: ../xlintsts -double: ../xlintstd -complex: ../xlintstc -complex16: ../xlintstz +single: xlintsts +double: xlintstd +complex: xlintstc +complex16: xlintstz -proto-single: ../xlintstrfs -proto-double: ../xlintstds ../xlintstrfd -proto-complex: ../xlintstrfc -proto-complex16: ../xlintstzc ../xlintstrfz +proto-single: xlintstrfs +proto-double: xlintstds xlintstrfd +proto-complex: xlintstrfc +proto-complex16: xlintstzc xlintstrfz -../xlintsts: $(ALINTST) $(SLINTST) $(SCLNTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(ALINTST) $(SCLNTST) $(SLINTST) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) +xlintsts: $(ALINTST) $(SLINTST) $(SCLNTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstc: $(ALINTST) $(CLINTST) $(SCLNTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(ALINTST) $(SCLNTST) $(CLINTST) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) +xlintstc: $(ALINTST) $(CLINTST) $(SCLNTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstd: $(ALINTST) $(DLINTST) $(DZLNTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $^ \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) +xlintstd: $(ALINTST) $(DLINTST) $(DZLNTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstz: $(ALINTST) $(ZLINTST) $(DZLNTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(ALINTST) $(DZLNTST) $(ZLINTST) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) +xlintstz: $(ALINTST) $(ZLINTST) $(DZLNTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(XBLASLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstds: $(DSLINTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(DSLINTST) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) +xlintstds: $(DSLINTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstzc: $(ZCLINTST) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(ZCLINTST) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) +xlintstzc: $(ZCLINTST) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstrfs: $(SLINTSTRFP) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(SLINTSTRFP) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) +xlintstrfs: $(SLINTSTRFP) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstrfd: $(DLINTSTRFP) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(DLINTSTRFP) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) +xlintstrfd: $(DLINTSTRFP) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstrfc: $(CLINTSTRFP) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(CLINTSTRFP) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) +xlintstrfc: $(CLINTSTRFP) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ -../xlintstrfz: $(ZLINTSTRFP) ../../$(LAPACKLIB) - $(LOADER) $(LOADOPTS) -o $@ $(ZLINTSTRFP) \ - ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) +xlintstrfz: $(ZLINTSTRFP) ../../$(TMGLIB) ../../$(LAPACKLIB) $(BLASLIB) + $(LOADER) $(LOADOPTS) -o $@ $^ $(ALINTST): $(FRC) $(SCLNTST): $(FRC) @@ -305,8 +307,11 @@ $(ZLINTST): $(FRC) FRC: @FRC=$(FRC) -clean: +clean: cleanobj cleanexe +cleanobj: rm -f *.o +cleanexe: + rm -f xlintst* schkaa.o: schkaa.f $(FORTRAN) $(DRVOPTS) -c -o $@ $< diff --git a/lapack-netlib/TESTING/LIN/cchkaa.f b/lapack-netlib/TESTING/LIN/cchkaa.f index 9724618db..d8d5060c3 100644 --- a/lapack-netlib/TESTING/LIN/cchkaa.f +++ b/lapack-netlib/TESTING/LIN/cchkaa.f @@ -53,6 +53,9 @@ *> CHR 10 List types on next line if 0 < NTYPES < 10 *> CHK 10 List types on next line if 0 < NTYPES < 10 *> CHA 10 List types on next line if 0 < NTYPES < 10 +*> CH2 10 List types on next line if 0 < NTYPES < 10 +*> CSA 11 List types on next line if 0 < NTYPES < 10 +*> CS2 11 List types on next line if 0 < NTYPES < 10 *> CHP 10 List types on next line if 0 < NTYPES < 10 *> CSY 11 List types on next line if 0 < NTYPES < 11 *> CSK 11 List types on next line if 0 < NTYPES < 11 @@ -105,17 +108,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_lin * * ===================================================================== PROGRAM CCHKAA * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * ===================================================================== * @@ -708,7 +711,34 @@ * IF( TSTCHK ) THEN CALL CCHKHE_AA( DOTYPE, NN, NVAL, NNB2, NBVAL2, NNS, - $ NSVAL, THRESH, TSTERR, LDA, + $ NSVAL, THRESH, TSTERR, LDA, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9989 )PATH + END IF +* + IF( TSTDRV ) THEN + CALL CDRVHE_AA( DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ LDA, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9988 )PATH + END IF +* + ELSE IF( LSAMEN( 2, C2, 'H2' ) ) THEN +* +* H2: Hermitian matrices, +* with partial (Aasen's) pivoting algorithm +* + NTYPES = 10 + CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) +* + IF( TSTCHK ) THEN + CALL CCHKHE_AA_2STAGE( DOTYPE, NN, NVAL, NNB2, NBVAL2, + $ NNS, NSVAL, THRESH, TSTERR, LDA, $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), $ WORK, RWORK, IWORK, NOUT ) @@ -717,7 +747,8 @@ END IF * IF( TSTDRV ) THEN - CALL CDRVHE_AA( DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + CALL CDRVHE_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, $ LDA, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), $ WORK, RWORK, IWORK, NOUT ) @@ -854,6 +885,34 @@ ELSE WRITE( NOUT, FMT = 9988 )PATH END IF +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* S2: symmetric indefinite matrices with Aasen's algorithm +* 2 stage +* + NTYPES = 11 + CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) +* + IF( TSTCHK ) THEN + CALL CCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB2, NBVAL2, NNS, + $ NSVAL, THRESH, TSTERR, LDA, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9989 )PATH + END IF +* + IF( TSTDRV ) THEN + CALL CDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ LDA, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), WORK, + $ RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9988 )PATH + END IF * ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/cchkhe_aa.f b/lapack-netlib/TESTING/LIN/cchkhe_aa.f index cb1f07b68..8dd551a72 100644 --- a/lapack-netlib/TESTING/LIN/cchkhe_aa.f +++ b/lapack-netlib/TESTING/LIN/cchkhe_aa.f @@ -162,7 +162,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * *> \ingroup complex_lin @@ -172,10 +172,10 @@ $ THRESH, TSTERR, NMAX, A, AFAC, AINV, B, $ X, XACT, WORK, RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -218,18 +218,13 @@ INTEGER ISEED( 4 ), ISEEDY( 4 ) REAL RESULT( NTESTS ) * .. -* .. External Functions .. - REAL DGET06, CLANHE - EXTERNAL DGET06, CLANHE -* .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, CERRHE, CGET04, - $ ZHECON, CHERFS, CHET01_AA, CHETRF_AA, ZHETRI2, - $ CHETRS_AA, CLACPY, CLAIPD, CLARHS, CLATB4, - $ CLATMS, CPOT02, ZPOT03, ZPOT05 + EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, CERRHE, CHET01_AA, + $ CHETRF_AA, CHETRS_AA, CLACPY, CLAIPD, CLARHS, + $ CLATB4, CLATMS, CPOT02 * .. * .. Intrinsic Functions .. - INTRINSIC REAL, IMAG, MAX, MIN + INTRINSIC MAX, MIN * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -439,22 +434,22 @@ * Adjust the expected value of INFO to account for * pivoting. * - IF( IZERO.GT.0 ) THEN - J = 1 - K = IZERO - 100 CONTINUE - IF( J.EQ.K ) THEN - K = IWORK( J ) - ELSE IF( IWORK( J ).EQ.K ) THEN - K = J - END IF - IF( J.LT.K ) THEN - J = J + 1 - GO TO 100 - END IF - ELSE +c IF( IZERO.GT.0 ) THEN +c J = 1 +c K = IZERO +c 100 CONTINUE +c IF( J.EQ.K ) THEN +c K = IWORK( J ) +c ELSE IF( IWORK( J ).EQ.K ) THEN +c K = J +c END IF +c IF( J.LT.K ) THEN +c J = J + 1 +c GO TO 100 +c END IF +c ELSE K = 0 - END IF +c END IF * * Check error code from CHETRF and handle error. * @@ -517,30 +512,33 @@ * Check error code from CHETRS and handle error. * IF( INFO.NE.0 ) THEN - CALL ALAERH( PATH, 'CHETRS_AA', INFO, 0, - $ UPLO, N, N, -1, -1, NRHS, IMAT, - $ NFAIL, NERRS, NOUT ) - END IF -* - CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) -* -* Compute the residual for the solution -* - CALL CPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, - $ LDA, RWORK, RESULT( 2 ) ) -* -* Print information about the tests that did not pass -* the threshold. -* - DO 120 K = 2, 2 - IF( RESULT( K ).GE.THRESH ) THEN - IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) - $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, - $ IMAT, K, RESULT( K ) - NFAIL = NFAIL + 1 + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'CHETRS_AA', INFO, 0, + $ UPLO, N, N, -1, -1, NRHS, IMAT, + $ NFAIL, NERRS, NOUT ) END IF - 120 CONTINUE + ELSE + CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) +* +* Compute the residual for the solution +* + CALL CPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) +* +* Print information about the tests that did not pass +* the threshold. +* + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF NRUN = NRUN + 1 * * End do for each value of NRHS in NSVAL. diff --git a/lapack-netlib/TESTING/LIN/cchkhe_aa_2stage.f b/lapack-netlib/TESTING/LIN/cchkhe_aa_2stage.f new file mode 100644 index 000000000..06b2134fa --- /dev/null +++ b/lapack-netlib/TESTING/LIN/cchkhe_aa_2stage.f @@ -0,0 +1,584 @@ +*> \brief \b CCHKHE_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CCHKHE_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, +* NNS, NSVAL, THRESH, TSTERR, NMAX, A, +* AFAC, AINV, B, X, XACT, WORK, RWORK, +* IWORK, NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NN, NNB, NNS, NOUT +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) +* REAL RWORK( * ) +* COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CCHKSY_AA_2STAGE tests CHETRF_AA_2STAGE, -TRS_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NNB +*> \verbatim +*> NNB is INTEGER +*> The number of values of NB contained in the vector NBVAL. +*> \endverbatim +*> +*> \param[in] NBVAL +*> \verbatim +*> NBVAL is INTEGER array, dimension (NBVAL) +*> The values of the blocksize NB. +*> \endverbatim +*> +*> \param[in] NNS +*> \verbatim +*> NNS is INTEGER +*> The number of values of NRHS contained in the vector NSVAL. +*> \endverbatim +*> +*> \param[in] NSVAL +*> \verbatim +*> NSVAL is INTEGER array, dimension (NNS) +*> The values of the number of right hand sides NRHS. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX array, dimension (NMAX*NSMAX) +*> where NSMAX is the largest entry in NSVAL. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (NMAX*max(3,NSMAX)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(NMAX,2*NSMAX)) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +* +*> \ingroup complex_lin +* +* ===================================================================== + SUBROUTINE CCHKHE_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, + $ NSVAL, THRESH, TSTERR, NMAX, A, AFAC, AINV, + $ B, X, XACT, WORK, RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NN, NNB, NNS, NMAX, NOUT + REAL THRESH +* .. +* .. Array Arguments .. +* + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) + REAL RWORK( * ) + COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) + INTEGER NTYPES + PARAMETER ( NTYPES = 10 ) + INTEGER NTESTS + PARAMETER ( NTESTS = 9 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, TYPE, UPLO, XTYPE + CHARACTER*3 PATH, MATPATH + INTEGER I, I1, I2, IMAT, IN, INB, INFO, IOFF, IRHS, + $ IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, MODE, + $ N, NB, NERRS, NFAIL, NIMAT, NRHS, NRUN, NT + REAL ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + REAL RESULT( NTESTS ) +* .. +* .. External Subroutines .. + EXTERNAL ALAERH, ALAHD, ALASUM, CERRHE, CLACPY, + $ CLARHS, CLATB4, CLATMS, CPOT02, + $ CHETRF_AA_2STAGE, + $ CHETRS_AA_2STAGE, XLAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* +* Test path +* + PATH( 1: 1 ) = 'Complex precision' + PATH( 2: 3 ) = 'H2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Complex precision' + MATPATH( 2: 3 ) = 'HE' + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL CERRHE( PATH, NOUT ) + INFOT = 0 +* +* Set the minimum block size for which the block routine should +* be used, which will be later returned by ILAENV +* + CALL XLAENV( 2, 2 ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + IF( N .GT. NMAX ) THEN + NFAIL = NFAIL + 1 + WRITE(NOUT, 9995) 'M ', N, NMAX + GO TO 180 + END IF + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + IZERO = 0 +* +* Do for each value of matrix type IMAT +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* +* Set up parameters with CLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL CLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, + $ ANORM, MODE, CNDNUM, DIST ) +* +* Generate a matrix with CLATMS. +* + SRNAMT = 'CLATMS' + CALL CLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, WORK, + $ INFO ) +* +* Check error code from CLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'CLATMS', INFO, 0, UPLO, N, N, -1, + $ -1, -1, IMAT, NFAIL, NERRS, NOUT ) +* +* Skip all tests for this generated matrix +* + GO TO 160 + END IF +* +* For matrix types 3-6, zero one or more rows and +* columns of the matrix to test that INFO is returned +* correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = CZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = CZERO + 50 CONTINUE + END IF + ELSE + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = CZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the last IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = CZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate test matrix A. +* +* +* Set the imaginary part of the diagonals. +* + CALL CLAIPD( N, A, LDA+1, 0 ) +* +* Do for each value of NB in NBVAL +* + DO 150 INB = 1, NNB +* +* Set the optimal blocksize, which will be later +* returned by ILAENV. +* + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Copy the test matrix A into matrix AFAC which +* will be factorized in place. This is needed to +* preserve the test matrix A for subsequent tests. +* + CALL CLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) +* +* Compute the L*D*L**T or U*D*U**T factorization of the +* matrix. IWORK stores details of the interchanges and +* the block structure of D. AINV is a work array for +* block factorization, LWORK is the length of AINV. +* + SRNAMT = 'CHETRF_AA_2STAGE' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL CHETRF_AA_2STAGE( UPLO, N, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ WORK, LWORK, + $ INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from CHETRF and handle error. +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'CHETRF_AA_2STAGE', INFO, K, + $ UPLO, N, N, -1, -1, NB, IMAT, NFAIL, + $ NERRS, NOUT ) + END IF +* +*+ TEST 1 +* Reconstruct matrix from factors and compute residual. +* +* +c NEED TO WRITE CHET01_AA_2STAGE +c CALL CHET01_AA( UPLO, N, A, LDA, AFAC, LDA, IWORK, +c $ AINV, LDA, RWORK, RESULT( 1 ) ) +c NT = 1 + NT = 0 +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )UPLO, N, NB, IMAT, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT +* +* Skip solver test if INFO is not 0. +* + IF( INFO.NE.0 ) THEN + GO TO 140 + END IF +* +* Do for each value of NRHS in NSVAL. +* + DO 130 IRHS = 1, NNS + NRHS = NSVAL( IRHS ) +* +*+ TEST 2 (Using TRS) +* Solve and compute residual for A * X = B. +* +* Choose a set of NRHS random solution vectors +* stored in XACT and set up the right hand side B +* + SRNAMT = 'CLARHS' + CALL CLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, + $ KL, KU, NRHS, A, LDA, XACT, LDA, + $ B, LDA, ISEED, INFO ) + CALL CLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* + SRNAMT = 'CHETRS_AA_2STAGE' + LWORK = MAX( 1, 3*N-2 ) + CALL CHETRS_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, IWORK, IWORK( 1+N ), + $ X, LDA, INFO ) +* +* Check error code from CHETRS and handle error. +* + IF( INFO.NE.0 ) THEN + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'CHETRS_AA_2STAGE', + $ INFO, 0, UPLO, N, N, -1, -1, + $ NRHS, IMAT, NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) +* +* Compute the residual for the solution +* + CALL CPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) +* +* Print information about the tests that did not pass +* the threshold. +* + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF + NRUN = NRUN + 1 +* +* End do for each value of NRHS in NSVAL. +* + 130 CONTINUE + 140 CONTINUE + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NB =', I4, ', type ', + $ I2, ', test ', I2, ', ratio =', G12.5 ) + 9998 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NRHS=', I3, ', type ', + $ I2, ', test(', I2, ') =', G12.5 ) + 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', + $ I6 ) + RETURN +* +* End of CCHKSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/cchkrfp.f b/lapack-netlib/TESTING/LIN/cchkrfp.f index 6e903eb25..6a692f71d 100644 --- a/lapack-netlib/TESTING/LIN/cchkrfp.f +++ b/lapack-netlib/TESTING/LIN/cchkrfp.f @@ -59,7 +59,7 @@ * ===================================================================== PROGRAM CCHKRFP * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -217,11 +217,6 @@ WRITE( NOUT, FMT = 9999 ) STOP END IF -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF * * Calculate and print the machine dependent constants. * diff --git a/lapack-netlib/TESTING/LIN/cchksy_aa.f b/lapack-netlib/TESTING/LIN/cchksy_aa.f index 33bbad192..3a38d87a4 100644 --- a/lapack-netlib/TESTING/LIN/cchksy_aa.f +++ b/lapack-netlib/TESTING/LIN/cchksy_aa.f @@ -161,7 +161,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @generated from LIN/dchksy_aa.f, fortran d -> c, Wed Nov 16 21:34:18 2016 * @@ -172,10 +172,10 @@ $ THRESH, TSTERR, NMAX, A, AFAC, AINV, B, $ X, XACT, WORK, RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -218,15 +218,10 @@ INTEGER ISEED( 4 ), ISEEDY( 4 ) REAL RESULT( NTESTS ) * .. -* .. External Functions .. - REAL DGET06, CLANSY - EXTERNAL DGET06, CLANSY -* .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, CERRSY, CGET04, CLACPY, - $ CLARHS, CLATB4, CLATMS, CSYT02, DSYT03, DSYT05, - $ DSYCON, CSYRFS, CSYT01_AA, CSYTRF_AA, - $ DSYTRI2, CSYTRS_AA, XLAENV + EXTERNAL ALAERH, ALAHD, ALASUM, CERRSY, CLACPY, CLARHS, + $ CLATB4, CLATMS, CSYT02, CSYT01_AA, CSYTRF_AA, + $ CSYTRS_AA, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -440,22 +435,22 @@ * Adjust the expected value of INFO to account for * pivoting. * - IF( IZERO.GT.0 ) THEN - J = 1 - K = IZERO - 100 CONTINUE - IF( J.EQ.K ) THEN - K = IWORK( J ) - ELSE IF( IWORK( J ).EQ.K ) THEN - K = J - END IF - IF( J.LT.K ) THEN - J = J + 1 - GO TO 100 - END IF - ELSE +c IF( IZERO.GT.0 ) THEN +c J = 1 +c K = IZERO +c 100 CONTINUE +c IF( J.EQ.K ) THEN +c K = IWORK( J ) +c ELSE IF( IWORK( J ).EQ.K ) THEN +c K = J +c END IF +c IF( J.LT.K ) THEN +c J = J + 1 +c GO TO 100 +c END IF +c ELSE K = 0 - END IF +c END IF * * Check error code from CSYTRF and handle error. * @@ -519,31 +514,34 @@ * Check error code from CSYTRS and handle error. * IF( INFO.NE.0 ) THEN - CALL ALAERH( PATH, 'CSYTRS_AA', INFO, 0, - $ UPLO, N, N, -1, -1, NRHS, IMAT, - $ NFAIL, NERRS, NOUT ) - END IF -* - CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'CSYTRS_AA', INFO, 0, + $ UPLO, N, N, -1, -1, NRHS, IMAT, + $ NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) * -* Compute the residual for the solution +* Compute the residual for the solution * - CALL CSYT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, - $ LDA, RWORK, RESULT( 2 ) ) + CALL CSYT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) * * -* Print information about the tests that did not pass -* the threshold. +* Print information about the tests that did not pass +* the threshold. * - DO 120 K = 2, 2 - IF( RESULT( K ).GE.THRESH ) THEN - IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) - $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, - $ IMAT, K, RESULT( K ) - NFAIL = NFAIL + 1 - END IF - 120 CONTINUE + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF NRUN = NRUN + 1 * * End do for each value of NRHS in NSVAL. diff --git a/lapack-netlib/TESTING/LIN/cchksy_aa_2stage.f b/lapack-netlib/TESTING/LIN/cchksy_aa_2stage.f new file mode 100644 index 000000000..c17ae54e6 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/cchksy_aa_2stage.f @@ -0,0 +1,573 @@ +*> \brief \b CCHKSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, +* NNS, NSVAL, THRESH, TSTERR, NMAX, A, +* AFAC, AINV, B, X, XACT, WORK, RWORK, +* IWORK, NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NNB, NNS, NOUT +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) +* REAL RWORK( * ) +* COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CCHKSY_AA_2STAGE tests CSYTRF_AA_2STAGE, -TRS_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NNB +*> \verbatim +*> NNB is INTEGER +*> The number of values of NB contained in the vector NBVAL. +*> \endverbatim +*> +*> \param[in] NBVAL +*> \verbatim +*> NBVAL is INTEGER array, dimension (NBVAL) +*> The values of the blocksize NB. +*> \endverbatim +*> +*> \param[in] NNS +*> \verbatim +*> NNS is INTEGER +*> The number of values of NRHS contained in the vector NSVAL. +*> \endverbatim +*> +*> \param[in] NSVAL +*> \verbatim +*> NSVAL is INTEGER array, dimension (NNS) +*> The values of the number of right hand sides NRHS. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX array, dimension (NMAX*NSMAX) +*> where NSMAX is the largest entry in NSVAL. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (NMAX*max(3,NSMAX)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is COMPLEX array, dimension (max(NMAX,2*NSMAX)) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex_lin +* +* ===================================================================== + SUBROUTINE CCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, + $ NSVAL, THRESH, TSTERR, NMAX, A, AFAC, AINV, + $ B, X, XACT, WORK, RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NN, NNB, NNS, NMAX, NOUT + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) + COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) + REAL RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) + INTEGER NTYPES + PARAMETER ( NTYPES = 10 ) + INTEGER NTESTS + PARAMETER ( NTESTS = 9 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, TYPE, UPLO, XTYPE + CHARACTER*3 PATH, MATPATH + INTEGER I, I1, I2, IMAT, IN, INB, INFO, IOFF, IRHS, + $ IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, MODE, + $ N, NB, NERRS, NFAIL, NIMAT, NRHS, NRUN, NT + REAL ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + REAL RESULT( NTESTS ) +* .. +* .. External Subroutines .. + EXTERNAL ALAERH, ALAHD, ALASUM, CERRSY, CLACPY, CLARHS, + $ CLATB4, CLATMS, CSYT02, CSYT01, + $ CSYTRF_AA_2STAGE, CSYTRS_AA_2STAGE, + $ XLAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Complex precision' + PATH( 2: 3 ) = 'S2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Complex precision' + MATPATH( 2: 3 ) = 'SY' + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL CERRSY( PATH, NOUT ) + INFOT = 0 +* +* Set the minimum block size for which the block routine should +* be used, which will be later returned by ILAENV +* + CALL XLAENV( 2, 2 ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + IF( N .GT. NMAX ) THEN + NFAIL = NFAIL + 1 + WRITE(NOUT, 9995) 'M ', N, NMAX + GO TO 180 + END IF + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + IZERO = 0 +* +* Do for each value of matrix type IMAT +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* +* Set up parameters with CLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL CLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, + $ ANORM, MODE, CNDNUM, DIST ) +* +* Generate a matrix with CLATMS. +* + SRNAMT = 'CLATMS' + CALL CLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, WORK, + $ INFO ) +* +* Check error code from CLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'CLATMS', INFO, 0, UPLO, N, N, -1, + $ -1, -1, IMAT, NFAIL, NERRS, NOUT ) +* +* Skip all tests for this generated matrix +* + GO TO 160 + END IF +* +* For matrix types 3-6, zero one or more rows and +* columns of the matrix to test that INFO is returned +* correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = CZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = CZERO + 50 CONTINUE + END IF + ELSE + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = CZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the last IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = CZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* Do for each value of NB in NBVAL +* + DO 150 INB = 1, NNB +* +* Set the optimal blocksize, which will be later +* returned by ILAENV. +* + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Copy the test matrix A into matrix AFAC which +* will be factorized in place. This is needed to +* preserve the test matrix A for subsequent tests. +* + CALL CLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) +* +* Compute the L*D*L**T or U*D*U**T factorization of the +* matrix. IWORK stores details of the interchanges and +* the block structure of D. AINV is a work array for +* block factorization, LWORK is the length of AINV. +* + SRNAMT = 'CSYTRF_AA_2STAGE' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL CSYTRF_AA_2STAGE( UPLO, N, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ WORK, LWORK, + $ INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from CSYTRF and handle error. +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'CSYTRF_AA_2STAGE', INFO, K, + $ UPLO, N, N, -1, -1, NB, IMAT, NFAIL, + $ NERRS, NOUT ) + END IF +* +*+ TEST 1 +* Reconstruct matrix from factors and compute residual. +* +c CALL CSYT01_AA( UPLO, N, A, LDA, AFAC, LDA, IWORK, +c $ AINV, LDA, RWORK, RESULT( 1 ) ) +c NT = 1 + NT = 0 +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )UPLO, N, NB, IMAT, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT +* +* Skip solver test if INFO is not 0. +* + IF( INFO.NE.0 ) THEN + GO TO 140 + END IF +* +* Do for each value of NRHS in NSVAL. +* + DO 130 IRHS = 1, NNS + NRHS = NSVAL( IRHS ) +* +*+ TEST 2 (Using TRS) +* Solve and compute residual for A * X = B. +* +* Choose a set of NRHS random solution vectors +* stored in XACT and set up the right hand side B +* + SRNAMT = 'CLARHS' + CALL CLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, + $ KL, KU, NRHS, A, LDA, XACT, LDA, + $ B, LDA, ISEED, INFO ) + CALL CLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* + SRNAMT = 'CSYTRS_AA_2STAGE' + LWORK = MAX( 1, 3*N-2 ) + CALL CSYTRS_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, IWORK, IWORK( 1+N ), + $ X, LDA, INFO ) +* +* Check error code from CSYTRS and handle error. +* + IF( INFO.NE.0 ) THEN + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'CSYTRS_AA_2STAGE', + $ INFO, 0, UPLO, N, N, -1, -1, + $ NRHS, IMAT, NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) +* +* Compute the residual for the solution +* + CALL CSYT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF + NRUN = NRUN + 1 +* +* End do for each value of NRHS in NSVAL. +* + 130 CONTINUE + 140 CONTINUE + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NB =', I4, ', type ', + $ I2, ', test ', I2, ', ratio =', G12.5 ) + 9998 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NRHS=', I3, ', type ', + $ I2, ', test(', I2, ') =', G12.5 ) + 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', + $ I6 ) + RETURN +* +* End of CCHKSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/cdrvhe_aa.f b/lapack-netlib/TESTING/LIN/cdrvhe_aa.f index 6f6d758b2..d56aa45b0 100644 --- a/lapack-netlib/TESTING/LIN/cdrvhe_aa.f +++ b/lapack-netlib/TESTING/LIN/cdrvhe_aa.f @@ -144,7 +144,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_lin * @@ -153,10 +153,10 @@ $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, $ RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -241,7 +241,6 @@ DO 10 I = 1, 4 ISEED( I ) = ISEEDY( I ) 10 CONTINUE - LWORK = MAX( 2*NMAX, NMAX*NRHS ) * * Test the error exits * @@ -260,6 +259,8 @@ * DO 180 IN = 1, NN N = NVAL( IN ) + LWORK = MAX( 3*N-2, N*(1+NB) ) + LWORK = MAX( LWORK, 1 ) LDA = MAX( N, 1 ) XTYPE = 'N' NIMAT = NTYPES diff --git a/lapack-netlib/TESTING/LIN/cdrvhe_aa_2stage.f b/lapack-netlib/TESTING/LIN/cdrvhe_aa_2stage.f new file mode 100644 index 000000000..32be41f64 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/cdrvhe_aa_2stage.f @@ -0,0 +1,490 @@ +*> \brief \b CDRVHE_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CDRVHE_AA_2STAGE( +* DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, NMAX, +* A, AFAC, AINV, B, X, XACT, WORK, RWORK, IWORK, +* NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NOUT, NRHS +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NVAL( * ) +* REAL RWORK( * ) +* COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CDRVHE_AA_2STAGE tests the driver routine CHESV_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand side vectors to be generated for +*> each linear system. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (NMAX*max(2,NRHS)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (NMAX+2*NRHS) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex_lin +* +* ===================================================================== + SUBROUTINE CDRVHE_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, + $ RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NMAX, NN, NOUT, NRHS + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NVAL( * ) + REAL RWORK( * ) + COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NTYPES, NTESTS + PARAMETER ( NTYPES = 10, NTESTS = 3 ) + INTEGER NFACT + PARAMETER ( NFACT = 2 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, FACT, TYPE, UPLO, XTYPE + CHARACTER*3 MATPATH, PATH + INTEGER I, I1, I2, IFACT, IMAT, IN, INFO, IOFF, IUPLO, + $ IZERO, J, K, KL, KU, LDA, LWORK, MODE, N, + $ NB, NBMIN, NERRS, NFAIL, NIMAT, NRUN, NT + REAL ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER FACTS( NFACT ), UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + REAL RESULT( NTESTS ) +* .. +* .. External Functions .. + REAL CLANHE, SGET06 + EXTERNAL CLANHE, SGET06 +* .. +* .. External Subroutines .. + EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, CERRVX, + $ CGET04, CLACPY, CLARHS, CLATB4, CLATMS, + $ CHESV_AA_2STAGE, CPOT02, + $ CHETRF_AA_2STAGE +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / , FACTS / 'F', 'N' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Complex precision' + PATH( 2: 3 ) = 'H2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Complex precision' + MATPATH( 2: 3 ) = 'HE' +* + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL CERRVX( PATH, NOUT ) + INFOT = 0 +* +* Set the block size and minimum block size for testing. +* + NB = 1 + NBMIN = 2 + CALL XLAENV( 1, NB ) + CALL XLAENV( 2, NBMIN ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* Set up parameters with CLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL CLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, ANORM, + $ MODE, CNDNUM, DIST ) +* +* Generate a matrix with CLATMS. +* + SRNAMT = 'CLATMS' + CALL CLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, + $ WORK, INFO ) +* +* Check error code from CLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'CLATMS', INFO, 0, UPLO, N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + GO TO 160 + END IF +* +* For types 3-6, zero one or more rows and columns of +* the matrix to test that INFO is returned correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = ZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = ZERO + 50 CONTINUE + END IF + ELSE + IOFF = 0 + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = ZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = ZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* + DO 150 IFACT = 1, NFACT +* +* Do first for FACT = 'F', then for other values. +* + FACT = FACTS( IFACT ) +* +* Form an exact solution and set the right hand side. +* + SRNAMT = 'CLARHS' + CALL CLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, KL, KU, + $ NRHS, A, LDA, XACT, LDA, B, LDA, ISEED, + $ INFO ) + XTYPE = 'C' +* +* --- Test CHESV_AA_2STAGE --- +* + IF( IFACT.EQ.2 ) THEN + CALL CLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) + CALL CLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* +* Factor the matrix and solve the system using CHESV_AA. +* + SRNAMT = 'CHESV_AA_2STAGE ' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL CHESV_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ X, LDA, WORK, LWORK, INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from CHESV_AA . +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'CHESV_AA', INFO, K, + $ UPLO, N, N, -1, -1, NRHS, + $ IMAT, NFAIL, NERRS, NOUT ) + GO TO 120 + ELSE IF( INFO.NE.0 ) THEN + GO TO 120 + END IF +* +* Compute residual of the computed solution. +* + CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + CALL CPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, + $ LDA, RWORK, RESULT( 1 ) ) +* +* Reconstruct matrix from factors and compute +* residual. +* +c CALL CHET01_AA( UPLO, N, A, LDA, AFAC, LDA, +c $ IWORK, AINV, LDA, RWORK, +c $ RESULT( 2 ) ) +c NT = 2 + NT = 1 +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALADHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )'CHESV_AA ', + $ UPLO, N, IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT + 120 CONTINUE + END IF +* + 150 CONTINUE +* + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASVM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( 1X, A, ', UPLO=''', A1, ''', N =', I5, ', type ', I2, + $ ', test ', I2, ', ratio =', G12.5 ) + RETURN +* +* End of CDRVHE_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/cdrvls.f b/lapack-netlib/TESTING/LIN/cdrvls.f index b9b8e0f4f..2c2d9abb8 100644 --- a/lapack-netlib/TESTING/LIN/cdrvls.f +++ b/lapack-netlib/TESTING/LIN/cdrvls.f @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * @@ -192,10 +192,10 @@ $ NBVAL, NXVAL, THRESH, TSTERR, A, COPYA, B, $ COPYB, C, S, COPYS, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -237,9 +237,9 @@ REAL EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. - INTEGER ISEED( 4 ), ISEEDY( 4 ), IWORKQUERY - REAL RESULT( NTESTS ), RWORKQUERY - COMPLEX WORKQUERY + INTEGER ISEED( 4 ), ISEEDY( 4 ), IWQ + REAL RESULT( NTESTS ), RWQ + COMPLEX WQ * .. * .. Allocatable Arrays .. COMPLEX, ALLOCATABLE :: WORK (:) @@ -324,48 +324,85 @@ M = MMAX N = NMAX NRHS = NSMAX - LDA = MAX( 1, M ) - LDB = MAX( 1, M, N ) MNMIN = MAX( MIN( M, N ), 1 ) * * Compute workspace needed for routines * CQRT14, CQRT17 (two side cases), CQRT15 and CQRT12 * - LWORK = MAX( ( M+N )*NRHS, + LWORK = MAX( 1, ( M+N )*NRHS, $ ( N+NRHS )*( M+2 ), ( M+NRHS )*( N+2 ), $ MAX( M+MNMIN, NRHS*MNMIN,2*N+M ), $ MAX( M*N+4*MNMIN+MAX(M,N), M*N+2*MNMIN+4*N ) ) + LRWORK = 1 + LIWORK = 1 +* +* Iterate through all test cases and compute necessary workspace +* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* + DO IM = 1, NM + M = MVAL( IM ) + LDA = MAX( 1, M ) + DO IN = 1, NN + N = NVAL( IN ) + MNMIN = MAX(MIN( M, N ),1) + LDB = MAX( 1, M, N ) + DO INS = 1, NNS + NRHS = NSVAL( INS ) + DO IRANK = 1, 2 + DO ISCALE = 1, 3 + ITYPE = ( IRANK-1 )*3 + ISCALE + IF( DOTYPE( ITYPE ) ) THEN + IF( IRANK.EQ.1 ) THEN + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* +* Compute workspace needed for CGELS + CALL CGELS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_CGELS = INT( WQ ) +* Compute workspace needed for CGETSLS + CALL CGETSLS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_CGETSLS = INT( WQ ) + ENDDO + END IF +* Compute workspace needed for CGELSY + CALL CGELSY( M, N, NRHS, A, LDA, B, LDB, + $ IWQ, RCOND, CRANK, WQ, -1, RWORK, + $ INFO ) + LWORK_CGELSY = INT( WQ ) + LRWORK_CGELSY = 2*N +* Compute workspace needed for CGELSS + CALL CGELSS( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1, RWORK, INFO ) + LWORK_CGELSS = INT( WQ ) + LRWORK_CGELSS = 5*MNMIN +* Compute workspace needed for CGELSD + CALL CGELSD( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1, RWQ, IWQ, + $ INFO ) + LWORK_CGELSD = INT( WQ ) + LRWORK_CGELSD = INT( RWQ ) +* Compute LIWORK workspace needed for CGELSY and CGELSD + LIWORK = MAX( LIWORK, N, IWQ ) +* Compute LRWORK workspace needed for CGELSY, CGELSS and CGELSD + LRWORK = MAX( LRWORK, LRWORK_CGELSY, + $ LRWORK_CGELSS, LRWORK_CGELSD ) +* Compute LWORK workspace needed for all functions + LWORK = MAX( LWORK, LWORK_CGELS, LWORK_CGETSLS, + $ LWORK_CGELSY, LWORK_CGELSS, + $ LWORK_CGELSD ) + END IF + ENDDO + ENDDO + ENDDO + ENDDO + ENDDO * -* Compute workspace needed for CGELS - CALL CGELS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_CGELS = INT( WORKQUERY ) -* Compute workspace needed for CGETSLS - CALL CGETSLS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_CGETSLS = INT( WORKQUERY ) -* Compute workspace needed for CGELSY - CALL CGELSY( M, N, NRHS, A, LDA, B, LDB, IWORKQUERY, - $ RCOND, CRANK, WORKQUERY, -1, RWORK, INFO ) - LWORK_CGELSY = INT( WORKQUERY ) - LRWORK_CGELSY = 2*N -* Compute workspace needed for CGELSS - CALL CGELSS( M, N, NRHS, A, LDA, B, LDB, S, - $ RCOND, CRANK, WORKQUERY, -1, RWORK, INFO ) - LWORK_CGELSS = INT( WORKQUERY ) - LRWORK_CGELSS = 5*MNMIN -* Compute workspace needed for CGELSD - CALL CGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, CRANK, - $ WORKQUERY, -1, RWORKQUERY, IWORKQUERY, INFO ) - LWORK_CGELSD = INT( WORKQUERY ) - LRWORK_CGELSD = INT( RWORKQUERY ) -* Compute LIWORK workspace needed for CGELSY and CGELSD - LIWORK = MAX( 1, N, IWORKQUERY ) -* Compute LRWORK workspace needed for CGELSY, CGELSS and CGELSD - LRWORK = MAX( 1, LRWORK_CGELSY, LRWORK_CGELSS, LRWORK_CGELSD ) -* Compute LWORK workspace needed for all functions - LWORK = MAX( 1, LWORK, LWORK_CGELS, LWORK_CGETSLS, LWORK_CGELSY, - $ LWORK_CGELSS, LWORK_CGELSD ) LWLSY = LWORK * ALLOCATE( WORK( LWORK ) ) diff --git a/lapack-netlib/TESTING/LIN/cdrvrf3.f b/lapack-netlib/TESTING/LIN/cdrvrf3.f index ca798e19a..2c71d0fc5 100644 --- a/lapack-netlib/TESTING/LIN/cdrvrf3.f +++ b/lapack-netlib/TESTING/LIN/cdrvrf3.f @@ -111,7 +111,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * @@ -119,10 +119,10 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, + S_WORK_CLANGE, C_WORK_CGEQRF, TAU ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, NN, NOUT @@ -227,7 +227,7 @@ * IF ( IALPHA.EQ. 1) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 2) THEN ALPHA = ONE ELSE ALPHA = CLARND( 4, ISEED ) diff --git a/lapack-netlib/TESTING/LIN/cdrvrf4.f b/lapack-netlib/TESTING/LIN/cdrvrf4.f index 9f33b04b7..d3e6a39c8 100644 --- a/lapack-netlib/TESTING/LIN/cdrvrf4.f +++ b/lapack-netlib/TESTING/LIN/cdrvrf4.f @@ -106,7 +106,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * @@ -114,10 +114,10 @@ SUBROUTINE CDRVRF4( NOUT, NN, NVAL, THRESH, C1, C2, LDC, CRF, A, + LDA, S_WORK_CLANGE ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, LDC, NN, NOUT @@ -209,10 +209,10 @@ IF ( IALPHA.EQ. 1) THEN ALPHA = ZERO BETA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 2) THEN ALPHA = ONE BETA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 3) THEN ALPHA = ZERO BETA = ONE ELSE diff --git a/lapack-netlib/TESTING/LIN/cdrvsy_aa.f b/lapack-netlib/TESTING/LIN/cdrvsy_aa.f index b1a5f11c4..a9e53de4f 100644 --- a/lapack-netlib/TESTING/LIN/cdrvsy_aa.f +++ b/lapack-netlib/TESTING/LIN/cdrvsy_aa.f @@ -144,7 +144,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @generated from LIN/ddrvsy_aa.f, fortran d -> c, Thu Nov 17 12:14:51 2016 * @@ -155,10 +155,10 @@ $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, $ RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -204,8 +204,8 @@ EXTERNAL DGET06, CLANSY * .. * .. External Subroutines .. - EXTERNAL ALADHD, ALAERH, ALASVM, DERRVX, CGET04, CLACPY, - $ CLARHS, CLASET, CLATB4, CLATMS, CSYT02, DSYT05, + EXTERNAL ALADHD, ALAERH, ALASVM, CERRVX, CGET04, CLACPY, + $ CLARHS, CLASET, CLATB4, CLATMS, CSYT02, $ CSYSV_AA, CSYT01_AA, CSYTRF_AA, XLAENV * .. * .. Scalars in Common .. @@ -244,7 +244,6 @@ DO 10 I = 1, 4 ISEED( I ) = ISEEDY( I ) 10 CONTINUE - LWORK = MAX( 2*NMAX, NMAX*NRHS ) * * Test the error exits * @@ -263,6 +262,8 @@ * DO 180 IN = 1, NN N = NVAL( IN ) + LWORK = MAX( 3*N-2, N*(1+NB) ) + LWORK = MAX( LWORK, 1 ) LDA = MAX( N, 1 ) XTYPE = 'N' NIMAT = NTYPES diff --git a/lapack-netlib/TESTING/LIN/cdrvsy_aa_2stage.f b/lapack-netlib/TESTING/LIN/cdrvsy_aa_2stage.f new file mode 100644 index 000000000..2b766bcbd --- /dev/null +++ b/lapack-netlib/TESTING/LIN/cdrvsy_aa_2stage.f @@ -0,0 +1,490 @@ +*> \brief \b CDRVSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CDRVSY_AA_2STAGE( +* DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, NMAX, +* A, AFAC, AINV, B, X, XACT, WORK, RWORK, IWORK, +* NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NOUT, NRHS +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NVAL( * ) +* REAL RWORK( * ) +* COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CDRVSY_AA_2STAGE tests the driver routine CSYSV_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand side vectors to be generated for +*> each linear system. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (NMAX*max(2,NRHS)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is COMPLEX array, dimension (NMAX+2*NRHS) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex_lin +* +* ===================================================================== + SUBROUTINE CDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, + $ RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NMAX, NN, NOUT, NRHS + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NVAL( * ) + REAL RWORK( * ) + COMPLEX A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) + INTEGER NTYPES, NTESTS + PARAMETER ( NTYPES = 10, NTESTS = 3 ) + INTEGER NFACT + PARAMETER ( NFACT = 2 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, FACT, TYPE, UPLO, XTYPE + CHARACTER*3 MATPATH, PATH + INTEGER I, I1, I2, IFACT, IMAT, IN, INFO, IOFF, IUPLO, + $ IZERO, J, K, KL, KU, LDA, LWORK, MODE, N, + $ NB, NBMIN, NERRS, NFAIL, NIMAT, NRUN, NT + REAL ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER FACTS( NFACT ), UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + REAL RESULT( NTESTS ) +* .. +* .. External Functions .. + COMPLEX CLANSY, SGET06 + EXTERNAL CLANSY, SGET06 +* .. +* .. External Subroutines .. + EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, CERRVX, + $ CGET04, CLACPY, CLARHS, CLATB4, CLATMS, + $ CSYSV_AA_2STAGE, CSYT01_AA, CSYT02, + $ CSYTRF_AA_2STAGE +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / , FACTS / 'F', 'N' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Complex precision' + PATH( 2: 3 ) = 'S2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Complex precision' + MATPATH( 2: 3 ) = 'SY' +* + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL CERRVX( PATH, NOUT ) + INFOT = 0 +* +* Set the block size and minimum block size for testing. +* + NB = 1 + NBMIN = 2 + CALL XLAENV( 1, NB ) + CALL XLAENV( 2, NBMIN ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* Set up parameters with CLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL CLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, ANORM, + $ MODE, CNDNUM, DIST ) +* +* Generate a matrix with CLATMS. +* + SRNAMT = 'CLATMS' + CALL CLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, + $ WORK, INFO ) +* +* Check error code from CLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'CLATMS', INFO, 0, UPLO, N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + GO TO 160 + END IF +* +* For types 3-6, zero one or more rows and columns of +* the matrix to test that INFO is returned correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = CZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = CZERO + 50 CONTINUE + END IF + ELSE + IOFF = 0 + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = CZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = CZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* + DO 150 IFACT = 1, NFACT +* +* Do first for FACT = 'F', then for other values. +* + FACT = FACTS( IFACT ) +* +* Form an exact solution and set the right hand side. +* + SRNAMT = 'CLARHS' + CALL CLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, KL, KU, + $ NRHS, A, LDA, XACT, LDA, B, LDA, ISEED, + $ INFO ) + XTYPE = 'C' +* +* --- Test CSYSV_AA_2STAGE --- +* + IF( IFACT.EQ.2 ) THEN + CALL CLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) + CALL CLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* +* Factor the matrix and solve the system using CSYSV_AA. +* + SRNAMT = 'CSYSV_AA_2STAGE ' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL CSYSV_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ X, LDA, WORK, LWORK, INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from CSYSV_AA . +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'CSYSV_AA', INFO, K, + $ UPLO, N, N, -1, -1, NRHS, + $ IMAT, NFAIL, NERRS, NOUT ) + GO TO 120 + ELSE IF( INFO.NE.0 ) THEN + GO TO 120 + END IF +* +* Compute residual of the computed solution. +* + CALL CLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + CALL CSYT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, + $ LDA, RWORK, RESULT( 1 ) ) +* +* Reconstruct matrix from factors and compute +* residual. +* +c CALL CSY01_AA( UPLO, N, A, LDA, AFAC, LDA, +c $ IWORK, AINV, LDA, RWORK, +c $ RESULT( 2 ) ) +c NT = 2 + NT = 1 +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALADHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )'CSYSV_AA_2STAGE ', + $ UPLO, N, IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT + 120 CONTINUE + END IF +* + 150 CONTINUE +* + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASVM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( 1X, A, ', UPLO=''', A1, ''', N =', I5, ', type ', I2, + $ ', test ', I2, ', ratio =', G12.5 ) + RETURN +* +* End of SDRVSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/cerrhe.f b/lapack-netlib/TESTING/LIN/cerrhe.f index 535707f0c..9e8daa219 100644 --- a/lapack-netlib/TESTING/LIN/cerrhe.f +++ b/lapack-netlib/TESTING/LIN/cerrhe.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_lin * * ===================================================================== SUBROUTINE CERRHE( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -94,6 +94,7 @@ $ CHETRI_3, CHETRI_3X, CHETRI_ROOK, CHETRI2, $ CHETRI2X, CHETRS, CHETRS_3, CHETRS_ROOK, $ CHETRS_AA, CHKXER, CHPCON, CHPRFS, CHPTRF, + $ CHETRF_AA_2STAGE, CHETRS_AA_2STAGE, $ CHPTRI, CHPTRS * .. * .. Scalars in Common .. @@ -471,7 +472,7 @@ CALL CHECON_3( 'U', 1, A, 1, E, IP, -1.0E0, RCOND, W, INFO) CALL CHKXER( 'CHECON_3', INFOT, NOUT, LERR, OK ) * - ELSE IF( LSAMEN( 2, C2, 'HP' ) ) THEN + ELSE IF( LSAMEN( 2, C2, 'HA' ) ) THEN * * Test error exits of the routines that use factorization * of a Hermitian indefinite matrix with Aasen's algorithm. @@ -489,10 +490,10 @@ CALL CHETRF_AA( 'U', 2, A, 1, IP, W, 4, INFO ) CALL CHKXER( 'CHETRF_AA', INFOT, NOUT, LERR, OK ) INFOT = 7 - CALL CHETRF_AA( 'U', 0, A, 1, IP, W, 0, INFO ) + CALL CHETRF_AA( 'U', 2, A, 2, IP, W, 0, INFO ) CALL CHKXER( 'CHETRF_AA', INFOT, NOUT, LERR, OK ) INFOT = 7 - CALL CHETRF_AA( 'U', 0, A, 1, IP, W, -2, INFO ) + CALL CHETRF_AA( 'U', 2, A, 2, IP, W, -2, INFO ) CALL CHKXER( 'CHETRF_AA', INFOT, NOUT, LERR, OK ) * * CHETRS_AA @@ -514,11 +515,68 @@ CALL CHETRS_AA( 'U', 2, 1, A, 2, IP, B, 1, W, 1, INFO ) CALL CHKXER( 'CHETRS_AA', INFOT, NOUT, LERR, OK ) INFOT = 10 - CALL CHETRS_AA( 'U', 0, 1, A, 1, IP, B, 1, W, 0, INFO ) + CALL CHETRS_AA( 'U', 2, 1, A, 2, IP, B, 2, W, 0, INFO ) CALL CHKXER( 'CHETRS_AA', INFOT, NOUT, LERR, OK ) INFOT = 10 - CALL CHETRS_AA( 'U', 0, 1, A, 1, IP, B, 1, W, -2, INFO ) + CALL CHETRS_AA( 'U', 2, 1, A, 2, IP, B, 2, W, -2, INFO ) CALL CHKXER( 'CHETRS_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'H2' ) ) THEN +* +* Test error exits of the routines that use factorization +* of a symmetric indefinite matrix with Aasen's algorithm. +* +* CHETRF_AA_2STAGE +* + SRNAMT = 'CHETRF_AA_2STAGE' + INFOT = 1 + CALL CHETRF_AA_2STAGE( '/', 0, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRF_AA_2STAGE( 'U', -1, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CHETRF_AA_2STAGE( 'U', 2, A, 1, A, 2, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CHETRF_AA_2STAGE( 'U', 2, A, 2, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CHETRF_AA_2STAGE( 'U', 2, A, 2, A, 8, IP, IP, W, 0, + $ INFO ) + CALL CHKXER( 'CHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) +* +* CHETRS_AA_2STAGE +* + SRNAMT = 'CHETRS_AA_2STAGE' + INFOT = 1 + CALL CHETRS_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRS_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHETRS_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHETRS_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CHETRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHETRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 8, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CHETRS_AA_STAGE', INFOT, NOUT, LERR, OK ) * * Test error exits of the routines that use factorization * of a Hermitian indefinite packed matrix with patrial diff --git a/lapack-netlib/TESTING/LIN/cerrsy.f b/lapack-netlib/TESTING/LIN/cerrsy.f index 90d6be9aa..17c9f36fb 100644 --- a/lapack-netlib/TESTING/LIN/cerrsy.f +++ b/lapack-netlib/TESTING/LIN/cerrsy.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_lin * * ===================================================================== SUBROUTINE CERRSY( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -596,6 +596,63 @@ INFOT = 10 CALL CSYTRS_AA( 'U', 0, 1, A, 1, IP, B, 1, W, -2, INFO ) CALL CHKXER( 'CSYTRS_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* Test error exits of the routines that use factorization +* of a symmetric indefinite matrix with Aasen's algorithm. +* +* CSYTRF_AA_2STAGE +* + SRNAMT = 'CSYTRF_AA_2STAGE' + INFOT = 1 + CALL CSYTRF_AA_2STAGE( '/', 0, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CSYTRF_AA_2STAGE( 'U', -1, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CSYTRF_AA_2STAGE( 'U', 2, A, 1, A, 2, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'CSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 8, IP, IP, W, 0, + $ INFO ) + CALL CHKXER( 'CSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) +* +* CHETRS_AA_2STAGE +* + SRNAMT = 'CSYTRS_AA_2STAGE' + INFOT = 1 + CALL CSYTRS_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CSYTRS_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CSYTRS_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CSYTRS_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 8, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'CSYTRS_AA_STAGE', INFOT, NOUT, LERR, OK ) * END IF * diff --git a/lapack-netlib/TESTING/LIN/cerrvx.f b/lapack-netlib/TESTING/LIN/cerrvx.f index 2bddd2b84..d2d3d2a85 100644 --- a/lapack-netlib/TESTING/LIN/cerrvx.f +++ b/lapack-netlib/TESTING/LIN/cerrvx.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_lin * * ===================================================================== SUBROUTINE CERRVX( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -94,7 +94,7 @@ $ CHPSV, CHPSVX, CPBSV, CPBSVX, CPOSV, CPOSVX, $ CPPSV, CPPSVX, CPTSV, CPTSVX, CSPSV, CSPSVX, $ CSYSV, CSYSV_AA, CSYSV_RK, CSYSV_ROOK, - $ CSYSVX + $ CSYSVX, CSYSV_AA_2STAGE * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -716,6 +716,66 @@ INFOT = 8 CALL CHESV_AA( 'U', 2, 0, A, 2, IP, B, 1, W, 1, INFO ) CALL CHKXER( 'CHESV_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'H2' ) ) THEN +* +* CHESV_AASEN_2STAGE +* + SRNAMT = 'CHESV_AA_2STAGE' + INFOT = 1 + CALL CHESV_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHESV_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHESV_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHESV_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHESV_AA_2STAGE( 'U', 2, 1, A, 2, A, 2, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CHESV_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, B, 2, + $ W, 1, INFO ) + CALL CHKXER( 'CHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* CSYSV_AASEN_2STAGE +* + SRNAMT = 'CSYSV_AA_2STAGE' + INFOT = 1 + CALL CSYSV_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CSYSV_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CSYSV_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CSYSV_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CSYSV_AA_2STAGE( 'U', 2, 1, A, 2, A, 2, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'CSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CSYSV_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, B, 2, + $ W, 1, INFO ) + CALL CHKXER( 'CSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'HP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/chet01_3.f b/lapack-netlib/TESTING/LIN/chet01_3.f index 3e08c094a..a1b63ae71 100644 --- a/lapack-netlib/TESTING/LIN/chet01_3.f +++ b/lapack-netlib/TESTING/LIN/chet01_3.f @@ -133,7 +133,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * @@ -141,10 +141,10 @@ SUBROUTINE CHET01_3( UPLO, N, A, LDA, AFAC, LDAFAC, E, IPIV, C, $ LDC, RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/TESTING/LIN/chkxer.f b/lapack-netlib/TESTING/LIN/chkxer.f index 86a44e38f..cca9ddb31 100644 --- a/lapack-netlib/TESTING/LIN/chkxer.f +++ b/lapack-netlib/TESTING/LIN/chkxer.f @@ -14,25 +14,7 @@ * LOGICAL LERR, OK * CHARACTER*(*) SRNAMT * INTEGER INFOT, NOUT -* .. -* .. Intrinsic Functions .. -* INTRINSIC LEN_TRIM -* .. -* .. Executable Statements .. -* IF( .NOT.LERR ) THEN -* WRITE( NOUT, FMT = 9999 )INFOT, -* $ SRNAMT( 1:LEN_TRIM( SRNAMT ) ) -* OK = .FALSE. -* END IF -* LERR = .FALSE. -* RETURN -* -* 9999 FORMAT( ' *** Illegal value of parameter number ', I2, -* $ ' not detected by ', A6, ' ***' ) -* -* End of CHKXER. -* -* END +* * *> \par Purpose: * ============= @@ -52,17 +34,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * * ===================================================================== SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * -* -- LAPACK test routine (input) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL LERR, OK diff --git a/lapack-netlib/TESTING/LIN/clahilb.f b/lapack-netlib/TESTING/LIN/clahilb.f index 0ce9eb1b5..f88491a0d 100644 --- a/lapack-netlib/TESTING/LIN/clahilb.f +++ b/lapack-netlib/TESTING/LIN/clahilb.f @@ -8,11 +8,11 @@ * Definition: * =========== * -* SUBROUTINE CLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, +* SUBROUTINE CLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, * INFO, PATH) * * .. Scalar Arguments .. -* INTEGER T, N, NRHS, LDA, LDX, LDB, INFO +* INTEGER N, NRHS, LDA, LDX, LDB, INFO * .. Array Arguments .. * REAL WORK(N) * COMPLEX A(LDA,N), X(LDX, NRHS), B(LDB, NRHS) @@ -56,7 +56,7 @@ *> *> \param[in] NRHS *> \verbatim -*> NRHS is NRHS +*> NRHS is INTEGER *> The requested number of right-hand sides. *> \endverbatim *> @@ -126,21 +126,21 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * * ===================================================================== - SUBROUTINE CLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, + SUBROUTINE CLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, $ INFO, PATH) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. - INTEGER T, N, NRHS, LDA, LDX, LDB, INFO + INTEGER N, NRHS, LDA, LDX, LDB, INFO * .. Array Arguments .. REAL WORK(N) COMPLEX A(LDA,N), X(LDX, NRHS), B(LDB, NRHS) @@ -220,7 +220,8 @@ END DO * * Generate the scaled Hilbert matrix in A -* If we are testing SY routines, take D1_i = D2_i, else, D1_i = D2_i* +* If we are testing SY routines, take +* D1_i = D2_i, else, D1_i = D2_i* IF ( LSAMEN( 2, C2, 'SY' ) ) THEN DO J = 1, N DO I = 1, N @@ -250,8 +251,9 @@ WORK(J) = ( ( (WORK(J-1)/(J-1)) * (J-1 - N) ) /(J-1) ) $ * (N +J -1) END DO -* -* If we are testing SY routines, take D1_i = D2_i, else, D1_i = D2_i* + +* If we are testing SY routines, +* take D1_i = D2_i, else, D1_i = D2_i* IF ( LSAMEN( 2, C2, 'SY' ) ) THEN DO J = 1, NRHS DO I = 1, N diff --git a/lapack-netlib/TESTING/LIN/clarhs.f b/lapack-netlib/TESTING/LIN/clarhs.f index ddf5706a5..8b2b31bf8 100644 --- a/lapack-netlib/TESTING/LIN/clarhs.f +++ b/lapack-netlib/TESTING/LIN/clarhs.f @@ -190,7 +190,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -k, the k-th argument had an illegal value +*> < 0: if INFO = -i, the i-th argument had an illegal value *> \endverbatim * * Authors: @@ -201,7 +201,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * @@ -209,10 +209,10 @@ SUBROUTINE CLARHS( PATH, XTYPE, UPLO, TRANS, M, N, KL, KU, NRHS, $ A, LDA, X, LDX, B, LDB, ISEED, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO, XTYPE diff --git a/lapack-netlib/TESTING/LIN/csyt01_3.f b/lapack-netlib/TESTING/LIN/csyt01_3.f index 9d4ed77ad..3f0f53251 100644 --- a/lapack-netlib/TESTING/LIN/csyt01_3.f +++ b/lapack-netlib/TESTING/LIN/csyt01_3.f @@ -133,7 +133,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex_lin * @@ -141,10 +141,10 @@ SUBROUTINE CSYT01_3( UPLO, N, A, LDA, AFAC, LDAFAC, E, IPIV, C, $ LDC, RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/TESTING/LIN/dchkaa.f b/lapack-netlib/TESTING/LIN/dchkaa.f index 4be10b366..c5fd7afda 100644 --- a/lapack-netlib/TESTING/LIN/dchkaa.f +++ b/lapack-netlib/TESTING/LIN/dchkaa.f @@ -53,6 +53,7 @@ *> DSR 10 List types on next line if 0 < NTYPES < 10 *> DSK 10 List types on next line if 0 < NTYPES < 10 *> DSA 10 List types on next line if 0 < NTYPES < 10 +*> DS2 10 List types on next line if 0 < NTYPES < 10 *> DSP 10 List types on next line if 0 < NTYPES < 10 *> DTR 18 List types on next line if 0 < NTYPES < 18 *> DTP 18 List types on next line if 0 < NTYPES < 18 @@ -108,7 +109,7 @@ * ===================================================================== PROGRAM DCHKAA * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -149,7 +150,7 @@ $ RANKVAL( MAXIN ), PIV( NMAX ) DOUBLE PRECISION A( ( KDMAX+1 )*NMAX, 7 ), B( NMAX*MAXRHS, 4 ), $ E( NMAX ), RWORK( 5*NMAX+2*MAXRHS ), - $ S( 2*NMAX ), WORK( NMAX, NMAX+MAXRHS+30 ) + $ S( 2*NMAX ), WORK( NMAX, 3*NMAX+MAXRHS+30 ) * .. * .. External Functions .. LOGICAL LSAME, LSAMEN @@ -723,6 +724,35 @@ WRITE( NOUT, FMT = 9988 )PATH END IF * +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* SA: symmetric indefinite matrices, +* with partial (Aasen's) pivoting algorithm +* + NTYPES = 10 + CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) +* + IF( TSTCHK ) THEN + CALL DCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB2, NBVAL2, + $ NNS, NSVAL, THRESH, TSTERR, LDA, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9989 )PATH + END IF +* + IF( TSTDRV ) THEN + CALL DDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ LDA, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9988 )PATH + END IF +* * ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/dchklqtp.f b/lapack-netlib/TESTING/LIN/dchklqtp.f index 42ebc963a..75940af59 100644 --- a/lapack-netlib/TESTING/LIN/dchklqtp.f +++ b/lapack-netlib/TESTING/LIN/dchklqtp.f @@ -94,7 +94,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_lin * @@ -103,10 +103,10 @@ $ NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -132,7 +132,7 @@ DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, DERRLQTP, DLQT04 + EXTERNAL ALAERH, ALAHD, ALASUM, DERRLQTP, DLQT05 * .. * .. Scalars in Common .. LOGICAL LERR, OK diff --git a/lapack-netlib/TESTING/LIN/dchkrfp.f b/lapack-netlib/TESTING/LIN/dchkrfp.f index d6c50fba3..cf59e88dc 100644 --- a/lapack-netlib/TESTING/LIN/dchkrfp.f +++ b/lapack-netlib/TESTING/LIN/dchkrfp.f @@ -59,7 +59,7 @@ * ===================================================================== PROGRAM DCHKRFP * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -216,11 +216,6 @@ WRITE( NOUT, FMT = 9999 ) STOP END IF -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF * * Calculate and print the machine dependent constants. * diff --git a/lapack-netlib/TESTING/LIN/dchksy_aa.f b/lapack-netlib/TESTING/LIN/dchksy_aa.f index 53d7db6e8..8974cdec5 100644 --- a/lapack-netlib/TESTING/LIN/dchksy_aa.f +++ b/lapack-netlib/TESTING/LIN/dchksy_aa.f @@ -161,7 +161,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @precisions fortran d -> z c * @@ -172,10 +172,10 @@ $ THRESH, TSTERR, NMAX, A, AFAC, AINV, B, $ X, XACT, WORK, RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -215,15 +215,10 @@ INTEGER ISEED( 4 ), ISEEDY( 4 ) DOUBLE PRECISION RESULT( NTESTS ) * .. -* .. External Functions .. - DOUBLE PRECISION DGET06, DLANSY - EXTERNAL DGET06, DLANSY -* .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, DERRSY, DGET04, DLACPY, - $ DLARHS, DLATB4, DLATMS, DPOT02, DPOT03, DPOT05, - $ DSYCON, DSYRFS, DSYT01_AA, DSYTRF_AA, - $ DSYTRI2, DSYTRS_AA, XLAENV + EXTERNAL ALAERH, ALAHD, ALASUM, DERRSY, DLACPY, DLARHS, + $ DLATB4, DLATMS, DPOT02, DSYT01_AA, DSYTRF_AA, + $ DSYTRS_AA, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -437,22 +432,22 @@ * Adjust the expected value of INFO to account for * pivoting. * - IF( IZERO.GT.0 ) THEN - J = 1 - K = IZERO - 100 CONTINUE - IF( J.EQ.K ) THEN - K = IWORK( J ) - ELSE IF( IWORK( J ).EQ.K ) THEN - K = J - END IF - IF( J.LT.K ) THEN - J = J + 1 - GO TO 100 - END IF - ELSE +c IF( IZERO.GT.0 ) THEN +c J = 1 +c K = IZERO +c 100 CONTINUE +c IF( J.EQ.K ) THEN +c K = IWORK( J ) +c ELSE IF( IWORK( J ).EQ.K ) THEN +c K = J +c END IF +c IF( J.LT.K ) THEN +c J = J + 1 +c GO TO 100 +c END IF +c ELSE K = 0 - END IF +c END IF * * Check error code from DSYTRF and handle error. * @@ -516,31 +511,34 @@ * Check error code from DSYTRS and handle error. * IF( INFO.NE.0 ) THEN - CALL ALAERH( PATH, 'DSYTRS_AA', INFO, 0, - $ UPLO, N, N, -1, -1, NRHS, IMAT, - $ NFAIL, NERRS, NOUT ) - END IF -* - CALL DLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'DSYTRS_AA', INFO, 0, + $ UPLO, N, N, -1, -1, NRHS, IMAT, + $ NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL DLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) * -* Compute the residual for the solution +* Compute the residual for the solution * - CALL DPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, - $ LDA, RWORK, RESULT( 2 ) ) + CALL DPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) * * -* Print information about the tests that did not pass -* the threshold. +* Print information about the tests that did not pass +* the threshold. * - DO 120 K = 2, 2 - IF( RESULT( K ).GE.THRESH ) THEN - IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) - $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, - $ IMAT, K, RESULT( K ) - NFAIL = NFAIL + 1 - END IF - 120 CONTINUE + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF NRUN = NRUN + 1 * * End do for each value of NRHS in NSVAL. diff --git a/lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f b/lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f new file mode 100644 index 000000000..5698bcf94 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f @@ -0,0 +1,572 @@ +*> \brief \b DCHKSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, +* NNS, NSVAL, THRESH, TSTERR, NMAX, A, +* AFAC, AINV, B, X, XACT, WORK, RWORK, +* IWORK, NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NNB, NNS, NOUT +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) +* DOUBLE PRECISION A( * ), AFAC( * ), AINV( * ), B( * ), +* $ RWORK( * ), WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DCHKSY_AA_2STAGE tests DSYTRF_AA_2STAGE, -TRS_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NNB +*> \verbatim +*> NNB is INTEGER +*> The number of values of NB contained in the vector NBVAL. +*> \endverbatim +*> +*> \param[in] NBVAL +*> \verbatim +*> NBVAL is INTEGER array, dimension (NBVAL) +*> The values of the blocksize NB. +*> \endverbatim +*> +*> \param[in] NNS +*> \verbatim +*> NNS is INTEGER +*> The number of values of NRHS contained in the vector NSVAL. +*> \endverbatim +*> +*> \param[in] NSVAL +*> \verbatim +*> NSVAL is INTEGER array, dimension (NNS) +*> The values of the number of right hand sides NRHS. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is DOUBLE PRECISION array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is DOUBLE PRECISION array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (NMAX*NSMAX) +*> where NSMAX is the largest entry in NSVAL. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is DOUBLE PRECISION array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (NMAX*max(3,NSMAX)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(NMAX,2*NSMAX)) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +* @precisions fortran d -> z c +* +*> \ingroup double_lin +* +* ===================================================================== + SUBROUTINE DCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, + $ NSVAL, THRESH, TSTERR, NMAX, A, AFAC, AINV, + $ B, X, XACT, WORK, RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NN, NNB, NNS, NMAX, NOUT + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) + DOUBLE PRECISION A( * ), AFAC( * ), AINV( * ), B( * ), + $ RWORK( * ), WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER NTYPES + PARAMETER ( NTYPES = 10 ) + INTEGER NTESTS + PARAMETER ( NTESTS = 9 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, TYPE, UPLO, XTYPE + CHARACTER*3 PATH, MATPATH + INTEGER I, I1, I2, IMAT, IN, INB, INFO, IOFF, IRHS, + $ IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, MODE, + $ N, NB, NERRS, NFAIL, NIMAT, NRHS, NRUN, NT + DOUBLE PRECISION ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + DOUBLE PRECISION RESULT( NTESTS ) +* .. +* .. External Subroutines .. + EXTERNAL ALAERH, ALAHD, ALASUM, DERRSY, DLACPY, DLARHS, + $ DLATB4, DLATMS, DPOT02, DSYTRF_AA_2STAGE + $ DSYTRS_AA_2STAGE, XLAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Double precision' + PATH( 2: 3 ) = 'S2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Double precision' + MATPATH( 2: 3 ) = 'SY' + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL DERRSY( PATH, NOUT ) + INFOT = 0 +* +* Set the minimum block size for which the block routine should +* be used, which will be later returned by ILAENV +* + CALL XLAENV( 2, 2 ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + IF( N .GT. NMAX ) THEN + NFAIL = NFAIL + 1 + WRITE(NOUT, 9995) 'M ', N, NMAX + GO TO 180 + END IF + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + IZERO = 0 +* +* Do for each value of matrix type IMAT +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* +* Set up parameters with DLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL DLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, + $ ANORM, MODE, CNDNUM, DIST ) +* +* Generate a matrix with DLATMS. +* + SRNAMT = 'DLATMS' + CALL DLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, WORK, + $ INFO ) +* +* Check error code from DLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'DLATMS', INFO, 0, UPLO, N, N, -1, + $ -1, -1, IMAT, NFAIL, NERRS, NOUT ) +* +* Skip all tests for this generated matrix +* + GO TO 160 + END IF +* +* For matrix types 3-6, zero one or more rows and +* columns of the matrix to test that INFO is returned +* correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = ZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = ZERO + 50 CONTINUE + END IF + ELSE + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = ZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the last IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = ZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* Do for each value of NB in NBVAL +* + DO 150 INB = 1, NNB +* +* Set the optimal blocksize, which will be later +* returned by ILAENV. +* + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Copy the test matrix A into matrix AFAC which +* will be factorized in place. This is needed to +* preserve the test matrix A for subsequent tests. +* + CALL DLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) +* +* Compute the L*D*L**T or U*D*U**T factorization of the +* matrix. IWORK stores details of the interchanges and +* the block structure of D. AINV is a work array for +* block factorization, LWORK is the length of AINV. +* + SRNAMT = 'DSYTRF_AA_2STAGE' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL DSYTRF_AA_2STAGE( UPLO, N, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ WORK, LWORK, + $ INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from DSYTRF and handle error. +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'DSYTRF_AA_2STAGE', INFO, K, + $ UPLO, N, N, -1, -1, NB, IMAT, NFAIL, + $ NERRS, NOUT ) + END IF +* +*+ TEST 1 +* Reconstruct matrix from factors and compute residual. +* +c CALL DSYT01_AA( UPLO, N, A, LDA, AFAC, LDA, IWORK, +c $ AINV, LDA, RWORK, RESULT( 1 ) ) +c NT = 1 + NT = 0 +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )UPLO, N, NB, IMAT, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT +* +* Skip solver test if INFO is not 0. +* + IF( INFO.NE.0 ) THEN + GO TO 140 + END IF +* +* Do for each value of NRHS in NSVAL. +* + DO 130 IRHS = 1, NNS + NRHS = NSVAL( IRHS ) +* +*+ TEST 2 (Using TRS) +* Solve and compute residual for A * X = B. +* +* Choose a set of NRHS random solution vectors +* stored in XACT and set up the right hand side B +* + SRNAMT = 'DLARHS' + CALL DLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, + $ KL, KU, NRHS, A, LDA, XACT, LDA, + $ B, LDA, ISEED, INFO ) + CALL DLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* + SRNAMT = 'DSYTRS_AA_2STAGE' + LWORK = MAX( 1, 3*N-2 ) + CALL DSYTRS_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, IWORK, IWORK( 1+N ), + $ X, LDA, INFO ) +* +* Check error code from DSYTRS and handle error. +* + IF( INFO.NE.0 ) THEN + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'DSYTRS_AA_2STAGE', + $ INFO, 0, UPLO, N, N, -1, -1, + $ NRHS, IMAT, NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL DLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) +* +* Compute the residual for the solution +* + CALL DPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF + NRUN = NRUN + 1 +* +* End do for each value of NRHS in NSVAL. +* + 130 CONTINUE + 140 CONTINUE + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NB =', I4, ', type ', + $ I2, ', test ', I2, ', ratio =', G12.5 ) + 9998 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NRHS=', I3, ', type ', + $ I2, ', test(', I2, ') =', G12.5 ) + 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', + $ I6 ) + RETURN +* +* End of DCHKSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/ddrvls.f b/lapack-netlib/TESTING/LIN/ddrvls.f index 5d190e118..2f4975553 100644 --- a/lapack-netlib/TESTING/LIN/ddrvls.f +++ b/lapack-netlib/TESTING/LIN/ddrvls.f @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_lin * @@ -192,10 +192,10 @@ $ NBVAL, NXVAL, THRESH, TSTERR, A, COPYA, B, $ COPYB, C, S, COPYS, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -233,8 +233,8 @@ DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. - INTEGER ISEED( 4 ), ISEEDY( 4 ), IWORKQUERY - DOUBLE PRECISION RESULT( NTESTS ), WORKQUERY + INTEGER ISEED( 4 ), ISEEDY( 4 ), IWQ + DOUBLE PRECISION RESULT( NTESTS ), WQ * .. * .. Allocatable Arrays .. DOUBLE PRECISION, ALLOCATABLE :: WORK (:) @@ -321,43 +321,76 @@ M = MMAX N = NMAX NRHS = NSMAX - LDA = MAX( 1, M ) - LDB = MAX( 1, M, N ) MNMIN = MAX( MIN( M, N ), 1 ) * * Compute workspace needed for routines * DQRT14, DQRT17 (two side cases), DQRT15 and DQRT12 * - LWORK = MAX( ( M+N )*NRHS, + LWORK = MAX( 1, ( M+N )*NRHS, $ ( N+NRHS )*( M+2 ), ( M+NRHS )*( N+2 ), $ MAX( M+MNMIN, NRHS*MNMIN,2*N+M ), $ MAX( M*N+4*MNMIN+MAX(M,N), M*N+2*MNMIN+4*N ) ) + LIWORK = 1 +* +* Iterate through all test cases and compute necessary workspace +* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* + DO IM = 1, NM + M = MVAL( IM ) + LDA = MAX( 1, M ) + DO IN = 1, NN + N = NVAL( IN ) + MNMIN = MAX(MIN( M, N ),1) + LDB = MAX( 1, M, N ) + DO INS = 1, NNS + NRHS = NSVAL( INS ) + DO IRANK = 1, 2 + DO ISCALE = 1, 3 + ITYPE = ( IRANK-1 )*3 + ISCALE + IF( DOTYPE( ITYPE ) ) THEN + IF( IRANK.EQ.1 ) THEN + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* +* Compute workspace needed for DGELS + CALL DGELS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_DGELS = INT ( WQ ) +* Compute workspace needed for DGETSLS + CALL DGETSLS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_DGETSLS = INT( WQ ) + ENDDO + END IF +* Compute workspace needed for DGELSY + CALL DGELSY( M, N, NRHS, A, LDA, B, LDB, IWQ, + $ RCOND, CRANK, WQ, -1, INFO ) + LWORK_DGELSY = INT( WQ ) +* Compute workspace needed for DGELSS + CALL DGELSS( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1 , INFO ) + LWORK_DGELSS = INT( WQ ) +* Compute workspace needed for DGELSD + CALL DGELSD( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1, IWQ, INFO ) + LWORK_DGELSD = INT( WQ ) +* Compute LIWORK workspace needed for DGELSY and DGELSD + LIWORK = MAX( LIWORK, N, IWQ ) +* Compute LWORK workspace needed for all functions + LWORK = MAX( LWORK, LWORK_DGELS, LWORK_DGETSLS, + $ LWORK_DGELSY, LWORK_DGELSS, + $ LWORK_DGELSD ) + END IF + ENDDO + ENDDO + ENDDO + ENDDO + ENDDO * -* Compute workspace needed for DGELS - CALL DGELS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_DGELS = INT ( WORKQUERY ) -* Compute workspace needed for DGETSLS - CALL DGETSLS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_DGETSLS = INT( WORKQUERY ) -* Compute workspace needed for DGELSY - CALL DGELSY( M, N, NRHS, A, LDA, B, LDB, IWORKQUERY, - $ RCOND, CRANK, WORKQUERY, -1, INFO ) - LWORK_DGELSY = INT( WORKQUERY ) -* Compute workspace needed for DGELSS - CALL DGELSS( M, N, NRHS, A, LDA, B, LDB, S, - $ RCOND, CRANK, WORKQUERY, -1 , INFO ) - LWORK_DGELSS = INT( WORKQUERY ) -* Compute workspace needed for DGELSD - CALL DGELSD( M, N, NRHS, A, LDA, B, LDB, S, - $ RCOND, CRANK, WORKQUERY, -1, IWORKQUERY, INFO ) - LWORK_DGELSD = INT( WORKQUERY ) -* Compute LIWORK workspace needed for DGELSY and DGELSD - LIWORK = MAX( 1, N, IWORKQUERY ) -* Compute LWORK workspace needed for all functions - LWORK = MAX( 1, LWORK, LWORK_DGELS, LWORK_DGETSLS, LWORK_DGELSY, - $ LWORK_DGELSS, LWORK_DGELSD ) LWLSY = LWORK * ALLOCATE( WORK( LWORK ) ) diff --git a/lapack-netlib/TESTING/LIN/ddrvrf3.f b/lapack-netlib/TESTING/LIN/ddrvrf3.f index c00aac1a7..59d291817 100644 --- a/lapack-netlib/TESTING/LIN/ddrvrf3.f +++ b/lapack-netlib/TESTING/LIN/ddrvrf3.f @@ -110,7 +110,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_lin * @@ -118,10 +118,10 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, + D_WORK_DLANGE, D_WORK_DGEQRF, TAU ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, NN, NOUT @@ -223,7 +223,7 @@ * IF ( IALPHA.EQ. 1) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 2) THEN ALPHA = ONE ELSE ALPHA = DLARND( 2, ISEED ) diff --git a/lapack-netlib/TESTING/LIN/ddrvsy_aa.f b/lapack-netlib/TESTING/LIN/ddrvsy_aa.f index af39303e1..cf1b63841 100644 --- a/lapack-netlib/TESTING/LIN/ddrvsy_aa.f +++ b/lapack-netlib/TESTING/LIN/ddrvsy_aa.f @@ -143,7 +143,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * @precisions fortran d -> z c * @@ -154,10 +154,10 @@ $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, $ RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -201,7 +201,7 @@ * .. * .. External Subroutines .. EXTERNAL ALADHD, ALAERH, ALASVM, DERRVX, DGET04, DLACPY, - $ DLARHS, DLASET, DLATB4, DLATMS, DPOT02, DPOT05, + $ DLARHS, DLASET, DLATB4, DLATMS, DPOT02, $ DSYSV_AA, DSYT01_AA, DSYTRF_AA, XLAENV * .. * .. Scalars in Common .. @@ -240,7 +240,6 @@ DO 10 I = 1, 4 ISEED( I ) = ISEEDY( I ) 10 CONTINUE - LWORK = MAX( 2*NMAX, NMAX*NRHS ) * * Test the error exits * @@ -259,6 +258,8 @@ * DO 180 IN = 1, NN N = NVAL( IN ) + LWORK = MAX( 3*N-2, N*(1+NB) ) + LWORK = MAX( LWORK, 1 ) LDA = MAX( N, 1 ) XTYPE = 'N' NIMAT = NTYPES diff --git a/lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f b/lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f new file mode 100644 index 000000000..0be321eb0 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f @@ -0,0 +1,490 @@ +*> \brief \b DDRVSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DDRVSY_AA_2STAGE( +* DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, NMAX, +* A, AFAC, AINV, B, X, XACT, WORK, RWORK, IWORK, +* NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NOUT, NRHS +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NVAL( * ) +* DOUBLE PRECISION RWORK( * ) +* DOUBLE PRECISION A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DDRVSY_AA_2STAGE tests the driver routine DSYSV_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand side vectors to be generated for +*> each linear system. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is DOUBLE PRECISION array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is DOUBLE PRECISION array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is DOUBLE PRECISION array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (NMAX*max(2,NRHS)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (NMAX+2*NRHS) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup double_lin +* +* ===================================================================== + SUBROUTINE DDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, + $ RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NMAX, NN, NOUT, NRHS + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NVAL( * ) + DOUBLE PRECISION RWORK( * ) + DOUBLE PRECISION A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + INTEGER NTYPES, NTESTS + PARAMETER ( NTYPES = 10, NTESTS = 3 ) + INTEGER NFACT + PARAMETER ( NFACT = 2 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, FACT, TYPE, UPLO, XTYPE + CHARACTER*3 MATPATH, PATH + INTEGER I, I1, I2, IFACT, IMAT, IN, INFO, IOFF, IUPLO, + $ IZERO, J, K, KL, KU, LDA, LWORK, MODE, N, + $ NB, NBMIN, NERRS, NFAIL, NIMAT, NRUN, NT + DOUBLE PRECISION ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER FACTS( NFACT ), UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + DOUBLE PRECISION RESULT( NTESTS ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLANSY, SGET06 + EXTERNAL DLANSY, SGET06 +* .. +* .. External Subroutines .. + EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, DERRVX, + $ DGET04, DLACPY, DLARHS, DLATB4, DLATMS, + $ DSYSV_AA_2STAGE, CHET01_AA, DPOT02, + $ DSYTRF_AA_2STAGE +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / , FACTS / 'F', 'N' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Double precision' + PATH( 2: 3 ) = 'S2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Double precision' + MATPATH( 2: 3 ) = 'SY' +* + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL DERRVX( PATH, NOUT ) + INFOT = 0 +* +* Set the block size and minimum block size for testing. +* + NB = 1 + NBMIN = 2 + CALL XLAENV( 1, NB ) + CALL XLAENV( 2, NBMIN ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* Set up parameters with DLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL DLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, ANORM, + $ MODE, CNDNUM, DIST ) +* +* Generate a matrix with DLATMS. +* + SRNAMT = 'DLATMS' + CALL DLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, + $ WORK, INFO ) +* +* Check error code from DLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'DLATMS', INFO, 0, UPLO, N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + GO TO 160 + END IF +* +* For types 3-6, zero one or more rows and columns of +* the matrix to test that INFO is returned correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = ZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = ZERO + 50 CONTINUE + END IF + ELSE + IOFF = 0 + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = ZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = ZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* + DO 150 IFACT = 1, NFACT +* +* Do first for FACT = 'F', then for other values. +* + FACT = FACTS( IFACT ) +* +* Form an exact solution and set the right hand side. +* + SRNAMT = 'DLARHS' + CALL DLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, KL, KU, + $ NRHS, A, LDA, XACT, LDA, B, LDA, ISEED, + $ INFO ) + XTYPE = 'C' +* +* --- Test DSYSV_AA_2STAGE --- +* + IF( IFACT.EQ.2 ) THEN + CALL DLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) + CALL DLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* +* Factor the matrix and solve the system using DSYSV_AA. +* + SRNAMT = 'DSYSV_AA_2STAGE ' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL DSYSV_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ X, LDA, WORK, LWORK, INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from DSYSV_AA . +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'DSYSV_AA', INFO, K, + $ UPLO, N, N, -1, -1, NRHS, + $ IMAT, NFAIL, NERRS, NOUT ) + GO TO 120 + ELSE IF( INFO.NE.0 ) THEN + GO TO 120 + END IF +* +* Compute residual of the computed solution. +* + CALL DLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + CALL DPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, + $ LDA, RWORK, RESULT( 1 ) ) +* +* Reconstruct matrix from factors and compute +* residual. +* +c CALL CHET01_AA( UPLO, N, A, LDA, AFAC, LDA, +c $ IWORK, AINV, LDA, RWORK, +c $ RESULT( 2 ) ) +c NT = 2 + NT = 1 +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALADHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )'DSYSV_AA ', + $ UPLO, N, IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT + 120 CONTINUE + END IF +* + 150 CONTINUE +* + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASVM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( 1X, A, ', UPLO=''', A1, ''', N =', I5, ', type ', I2, + $ ', test ', I2, ', ratio =', G12.5 ) + RETURN +* +* End of DDRVSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/derrsy.f b/lapack-netlib/TESTING/LIN/derrsy.f index d7d00fa9e..b511cdcc5 100644 --- a/lapack-netlib/TESTING/LIN/derrsy.f +++ b/lapack-netlib/TESTING/LIN/derrsy.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_lin * * ===================================================================== SUBROUTINE DERRSY( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -93,7 +93,7 @@ $ DSYTRF_RK, DSYTRF_ROOK, DSYTRF_AA, DSYTRI, $ DSYTRI_3, DSYTRI_3X, DSYTRI_ROOK, DSYTRI2, $ DSYTRI2X, DSYTRS, DSYTRS_3, DSYTRS_ROOK, - $ DSYTRS_AA + $ DSYTRS_AA, DSYTRF_AA_2STAGE, DSYTRS_AA_2STAGE * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -525,6 +525,62 @@ CALL DSYTRS_AA( 'U', 0, 1, A, 2, IP, B, 1, W, -2, INFO ) CALL CHKXER( 'DSYTRS_AA', INFOT, NOUT, LERR, OK ) * + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* Test error exits of the routines that use factorization +* of a symmetric indefinite matrix with Aasen's algorithm. +* +* DSYTRF_AA_2STAGE +* + SRNAMT = 'DSYTRF_AA_2STAGE' + INFOT = 1 + CALL DSYTRF_AA_2STAGE( '/', 0, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'DSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRF_AA_2STAGE( 'U', -1, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'DSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DSYTRF_AA_2STAGE( 'U', 2, A, 1, A, 2, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'DSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'DSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 8, IP, IP, W, 0, + $ INFO ) + CALL CHKXER( 'DSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) +* +* DSYTRS_AA_2STAGE +* + SRNAMT = 'DSYTRS_AA_2STAGE' + INFOT = 1 + CALL DSYTRS_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'DSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRS_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'DSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYTRS_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'DSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYTRS_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'DSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL DSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'DSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 8, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'DSYTRS_AA_STAGE', INFOT, NOUT, LERR, OK ) ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * * Test error exits of the routines that use factorization diff --git a/lapack-netlib/TESTING/LIN/derrvx.f b/lapack-netlib/TESTING/LIN/derrvx.f index 58f270c62..3a4a6b7fc 100644 --- a/lapack-netlib/TESTING/LIN/derrvx.f +++ b/lapack-netlib/TESTING/LIN/derrvx.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_lin * * ===================================================================== SUBROUTINE DERRVX( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -91,7 +91,8 @@ EXTERNAL CHKXER, DGBSV, DGBSVX, DGESV, DGESVX, DGTSV, $ DGTSVX, DPBSV, DPBSVX, DPOSV, DPOSVX, DPPSV, $ DPPSVX, DPTSV, DPTSVX, DSPSV, DSPSVX, DSYSV, - $ DSYSV_AA, DSYSV_RK, DSYSV_ROOK, DSYSVX + $ DSYSV_AA, DSYSV_RK, DSYSV_ROOK, DSYSVX, + $ DSYSV_AA_2STAGE * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -716,6 +717,36 @@ INFOT = 8 CALL DSYSV_AA( 'U', 2, 0, A, 2, IP, B, 1, W, 1, INFO ) CALL CHKXER( 'DSYSV_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* DSYSV_AASEN_2STAGE +* + SRNAMT = 'DSYSV_AA_2STAGE' + INFOT = 1 + CALL DSYSV_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'DSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYSV_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'DSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYSV_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'DSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYSV_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'DSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSYSV_AA_2STAGE( 'U', 2, 1, A, 2, A, 2, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'DSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL DSYSV_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, B, 2, + $ W, 1, INFO ) + CALL CHKXER( 'DSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/dlahilb.f b/lapack-netlib/TESTING/LIN/dlahilb.f index a1989d578..e115e335a 100644 --- a/lapack-netlib/TESTING/LIN/dlahilb.f +++ b/lapack-netlib/TESTING/LIN/dlahilb.f @@ -8,7 +8,7 @@ * Definition: * =========== * -* SUBROUTINE DLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) +* SUBROUTINE DLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) * * .. Scalar Arguments .. * INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -53,7 +53,7 @@ *> *> \param[in] NRHS *> \verbatim -*> NRHS is NRHS +*> NRHS is INTEGER *> The requested number of right-hand sides. *> \endverbatim *> @@ -117,17 +117,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_lin * * ===================================================================== - SUBROUTINE DLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) + SUBROUTINE DLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -140,7 +140,6 @@ INTEGER TM, TI, R INTEGER M INTEGER I, J - COMPLEX*16 TMP * .. * .. Parameters .. * NMAX_EXACT the largest dimension where the generated data is @@ -203,9 +202,8 @@ * * Generate matrix B as simply the first NRHS columns of M * the * identity. - TMP = DBLE(M) - CALL DLASET('Full', N, NRHS, 0.0D+0, TMP, B, LDB) -* + CALL DLASET('Full', N, NRHS, 0.0D+0, DBLE(M), B, LDB) + * Generate the true solutions in X. Because B = the first NRHS * columns of M*I, the true solutions are just the first NRHS columns * of the inverse Hilbert matrix. diff --git a/lapack-netlib/TESTING/LIN/dsyt01_3.f b/lapack-netlib/TESTING/LIN/dsyt01_3.f index 5eabb05f6..a12425ee5 100644 --- a/lapack-netlib/TESTING/LIN/dsyt01_3.f +++ b/lapack-netlib/TESTING/LIN/dsyt01_3.f @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_lin * @@ -140,10 +140,10 @@ SUBROUTINE DSYT01_3( UPLO, N, A, LDA, AFAC, LDAFAC, E, IPIV, C, $ LDC, RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/TESTING/LIN/ilaenv.f b/lapack-netlib/TESTING/LIN/ilaenv.f index 657128bc1..5f7f291aa 100644 --- a/lapack-netlib/TESTING/LIN/ilaenv.f +++ b/lapack-netlib/TESTING/LIN/ilaenv.f @@ -119,7 +119,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup aux_lin * @@ -150,10 +150,10 @@ INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, $ N4 ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*( * ) NAME, OPTS @@ -244,3 +244,50 @@ C ILAENV = 0 * End of ILAENV * END + INTEGER FUNCTION ILAENV2STAGE( ISPEC, NAME, OPTS, N1, N2, + $ N3, N4 ) +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* ===================================================================== +* +* .. Local variables .. + INTEGER IISPEC +* .. External Functions .. + INTEGER IPARAM2STAGE + EXTERNAL IPARAM2STAGE +* .. +* .. Arrays in Common .. + INTEGER IPARMS( 100 ) +* .. +* .. Common blocks .. + COMMON / CLAENV / IPARMS +* .. +* .. Save statement .. + SAVE / CLAENV / +* .. +* .. Executable Statements .. +* + IF(( ISPEC.GE.1 ) .AND. (ISPEC.LE.5)) THEN +* +* 1 <= ISPEC <= 5: 2stage eigenvalues SVD routines. +* + IF( ISPEC.EQ.1 ) THEN + ILAENV2STAGE = IPARMS( 1 ) + ELSE + IISPEC = 16 + ISPEC + ILAENV2STAGE = IPARAM2STAGE( IISPEC, NAME, OPTS, + $ N1, N2, N3, N4 ) + ENDIF +* + ELSE +* +* Invalid value for ISPEC +* + ILAENV2STAGE = -1 + END IF +* + RETURN + END diff --git a/lapack-netlib/TESTING/LIN/schkaa.f b/lapack-netlib/TESTING/LIN/schkaa.f index bbee97b81..33b109aa7 100644 --- a/lapack-netlib/TESTING/LIN/schkaa.f +++ b/lapack-netlib/TESTING/LIN/schkaa.f @@ -53,6 +53,7 @@ *> SSR 10 List types on next line if 0 < NTYPES < 10 *> SSK 10 List types on next line if 0 < NTYPES < 10 *> SSA 10 List types on next line if 0 < NTYPES < 10 +*> SS2 10 List types on next line if 0 < NTYPES < 10 *> SSP 10 List types on next line if 0 < NTYPES < 10 *> STR 18 List types on next line if 0 < NTYPES < 18 *> STP 18 List types on next line if 0 < NTYPES < 18 @@ -108,7 +109,7 @@ * ===================================================================== PROGRAM SCHKAA * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -164,7 +165,8 @@ $ SCHKTP, SCHKTR, SCHKTZ, SDRVGB, SDRVGE, SDRVGT, $ SDRVLS, SDRVPB, SDRVPO, SDRVPP, SDRVPT, SDRVSP, $ SDRVSY, SDRVSY_ROOK, SDRVSY_RK, SDRVSY_AA, - $ ILAVER, SCHKLQTP, SCHKQRT, SCHKQRTP + $ ILAVER, SCHKLQTP, SCHKQRT, SCHKQRTP, + $ SCHKLQT, SCHKTSQR * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -720,6 +722,34 @@ ELSE WRITE( NOUT, FMT = 9988 )PATH END IF +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* SA: symmetric indefinite matrices, +* with partial (Aasen's) pivoting algorithm +* + NTYPES = 10 + CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) +* + IF( TSTCHK ) THEN + CALL SCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB2, NBVAL2, + $ NNS, NSVAL, THRESH, TSTERR, LDA, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9989 )PATH + END IF +* + IF( TSTDRV ) THEN + CALL SDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ LDA, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9988 )PATH + END IF * ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/schklqtp.f b/lapack-netlib/TESTING/LIN/schklqtp.f index 183882efd..dbb44e020 100644 --- a/lapack-netlib/TESTING/LIN/schklqtp.f +++ b/lapack-netlib/TESTING/LIN/schklqtp.f @@ -94,7 +94,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_lin * @@ -103,10 +103,10 @@ $ NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -132,7 +132,7 @@ REAL RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, DERRLQTP, DLQT04 + EXTERNAL ALAERH, ALAHD, ALASUM, SERRLQTP, SLQT05 * .. * .. Scalars in Common .. LOGICAL LERR, OK diff --git a/lapack-netlib/TESTING/LIN/schkqrtp.f b/lapack-netlib/TESTING/LIN/schkqrtp.f index 8a6915d3c..32bae3b39 100644 --- a/lapack-netlib/TESTING/LIN/schkqrtp.f +++ b/lapack-netlib/TESTING/LIN/schkqrtp.f @@ -94,7 +94,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_lin * @@ -103,10 +103,10 @@ $ NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -132,7 +132,7 @@ REAL RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, SERRQRTP + EXTERNAL ALAERH, ALAHD, ALASUM, SERRQRTP, SQRT05 * .. * .. Scalars in Common .. LOGICAL LERR, OK diff --git a/lapack-netlib/TESTING/LIN/schkrfp.f b/lapack-netlib/TESTING/LIN/schkrfp.f index 75d19e373..066bca694 100644 --- a/lapack-netlib/TESTING/LIN/schkrfp.f +++ b/lapack-netlib/TESTING/LIN/schkrfp.f @@ -59,7 +59,7 @@ * ===================================================================== PROGRAM SCHKRFP * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -215,11 +215,6 @@ WRITE( NOUT, FMT = 9999 ) STOP END IF -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF * * Calculate and print the machine dependent constants. * diff --git a/lapack-netlib/TESTING/LIN/schksy_aa.f b/lapack-netlib/TESTING/LIN/schksy_aa.f index 35b1d9507..93be3bdd4 100644 --- a/lapack-netlib/TESTING/LIN/schksy_aa.f +++ b/lapack-netlib/TESTING/LIN/schksy_aa.f @@ -161,9 +161,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 -* -* @precisions fortran d -> z c +*> \date November 2017 * *> \ingroup real_lin * @@ -172,10 +170,10 @@ $ THRESH, TSTERR, NMAX, A, AFAC, AINV, B, $ X, XACT, WORK, RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -215,15 +213,10 @@ INTEGER ISEED( 4 ), ISEEDY( 4 ) REAL RESULT( NTESTS ) * .. -* .. External Functions .. - REAL DGET06, SLANSY - EXTERNAL DGET06, SLANSY -* .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, SERRSY, SGET04, SLACPY, - $ SLARHS, SLATB4, SLATMS, SPOT02, DPOT03, DPOT05, - $ DSYCON, SSYRFS, SSYT01_AA, SSYTRF_AA, - $ DSYTRI2, SSYTRS_AA, XLAENV + EXTERNAL ALAERH, ALAHD, ALASUM, SERRSY, SLACPY, SLARHS, + $ SLATB4, SLATMS, SPOT02, SSYT01_AA, SSYTRF_AA, + $ SSYTRS_AA, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -438,22 +431,22 @@ * Adjust the expected value of INFO to account for * pivoting. * - IF( IZERO.GT.0 ) THEN - J = 1 - K = IZERO - 100 CONTINUE - IF( J.EQ.K ) THEN - K = IWORK( J ) - ELSE IF( IWORK( J ).EQ.K ) THEN - K = J - END IF - IF( J.LT.K ) THEN - J = J + 1 - GO TO 100 - END IF - ELSE +c IF( IZERO.GT.0 ) THEN +c J = 1 +c K = IZERO +c 100 CONTINUE +c IF( J.EQ.K ) THEN +c K = IWORK( J ) +c ELSE IF( IWORK( J ).EQ.K ) THEN +c K = J +c END IF +c IF( J.LT.K ) THEN +c J = J + 1 +c GO TO 100 +c END IF +c ELSE K = 0 - END IF +c END IF * * Check error code from SSYTRF and handle error. * @@ -517,31 +510,34 @@ * Check error code from SSYTRS and handle error. * IF( INFO.NE.0 ) THEN - CALL ALAERH( PATH, 'SSYTRS_AA', INFO, 0, - $ UPLO, N, N, -1, -1, NRHS, IMAT, - $ NFAIL, NERRS, NOUT ) - END IF -* - CALL SLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'SSYTRS_AA', INFO, 0, + $ UPLO, N, N, -1, -1, NRHS, IMAT, + $ NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL SLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) * -* Compute the residual for the solution +* Compute the residual for the solution * - CALL SPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, - $ LDA, RWORK, RESULT( 2 ) ) + CALL SPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) * * * Print information about the tests that did not pass * the threshold. * - DO 120 K = 2, 2 - IF( RESULT( K ).GE.THRESH ) THEN - IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) - $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, - $ IMAT, K, RESULT( K ) - NFAIL = NFAIL + 1 - END IF - 120 CONTINUE + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF NRUN = NRUN + 1 * * End do for each value of NRHS in NSVAL. diff --git a/lapack-netlib/TESTING/LIN/schksy_aa_2stage.f b/lapack-netlib/TESTING/LIN/schksy_aa_2stage.f new file mode 100644 index 000000000..7cdcc9181 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/schksy_aa_2stage.f @@ -0,0 +1,572 @@ +*> \brief \b SCHKSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, +* NNS, NSVAL, THRESH, TSTERR, NMAX, A, +* AFAC, AINV, B, X, XACT, WORK, RWORK, +* IWORK, NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NNB, NNS, NOUT +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) +* REAL A( * ), AFAC( * ), AINV( * ), B( * ), +* $ RWORK( * ), WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SCHKSY_AA_2STAGE tests SSYTRF_AA_2STAGE, -TRS_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NNB +*> \verbatim +*> NNB is INTEGER +*> The number of values of NB contained in the vector NBVAL. +*> \endverbatim +*> +*> \param[in] NBVAL +*> \verbatim +*> NBVAL is INTEGER array, dimension (NBVAL) +*> The values of the blocksize NB. +*> \endverbatim +*> +*> \param[in] NNS +*> \verbatim +*> NNS is INTEGER +*> The number of values of NRHS contained in the vector NSVAL. +*> \endverbatim +*> +*> \param[in] NSVAL +*> \verbatim +*> NSVAL is INTEGER array, dimension (NNS) +*> The values of the number of right hand sides NRHS. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is REAL array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is REAL array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is REAL array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is REAL array, dimension (NMAX*NSMAX) +*> where NSMAX is the largest entry in NSVAL. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is REAL array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (NMAX*max(3,NSMAX)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(NMAX,2*NSMAX)) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup real_lin +* +* ===================================================================== + SUBROUTINE SCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, + $ NSVAL, THRESH, TSTERR, NMAX, A, AFAC, AINV, + $ B, X, XACT, WORK, RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NN, NNB, NNS, NMAX, NOUT + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) + REAL A( * ), AFAC( * ), AINV( * ), B( * ), + $ RWORK( * ), WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + INTEGER NTYPES + PARAMETER ( NTYPES = 10 ) + INTEGER NTESTS + PARAMETER ( NTESTS = 9 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, TYPE, UPLO, XTYPE + CHARACTER*3 PATH, MATPATH + INTEGER I, I1, I2, IMAT, IN, INB, INFO, IOFF, IRHS, + $ IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, MODE, + $ N, NB, NERRS, NFAIL, NIMAT, NRHS, NRUN, NT + REAL ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + REAL RESULT( NTESTS ) +* .. +* .. External Subroutines .. + EXTERNAL ALAERH, ALAHD, ALASUM, SERRSY, SLACPY, SLARHS, + $ SLATB4, SLATMS, SPOT02, SSYT01_AA, + $ SSYTRF_AA_2STAGE, SSYTRS_AA_2STAGE, + $ XLAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* +* Test path +* + PATH( 1: 1 ) = 'Single precision' + PATH( 2: 3 ) = 'S2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Single precision' + MATPATH( 2: 3 ) = 'SY' + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL SERRSY( PATH, NOUT ) + INFOT = 0 +* +* Set the minimum block size for which the block routine should +* be used, which will be later returned by ILAENV +* + CALL XLAENV( 2, 2 ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + IF( N .GT. NMAX ) THEN + NFAIL = NFAIL + 1 + WRITE(NOUT, 9995) 'M ', N, NMAX + GO TO 180 + END IF + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + IZERO = 0 +* +* Do for each value of matrix type IMAT +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* +* Set up parameters with SLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL SLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, + $ ANORM, MODE, CNDNUM, DIST ) +* +* Generate a matrix with SLATMS. +* + SRNAMT = 'SLATMS' + CALL SLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, WORK, + $ INFO ) +* +* Check error code from SLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'SLATMS', INFO, 0, UPLO, N, N, -1, + $ -1, -1, IMAT, NFAIL, NERRS, NOUT ) +* +* Skip all tests for this generated matrix +* + GO TO 160 + END IF +* +* For matrix types 3-6, zero one or more rows and +* columns of the matrix to test that INFO is returned +* correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = ZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = ZERO + 50 CONTINUE + END IF + ELSE + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = ZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the last IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = ZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* Do for each value of NB in NBVAL +* + DO 150 INB = 1, NNB +* +* Set the optimal blocksize, which will be later +* returned by ILAENV. +* + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Copy the test matrix A into matrix AFAC which +* will be factorized in place. This is needed to +* preserve the test matrix A for subsequent tests. +* + CALL SLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) +* +* Compute the L*D*L**T or U*D*U**T factorization of the +* matrix. IWORK stores details of the interchanges and +* the block structure of D. AINV is a work array for +* block factorization, LWORK is the length of AINV. +* + SRNAMT = 'SSYTRF_AA_2STAGE' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL SSYTRF_AA_2STAGE( UPLO, N, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ WORK, LWORK, + $ INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from SSYTRF and handle error. +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'SSYTRF_AA_2STAGE', INFO, K, + $ UPLO, N, N, -1, -1, NB, IMAT, NFAIL, + $ NERRS, NOUT ) + END IF +* +*+ TEST 1 +* Reconstruct matrix from factors and compute residual. +* +* CALL SSYT01_AA( UPLO, N, A, LDA, AFAC, LDA, IWORK, +* $ AINV, LDA, RWORK, RESULT( 1 ) ) +* NT = 1 + NT = 0 +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )UPLO, N, NB, IMAT, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT +* +* Skip solver test if INFO is not 0. +* + IF( INFO.NE.0 ) THEN + GO TO 140 + END IF +* +* Do for each value of NRHS in NSVAL. +* + DO 130 IRHS = 1, NNS + NRHS = NSVAL( IRHS ) +* +*+ TEST 2 (Using TRS) +* Solve and compute residual for A * X = B. +* +* Choose a set of NRHS random solution vectors +* stored in XACT and set up the right hand side B +* + SRNAMT = 'SLARHS' + CALL SLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, + $ KL, KU, NRHS, A, LDA, XACT, LDA, + $ B, LDA, ISEED, INFO ) + CALL SLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* + SRNAMT = 'SSYTRS_AA_2STAGE' + LWORK = MAX( 1, 3*N-2 ) + CALL SSYTRS_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, IWORK, IWORK( 1+N ), + $ X, LDA, INFO ) +* +* Check error code from SSYTRS and handle error. +* + IF( INFO.NE.0 ) THEN + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'SSYTRS_AA_2STAGE', + $ INFO, 0, UPLO, N, N, -1, -1, + $ NRHS, IMAT, NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL SLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) +* +* Compute the residual for the solution +* + CALL SPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF + NRUN = NRUN + 1 +* +* End do for each value of NRHS in NSVAL. +* + 130 CONTINUE + 140 CONTINUE + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NB =', I4, ', type ', + $ I2, ', test ', I2, ', ratio =', G12.5 ) + 9998 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NRHS=', I3, ', type ', + $ I2, ', test(', I2, ') =', G12.5 ) + 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', + $ I6 ) + RETURN +* +* End of DCHKSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/schksy_rk.f b/lapack-netlib/TESTING/LIN/schksy_rk.f index 22416ca4d..5456150a6 100644 --- a/lapack-netlib/TESTING/LIN/schksy_rk.f +++ b/lapack-netlib/TESTING/LIN/schksy_rk.f @@ -167,7 +167,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_lin * @@ -176,10 +176,10 @@ $ THRESH, TSTERR, NMAX, A, AFAC, E, AINV, B, $ X, XACT, WORK, RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -210,7 +210,7 @@ CHARACTER DIST, TYPE, UPLO, XTYPE CHARACTER*3 PATH, MATPATH INTEGER I, I1, I2, IMAT, IN, INB, INFO, IOFF, IRHS, - $ ITEMP, IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, + $ IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, $ MODE, N, NB, NERRS, NFAIL, NIMAT, NRHS, NRUN, $ NT REAL ALPHA, ANORM, CNDNUM, CONST, STEMP, SING_MAX, @@ -218,7 +218,7 @@ * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) - INTEGER IDUMMY( 1 ), ISEED( 4 ), ISEEDY( 4 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) REAL BLOCK( 2, 2 ), SDUMMY( 1 ), RESULT( NTESTS ) * .. * .. External Functions .. diff --git a/lapack-netlib/TESTING/LIN/sdrvls.f b/lapack-netlib/TESTING/LIN/sdrvls.f index c408a9bf0..2cf3439b5 100644 --- a/lapack-netlib/TESTING/LIN/sdrvls.f +++ b/lapack-netlib/TESTING/LIN/sdrvls.f @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_lin * @@ -192,10 +192,10 @@ $ NBVAL, NXVAL, THRESH, TSTERR, A, COPYA, B, $ COPYB, C, S, COPYS, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -233,8 +233,8 @@ REAL EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. - INTEGER ISEED( 4 ), ISEEDY( 4 ), IWORKQUERY - REAL RESULT( NTESTS ), WORKQUERY + INTEGER ISEED( 4 ), ISEEDY( 4 ), IWQ + REAL RESULT( NTESTS ), WQ * .. * .. Allocatable Arrays .. REAL, ALLOCATABLE :: WORK (:) @@ -248,7 +248,7 @@ EXTERNAL ALAERH, ALAHD, ALASVM, SAXPY, SERRLS, SGELS, $ SGELSD, SGELSS, SGELSY, SGEMM, SLACPY, $ SLARNV, SQRT13, SQRT15, SQRT16, SSCAL, - $ XLAENV + $ XLAENV, SGETSLS * .. * .. Intrinsic Functions .. INTRINSIC INT, LOG, MAX, MIN, REAL, SQRT @@ -321,43 +321,76 @@ M = MMAX N = NMAX NRHS = NSMAX - LDA = MAX( 1, M ) - LDB = MAX( 1, M, N ) MNMIN = MAX( MIN( M, N ), 1 ) * * Compute workspace needed for routines * SQRT14, SQRT17 (two side cases), SQRT15 and SQRT12 * - LWORK = MAX( ( M+N )*NRHS, + LWORK = MAX( 1, ( M+N )*NRHS, $ ( N+NRHS )*( M+2 ), ( M+NRHS )*( N+2 ), $ MAX( M+MNMIN, NRHS*MNMIN,2*N+M ), $ MAX( M*N+4*MNMIN+MAX(M,N), M*N+2*MNMIN+4*N ) ) + LIWORK = 1 +* +* Iterate through all test cases and compute necessary workspace +* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* + DO IM = 1, NM + M = MVAL( IM ) + LDA = MAX( 1, M ) + DO IN = 1, NN + N = NVAL( IN ) + MNMIN = MAX(MIN( M, N ),1) + LDB = MAX( 1, M, N ) + DO INS = 1, NNS + NRHS = NSVAL( INS ) + DO IRANK = 1, 2 + DO ISCALE = 1, 3 + ITYPE = ( IRANK-1 )*3 + ISCALE + IF( DOTYPE( ITYPE ) ) THEN + IF( IRANK.EQ.1 ) THEN + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* +* Compute workspace needed for SGELS + CALL SGELS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_SGELS = INT ( WQ ) +* Compute workspace needed for SGETSLS + CALL SGETSLS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_SGETSLS = INT( WQ ) + ENDDO + END IF +* Compute workspace needed for SGELSY + CALL SGELSY( M, N, NRHS, A, LDA, B, LDB, IWQ, + $ RCOND, CRANK, WQ, -1, INFO ) + LWORK_SGELSY = INT( WQ ) +* Compute workspace needed for SGELSS + CALL SGELSS( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1 , INFO ) + LWORK_SGELSS = INT( WQ ) +* Compute workspace needed for SGELSD + CALL SGELSD( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1, IWQ, INFO ) + LWORK_SGELSD = INT( WQ ) +* Compute LIWORK workspace needed for SGELSY and SGELSD + LIWORK = MAX( LIWORK, N, IWQ ) +* Compute LWORK workspace needed for all functions + LWORK = MAX( LWORK, LWORK_SGELS, LWORK_SGETSLS, + $ LWORK_SGELSY, LWORK_SGELSS, + $ LWORK_SGELSD ) + END IF + ENDDO + ENDDO + ENDDO + ENDDO + ENDDO * -* Compute workspace needed for SGELS - CALL SGELS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_SGELS = INT ( WORKQUERY ) -* Compute workspace needed for SGETSLS - CALL SGETSLS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_SGETSLS = INT( WORKQUERY ) -* Compute workspace needed for SGELSY - CALL SGELSY( M, N, NRHS, A, LDA, B, LDB, IWORKQUERY, - $ RCOND, CRANK, WORKQUERY, -1, INFO ) - LWORK_SGELSY = INT( WORKQUERY ) -* Compute workspace needed for SGELSS - CALL SGELSS( M, N, NRHS, A, LDA, B, LDB, S, - $ RCOND, CRANK, WORKQUERY, -1 , INFO ) - LWORK_SGELSS = INT( WORKQUERY ) -* Compute workspace needed for SGELSD - CALL SGELSD( M, N, NRHS, A, LDA, B, LDB, S, - $ RCOND, CRANK, WORKQUERY, -1, IWORKQUERY, INFO ) - LWORK_SGELSD = INT( WORKQUERY ) -* Compute LIWORK workspace needed for SGELSY and SGELSD - LIWORK = MAX( 1, N, IWORKQUERY ) -* Compute LWORK workspace needed for all functions - LWORK = MAX( 1, LWORK, LWORK_SGELS, LWORK_SGETSLS, LWORK_SGELSY, - $ LWORK_SGELSS, LWORK_SGELSD ) LWLSY = LWORK * ALLOCATE( WORK( LWORK ) ) diff --git a/lapack-netlib/TESTING/LIN/sdrvrf3.f b/lapack-netlib/TESTING/LIN/sdrvrf3.f index 33ac10842..aa8e4f1da 100644 --- a/lapack-netlib/TESTING/LIN/sdrvrf3.f +++ b/lapack-netlib/TESTING/LIN/sdrvrf3.f @@ -110,7 +110,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup single_lin * @@ -118,10 +118,10 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, + S_WORK_SLANGE, S_WORK_SGEQRF, TAU ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, NN, NOUT @@ -223,7 +223,7 @@ * IF ( IALPHA.EQ. 1) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 2) THEN ALPHA = ONE ELSE ALPHA = SLARND( 2, ISEED ) diff --git a/lapack-netlib/TESTING/LIN/sdrvsy_aa.f b/lapack-netlib/TESTING/LIN/sdrvsy_aa.f index 8e5686f82..e4357c45f 100644 --- a/lapack-netlib/TESTING/LIN/sdrvsy_aa.f +++ b/lapack-netlib/TESTING/LIN/sdrvsy_aa.f @@ -143,7 +143,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup real_lin * @@ -152,10 +152,10 @@ $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, $ RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -199,7 +199,7 @@ * .. * .. External Subroutines .. EXTERNAL ALADHD, ALAERH, ALASVM, SERRVX, SGET04, SLACPY, - $ SLARHS, SLASET, SLATB4, SLATMS, SPOT02, DPOT05, + $ SLARHS, SLASET, SLATB4, SLATMS, SPOT02, $ SSYSV_AA, SSYT01_AA, SSYTRF_AA, XLAENV * .. * .. Scalars in Common .. @@ -238,7 +238,6 @@ DO 10 I = 1, 4 ISEED( I ) = ISEEDY( I ) 10 CONTINUE - LWORK = MAX( 2*NMAX, NMAX*NRHS ) * * Test the error exits * @@ -257,6 +256,8 @@ * DO 180 IN = 1, NN N = NVAL( IN ) + LWORK = MAX( 3*N-2, N*(1+NB) ) + LWORK = MAX( LWORK, 1 ) LDA = MAX( N, 1 ) XTYPE = 'N' NIMAT = NTYPES diff --git a/lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f b/lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f new file mode 100644 index 000000000..d8d9dc0a9 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f @@ -0,0 +1,490 @@ +*> \brief \b SDRVSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SDRVSY_AA_2STAGE( +* DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, NMAX, +* A, AFAC, AINV, B, X, XACT, WORK, RWORK, IWORK, +* NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NOUT, NRHS +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NVAL( * ) +* REAL RWORK( * ) +* REAL A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SDRVSY_AA_2STAGE tests the driver routine SSYSV_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand side vectors to be generated for +*> each linear system. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is REAL array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is REAL array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is REAL array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is REAL array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is REAL array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (NMAX*max(2,NRHS)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (NMAX+2*NRHS) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup real_lin +* +* ===================================================================== + SUBROUTINE SDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, + $ RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NMAX, NN, NOUT, NRHS + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NVAL( * ) + REAL RWORK( * ) + REAL A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NTYPES, NTESTS + PARAMETER ( NTYPES = 10, NTESTS = 3 ) + INTEGER NFACT + PARAMETER ( NFACT = 2 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, FACT, TYPE, UPLO, XTYPE + CHARACTER*3 MATPATH, PATH + INTEGER I, I1, I2, IFACT, IMAT, IN, INFO, IOFF, IUPLO, + $ IZERO, J, K, KL, KU, LDA, LWORK, MODE, N, + $ NB, NBMIN, NERRS, NFAIL, NIMAT, NRUN, NT + REAL ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER FACTS( NFACT ), UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + REAL RESULT( NTESTS ) +* .. +* .. External Functions .. + REAL SLANSY, SGET06 + EXTERNAL SLANSY, SGET06 +* .. +* .. External Subroutines .. + EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, SERRVX, + $ CGET04, SLACPY, SLARHS, SLATB4, SLATMS, + $ SSYSV_AA_2STAGE, SSYT01_AA, SPOT02, + $ SSYTRF_AA_2STAGE +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / , FACTS / 'F', 'N' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Single precision' + PATH( 2: 3 ) = 'S2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Single precision' + MATPATH( 2: 3 ) = 'SY' +* + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL SERRVX( PATH, NOUT ) + INFOT = 0 +* +* Set the block size and minimum block size for testing. +* + NB = 1 + NBMIN = 2 + CALL XLAENV( 1, NB ) + CALL XLAENV( 2, NBMIN ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* Set up parameters with SLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL SLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, ANORM, + $ MODE, CNDNUM, DIST ) +* +* Generate a matrix with SLATMS. +* + SRNAMT = 'SLATMS' + CALL SLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, + $ WORK, INFO ) +* +* Check error code from SLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'SLATMS', INFO, 0, UPLO, N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + GO TO 160 + END IF +* +* For types 3-6, zero one or more rows and columns of +* the matrix to test that INFO is returned correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = ZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = ZERO + 50 CONTINUE + END IF + ELSE + IOFF = 0 + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = ZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = ZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* + DO 150 IFACT = 1, NFACT +* +* Do first for FACT = 'F', then for other values. +* + FACT = FACTS( IFACT ) +* +* Form an exact solution and set the right hand side. +* + SRNAMT = 'SLARHS' + CALL SLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, KL, KU, + $ NRHS, A, LDA, XACT, LDA, B, LDA, ISEED, + $ INFO ) + XTYPE = 'C' +* +* --- Test SSYSV_AA_2STAGE --- +* + IF( IFACT.EQ.2 ) THEN + CALL SLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) + CALL SLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* +* Factor the matrix and solve the system using SSYSV_AA. +* + SRNAMT = 'SSYSV_AA_2STAGE ' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL SSYSV_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ X, LDA, WORK, LWORK, INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from SSYSV_AA . +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'SSYSV_AA', INFO, K, + $ UPLO, N, N, -1, -1, NRHS, + $ IMAT, NFAIL, NERRS, NOUT ) + GO TO 120 + ELSE IF( INFO.NE.0 ) THEN + GO TO 120 + END IF +* +* Compute residual of the computed solution. +* + CALL SLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + CALL SPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, + $ LDA, RWORK, RESULT( 1 ) ) +* +* Reconstruct matrix from factors and compute +* residual. +* +c CALL SSY01_AA( UPLO, N, A, LDA, AFAC, LDA, +c $ IWORK, AINV, LDA, RWORK, +c $ RESULT( 2 ) ) +c NT = 2 + NT = 1 +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALADHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )'SSYSV_AA ', + $ UPLO, N, IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT + 120 CONTINUE + END IF +* + 150 CONTINUE +* + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASVM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( 1X, A, ', UPLO=''', A1, ''', N =', I5, ', type ', I2, + $ ', test ', I2, ', ratio =', G12.5 ) + RETURN +* +* End of SDRVSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/serrsy.f b/lapack-netlib/TESTING/LIN/serrsy.f index ce6975d1e..fe9af2f41 100644 --- a/lapack-netlib/TESTING/LIN/serrsy.f +++ b/lapack-netlib/TESTING/LIN/serrsy.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_lin * * ===================================================================== SUBROUTINE SERRSY( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -92,8 +92,8 @@ $ SSYTF2_RK, SSYTF2_ROOK, SSYTRF, SSYTRF_RK, $ SSYTRF_ROOK, SSYTRI, SSYTF2, SSYTRI_3, $ SSYTRI_3X, SSYTRI_ROOK, SSYTRF_AA, SSYTRI2, - $ SYTRI2X, SSYTRS, SSYTRS_3, SSYTRS_ROOK, - $ SSYTRS_AA + $ SSYTRI2X, SSYTRS, SSYTRS_3, SSYTRS_ROOK, + $ SSYTRS_AA, SSYTRF_AA_2STAGE, SSYTRS_AA_2STAGE * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -524,6 +524,62 @@ INFOT = 10 CALL SSYTRS_AA( 'U', 0, 1, A, 2, IP, B, 1, W, -2, INFO ) CALL CHKXER( 'SSYTRS_AA', INFOT, NOUT, LERR, OK ) + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* Test error exits of the routines that use factorization +* of a symmetric indefinite matrix with Aasen's algorithm. +* +* SSYTRF_AA_2STAGE +* + SRNAMT = 'SSYTRF_AA_2STAGE' + INFOT = 1 + CALL SSYTRF_AA_2STAGE( '/', 0, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'SSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRF_AA_2STAGE( 'U', -1, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'SSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SSYTRF_AA_2STAGE( 'U', 2, A, 1, A, 2, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'SSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'SSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 8, IP, IP, W, 0, + $ INFO ) + CALL CHKXER( 'SSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) +* +* SSYTRS_AA_2STAGE +* + SRNAMT = 'SSYTRS_AA_2STAGE' + INFOT = 1 + CALL SSYTRS_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'SSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRS_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'SSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYTRS_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'SSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYTRS_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'SSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL SSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'SSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 8, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'SSYTRS_AA_STAGE', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/serrvx.f b/lapack-netlib/TESTING/LIN/serrvx.f index 3317e840a..a63ed38d7 100644 --- a/lapack-netlib/TESTING/LIN/serrvx.f +++ b/lapack-netlib/TESTING/LIN/serrvx.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup single_lin * * ===================================================================== SUBROUTINE SERRVX( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -91,7 +91,8 @@ EXTERNAL CHKXER, SGBSV, SGBSVX, SGESV, SGESVX, SGTSV, $ SGTSVX, SPBSV, SPBSVX, SPOSV, SPOSVX, SPPSV, $ SPPSVX, SPTSV, SPTSVX, SSPSV, SSPSVX, SSYSV, - $ SSYSV_AA, SSYSV_RK, SSYSV_ROOK, SSYSVX + $ SSYSV_AA, SSYSV_RK, SSYSV_ROOK, SSYSVX, + $ SSYSV_AA_2STAGE * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -711,6 +712,36 @@ INFOT = 8 CALL SSYSV_AA( 'U', 2, 0, A, 2, IP, B, 1, W, 1, INFO ) CALL CHKXER( 'SSYSV_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* DSYSV_AASEN_2STAGE +* + SRNAMT = 'SSYSV_AA_2STAGE' + INFOT = 1 + CALL SSYSV_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'SSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYSV_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'SSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYSV_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'SSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYSV_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'SSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSYSV_AA_2STAGE( 'U', 2, 1, A, 2, A, 2, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'SSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL SSYSV_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, B, 2, + $ W, 1, INFO ) + CALL CHKXER( 'SSYSV_AA_2STAGE', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/slahilb.f b/lapack-netlib/TESTING/LIN/slahilb.f index be7af415e..7944f0e9d 100644 --- a/lapack-netlib/TESTING/LIN/slahilb.f +++ b/lapack-netlib/TESTING/LIN/slahilb.f @@ -8,7 +8,7 @@ * Definition: * =========== * -* SUBROUTINE SLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) +* SUBROUTINE SLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) * * .. Scalar Arguments .. * INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -53,7 +53,7 @@ *> *> \param[in] NRHS *> \verbatim -*> NRHS is NRHS +*> NRHS is INTEGER *> The requested number of right-hand sides. *> \endverbatim *> @@ -117,17 +117,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup single_lin * * ===================================================================== - SUBROUTINE SLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) + SUBROUTINE SLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER N, NRHS, LDA, LDX, LDB, INFO diff --git a/lapack-netlib/TESTING/LIN/sqrt04.f b/lapack-netlib/TESTING/LIN/sqrt04.f index d86caca8e..d0fb6de35 100644 --- a/lapack-netlib/TESTING/LIN/sqrt04.f +++ b/lapack-netlib/TESTING/LIN/sqrt04.f @@ -74,7 +74,7 @@ SUBROUTINE SQRT04(M,N,NB,RESULT) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -103,6 +103,9 @@ * .. Local Arrays .. INTEGER ISEED( 4 ) * .. +* .. External Subroutine .. + EXTERNAL SGEMM, SLACPY, SLARNV, SGEMQRT, SLASET, SGEQRT, SSYRK +* .. * .. External Functions .. REAL SLAMCH REAL SLANGE, SLANSY diff --git a/lapack-netlib/TESTING/LIN/sqrt05.f b/lapack-netlib/TESTING/LIN/sqrt05.f index 705d28938..0c1d52761 100644 --- a/lapack-netlib/TESTING/LIN/sqrt05.f +++ b/lapack-netlib/TESTING/LIN/sqrt05.f @@ -81,7 +81,7 @@ SUBROUTINE SQRT05(M,N,L,NB,RESULT) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -110,6 +110,10 @@ * .. Local Arrays .. INTEGER ISEED( 4 ) * .. +* .. External Subroutine .. + EXTERNAL SGEMM, SLARNV, STPMQRT, STPQRT, SGEMQRT, SSYRK, SLACPY, + $ SLASET +* .. * .. External Functions .. REAL SLAMCH REAL SLANGE, SLANSY diff --git a/lapack-netlib/TESTING/LIN/ssyt01_3.f b/lapack-netlib/TESTING/LIN/ssyt01_3.f index f370962c3..11ddea4c3 100644 --- a/lapack-netlib/TESTING/LIN/ssyt01_3.f +++ b/lapack-netlib/TESTING/LIN/ssyt01_3.f @@ -132,7 +132,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup single_lin * @@ -140,10 +140,10 @@ SUBROUTINE SSYT01_3( UPLO, N, A, LDA, AFAC, LDAFAC, E, IPIV, C, $ LDC, RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/TESTING/LIN/ssyt01_aa.f b/lapack-netlib/TESTING/LIN/ssyt01_aa.f index eecdf8a4d..c0fee7154 100644 --- a/lapack-netlib/TESTING/LIN/ssyt01_aa.f +++ b/lapack-netlib/TESTING/LIN/ssyt01_aa.f @@ -116,7 +116,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * *> \ingroup real_lin @@ -125,10 +125,10 @@ SUBROUTINE SSYT01_AA( UPLO, N, A, LDA, AFAC, LDAFAC, IPIV, C, $ LDC, RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER UPLO @@ -157,7 +157,7 @@ EXTERNAL LSAME, SLAMCH, SLANSY * .. * .. External Subroutines .. - EXTERNAL SLASET, SLAVSY + EXTERNAL SLASET, SLAVSY, SSWAP, STRMM, SLACPY * .. * .. Intrinsic Functions .. INTRINSIC DBLE diff --git a/lapack-netlib/TESTING/LIN/zchkaa.f b/lapack-netlib/TESTING/LIN/zchkaa.f index 5146766b5..d2be2525d 100644 --- a/lapack-netlib/TESTING/LIN/zchkaa.f +++ b/lapack-netlib/TESTING/LIN/zchkaa.f @@ -53,6 +53,9 @@ *> ZHR 10 List types on next line if 0 < NTYPES < 10 *> ZHK 10 List types on next line if 0 < NTYPES < 10 *> ZHA 10 List types on next line if 0 < NTYPES < 10 +*> ZH2 10 List types on next line if 0 < NTYPES < 10 +*> ZSA 11 List types on next line if 0 < NTYPES < 10 +*> ZS2 11 List types on next line if 0 < NTYPES < 10 *> ZHP 10 List types on next line if 0 < NTYPES < 10 *> ZSY 11 List types on next line if 0 < NTYPES < 11 *> ZSR 11 List types on next line if 0 < NTYPES < 11 @@ -105,17 +108,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_lin * * ===================================================================== PROGRAM ZCHKAA * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * ===================================================================== * @@ -168,10 +171,11 @@ $ ZCHKSY_ROOK, ZCHKSY_RK, ZCHKSY_AA, ZCHKTB, $ ZCHKTP, ZCHKTR, ZCHKTZ, ZDRVGB, ZDRVGE, ZDRVGT, $ ZDRVHE, ZDRVHE_ROOK, ZDRVHE_RK, ZDRVHE_AA, - $ ZDRVHP, ZDRVLS, ZDRVPB, ZDRVPO, ZDRVPP, ZDRVPT, - $ ZDRVSP, ZDRVSY, ZDRVSY_ROOK, ZDRVSY_RK, - $ ZDRVSY_AA, ILAVER, ZCHKQRT, ZCHKQRTP, ZCHKLQT, - $ ZCHKLQTP, ZCHKTSQR + $ ZDRVHE_AA_2STAGE, ZDRVHP, ZDRVLS, ZDRVPB, + $ ZDRVPO, ZDRVPP, ZDRVPT, ZDRVSP, ZDRVSY, + $ ZDRVSY_ROOK, ZDRVSY_RK, ZDRVSY_AA, + $ ZDRVSY_AA_2STAGE, ILAVER, ZCHKQRT, ZCHKQRTP, + $ ZCHKLQT, ZCHKLQTP, ZCHKTSQR * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -700,8 +704,8 @@ * ELSE IF( LSAMEN( 2, C2, 'HA' ) ) THEN * -* HA: Hermitian indefinite matrices, -* with partial (Aasen's) pivoting algorithm +* HA: Hermitian matrices, +* Aasen Algorithm * NTYPES = 10 CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) @@ -724,6 +728,35 @@ ELSE WRITE( NOUT, FMT = 9988 )PATH END IF +* + ELSE IF( LSAMEN( 2, C2, 'H2' ) ) THEN +* +* H2: Hermitian matrices, +* with partial (Aasen's) pivoting algorithm +* + NTYPES = 10 + CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) +* + IF( TSTCHK ) THEN + CALL ZCHKHE_AA_2STAGE( DOTYPE, NN, NVAL, NNB2, NBVAL2, + $ NNS, NSVAL, THRESH, TSTERR, LDA, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9989 )PATH + END IF +* + IF( TSTDRV ) THEN + CALL ZDRVHE_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ LDA, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9988 )PATH + END IF +* * ELSE IF( LSAMEN( 2, C2, 'HP' ) ) THEN * @@ -831,9 +864,7 @@ * ELSE IF( LSAMEN( 2, C2, 'SA' ) ) THEN * -* SK: symmetric indefinite matrices, -* with bounded Bunch-Kaufman (rook) pivoting algorithm, -* differnet matrix storage format than SR path version. +* SA: symmetric indefinite matrices with Aasen's algorithm, * NTYPES = 11 CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) @@ -855,6 +886,34 @@ ELSE WRITE( NOUT, FMT = 9988 )PATH END IF +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* S2: symmetric indefinite matrices with Aasen's algorithm +* 2 stage +* + NTYPES = 11 + CALL ALAREQ( PATH, NMATS, DOTYPE, NTYPES, NIN, NOUT ) +* + IF( TSTCHK ) THEN + CALL ZCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB2, NBVAL2, NNS, + $ NSVAL, THRESH, TSTERR, LDA, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ WORK, RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9989 )PATH + END IF +* + IF( TSTDRV ) THEN + CALL ZDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ LDA, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), WORK, + $ RWORK, IWORK, NOUT ) + ELSE + WRITE( NOUT, FMT = 9988 )PATH + END IF * ELSE IF( LSAMEN( 2, C2, 'SP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/zchkhe_aa.f b/lapack-netlib/TESTING/LIN/zchkhe_aa.f index 36125cce9..4100801b7 100644 --- a/lapack-netlib/TESTING/LIN/zchkhe_aa.f +++ b/lapack-netlib/TESTING/LIN/zchkhe_aa.f @@ -162,7 +162,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * * *> \ingroup complex16_lin @@ -172,10 +172,10 @@ $ THRESH, TSTERR, NMAX, A, AFAC, AINV, B, $ X, XACT, WORK, RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -218,15 +218,10 @@ INTEGER ISEED( 4 ), ISEEDY( 4 ) DOUBLE PRECISION RESULT( NTESTS ) * .. -* .. External Functions .. - DOUBLE PRECISION DGET06, ZLANHE - EXTERNAL DGET06, ZLANHE -* .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZERRHE, ZGET04, - $ ZHECON, ZHERFS, ZHET01_AA, ZHETRF_AA, ZHETRI2, - $ ZHETRS_AA, ZLACPY, ZLAIPD, ZLARHS, ZLATB4, - $ ZLATMS, ZPOT02, ZPOT03, ZPOT05 + EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZERRHE, + $ ZHET01_AA, ZHETRF_AA, ZHETRS_AA, ZLACPY, + $ ZLAIPD, ZLARHS, ZLATB4, ZLATMS, ZPOT02 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -438,22 +433,22 @@ * Adjust the expected value of INFO to account for * pivoting. * - IF( IZERO.GT.0 ) THEN - J = 1 - K = IZERO - 100 CONTINUE - IF( J.EQ.K ) THEN - K = IWORK( J ) - ELSE IF( IWORK( J ).EQ.K ) THEN - K = J - END IF - IF( J.LT.K ) THEN - J = J + 1 - GO TO 100 - END IF - ELSE +c IF( IZERO.GT.0 ) THEN +c J = 1 +c K = IZERO +c 100 CONTINUE +c IF( J.EQ.K ) THEN +c K = IWORK( J ) +c ELSE IF( IWORK( J ).EQ.K ) THEN +c K = J +c END IF +c IF( J.LT.K ) THEN +c J = J + 1 +c GO TO 100 +c END IF +c ELSE K = 0 - END IF +c END IF * * Check error code from ZHETRF and handle error. * @@ -516,30 +511,34 @@ * Check error code from ZHETRS and handle error. * IF( INFO.NE.0 ) THEN - CALL ALAERH( PATH, 'ZHETRS', INFO, 0, UPLO, N, - $ N, -1, -1, NRHS, IMAT, NFAIL, - $ NERRS, NOUT ) - END IF + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'ZHETRS_AA', INFO, 0, + $ UPLO, N, N, -1, -1, NRHS, IMAT, + $ NFAIL, NERRS, NOUT ) + END IF + ELSE * - CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) * -* Compute the residual for the solution +* Compute the residual for the solution * - CALL ZPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, - $ LDA, RWORK, RESULT( 2 ) ) + CALL ZPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) * -* Print information about the tests that did not pass -* the threshold. +* Print information about the tests that did not pass +* the threshold. * - DO 120 K = 2, 2 - IF( RESULT( K ).GE.THRESH ) THEN - IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) - $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, - $ IMAT, K, RESULT( K ) - NFAIL = NFAIL + 1 - END IF - 120 CONTINUE + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF NRUN = NRUN + 1 * * End do for each value of NRHS in NSVAL. diff --git a/lapack-netlib/TESTING/LIN/zchkhe_aa_2stage.f b/lapack-netlib/TESTING/LIN/zchkhe_aa_2stage.f new file mode 100644 index 000000000..43c5435af --- /dev/null +++ b/lapack-netlib/TESTING/LIN/zchkhe_aa_2stage.f @@ -0,0 +1,580 @@ +*> \brief \b ZCHKHE_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZCHKHE_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, +* NNS, NSVAL, THRESH, TSTERR, NMAX, A, +* AFAC, AINV, B, X, XACT, WORK, RWORK, +* IWORK, NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NNB, NNS, NOUT +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCHKSY_AA_2STAGE tests ZHETRF_AA_2STAGE, -TRS_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NNB +*> \verbatim +*> NNB is INTEGER +*> The number of values of NB contained in the vector NBVAL. +*> \endverbatim +*> +*> \param[in] NBVAL +*> \verbatim +*> NBVAL is INTEGER array, dimension (NBVAL) +*> The values of the blocksize NB. +*> \endverbatim +*> +*> \param[in] NNS +*> \verbatim +*> NNS is INTEGER +*> The number of values of NRHS contained in the vector NSVAL. +*> \endverbatim +*> +*> \param[in] NSVAL +*> \verbatim +*> NSVAL is INTEGER array, dimension (NNS) +*> The values of the number of right hand sides NRHS. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (NMAX*NSMAX) +*> where NSMAX is the largest entry in NSVAL. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX*16 array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (NMAX*max(3,NSMAX)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(NMAX,2*NSMAX)) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16_lin +* +* ===================================================================== + SUBROUTINE ZCHKHE_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, + $ NSVAL, THRESH, TSTERR, NMAX, A, AFAC, AINV, + $ B, X, XACT, WORK, RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NN, NNB, NNS, NMAX, NOUT + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) + COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), + $ RWORK( * ), WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) + INTEGER NTYPES + PARAMETER ( NTYPES = 10 ) + INTEGER NTESTS + PARAMETER ( NTESTS = 9 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, TYPE, UPLO, XTYPE + CHARACTER*3 PATH, MATPATH + INTEGER I, I1, I2, IMAT, IN, INB, INFO, IOFF, IRHS, + $ IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, MODE, + $ N, NB, NERRS, NFAIL, NIMAT, NRHS, NRUN, NT + DOUBLE PRECISION ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + DOUBLE PRECISION RESULT( NTESTS ) +* .. +* .. External Subroutines .. + EXTERNAL ALAERH, ALAHD, ALASUM, ZERRHE, ZLACPY, + $ ZLARHS, ZLATB4, ZLATMS, ZPOT02, + $ ZHETRF_AA_2STAGE, ZHETRS_AA_2STAGE, + $ XLAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Zomplex precision' + PATH( 2: 3 ) = 'H2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Zomplex precision' + MATPATH( 2: 3 ) = 'HE' + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL ZERRHE( PATH, NOUT ) + INFOT = 0 +* +* Set the minimum block size for which the block routine should +* be used, which will be later returned by ILAENV +* + CALL XLAENV( 2, 2 ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + IF( N .GT. NMAX ) THEN + NFAIL = NFAIL + 1 + WRITE(NOUT, 9995) 'M ', N, NMAX + GO TO 180 + END IF + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + IZERO = 0 +* +* Do for each value of matrix type IMAT +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* +* Set up parameters with ZLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL ZLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, + $ ANORM, MODE, CNDNUM, DIST ) +* +* Generate a matrix with ZLATMS. +* + SRNAMT = 'ZLATMS' + CALL ZLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, WORK, + $ INFO ) +* +* Check error code from ZLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'ZLATMS', INFO, 0, UPLO, N, N, -1, + $ -1, -1, IMAT, NFAIL, NERRS, NOUT ) +* +* Skip all tests for this generated matrix +* + GO TO 160 + END IF +* +* For matrix types 3-6, zero one or more rows and +* columns of the matrix to test that INFO is returned +* correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = CZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = CZERO + 50 CONTINUE + END IF + ELSE + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = CZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the last IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = CZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate test matrix A. +* +* +* Set the imaginary part of the diagonals. +* + CALL ZLAIPD( N, A, LDA+1, 0 ) +* +* Do for each value of NB in NBVAL +* + DO 150 INB = 1, NNB +* +* Set the optimal blocksize, which will be later +* returned by ILAENV. +* + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Copy the test matrix A into matrix AFAC which +* will be factorized in place. This is needed to +* preserve the test matrix A for subsequent tests. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) +* +* Compute the L*D*L**T or U*D*U**T factorization of the +* matrix. IWORK stores details of the interchanges and +* the block structure of D. AINV is a work array for +* block factorization, LWORK is the length of AINV. +* + SRNAMT = 'ZHETRF_AA_2STAGE' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL ZHETRF_AA_2STAGE( UPLO, N, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ WORK, LWORK, + $ INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from CHETRF and handle error. +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'ZHETRF_AA_2STAGE', INFO, K, + $ UPLO, N, N, -1, -1, NB, IMAT, NFAIL, + $ NERRS, NOUT ) + END IF +* +*+ TEST 1 +* Reconstruct matrix from factors and compute residual. +* +* NEED TO CREATE ZHET01_AA_2STAGE +* CALL ZHET01_AA( UPLO, N, A, LDA, AFAC, LDA, IWORK, +* $ AINV, LDA, RWORK, RESULT( 1 ) ) +* NT = 1 + NT = 0 +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )UPLO, N, NB, IMAT, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT +* +* Skip solver test if INFO is not 0. +* + IF( INFO.NE.0 ) THEN + GO TO 140 + END IF +* +* Do for each value of NRHS in NSVAL. +* + DO 130 IRHS = 1, NNS + NRHS = NSVAL( IRHS ) +* +*+ TEST 2 (Using TRS) +* Solve and compute residual for A * X = B. +* +* Choose a set of NRHS random solution vectors +* stored in XACT and set up the right hand side B +* + SRNAMT = 'ZLARHS' + CALL ZLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, + $ KL, KU, NRHS, A, LDA, XACT, LDA, + $ B, LDA, ISEED, INFO ) + CALL ZLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* + SRNAMT = 'ZHETRS_AA_2STAGE' + LWORK = MAX( 1, 3*N-2 ) + CALL ZHETRS_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, IWORK, IWORK( 1+N ), + $ X, LDA, INFO ) +* +* Check error code from ZHETRS and handle error. +* + IF( INFO.NE.0 ) THEN + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'ZHETRS_AA_2STAGE', + $ INFO, 0, UPLO, N, N, -1, -1, + $ NRHS, IMAT, NFAIL, NERRS, NOUT ) + END IF + ELSE +* + CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) +* +* Compute the residual for the solution +* + CALL ZPOT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) +* +* Print information about the tests that did not pass +* the threshold. +* + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF + NRUN = NRUN + 1 +* +* End do for each value of NRHS in NSVAL. +* + 130 CONTINUE + 140 CONTINUE + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NB =', I4, ', type ', + $ I2, ', test ', I2, ', ratio =', G12.5 ) + 9998 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NRHS=', I3, ', type ', + $ I2, ', test(', I2, ') =', G12.5 ) + 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', + $ I6 ) + RETURN +* +* End of ZCHKSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/zchkrfp.f b/lapack-netlib/TESTING/LIN/zchkrfp.f index 27586a448..ba5fd6166 100644 --- a/lapack-netlib/TESTING/LIN/zchkrfp.f +++ b/lapack-netlib/TESTING/LIN/zchkrfp.f @@ -59,7 +59,7 @@ * ===================================================================== PROGRAM ZCHKRFP * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 @@ -217,11 +217,6 @@ WRITE( NOUT, FMT = 9999 ) STOP END IF -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF * * Calculate and print the machine dependent constants. * diff --git a/lapack-netlib/TESTING/LIN/zchksy_aa.f b/lapack-netlib/TESTING/LIN/zchksy_aa.f index 465f06d0e..186a5b39d 100644 --- a/lapack-netlib/TESTING/LIN/zchksy_aa.f +++ b/lapack-netlib/TESTING/LIN/zchksy_aa.f @@ -15,13 +15,14 @@ * .. Scalar Arguments .. * LOGICAL TSTERR * INTEGER NMAX, NN, NNB, NNS, NOUT -* COMPLEX*16 THRESH +* DOUBLE PRECISION THRESH * .. * .. Array Arguments .. * LOGICAL DOTYPE( * ) * INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) +* DOUBLE PRECISION RWORK( * ) * COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), -* $ RWORK( * ), WORK( * ), X( * ), XACT( * ) +* $ WORK( * ), X( * ), XACT( * ) * .. * * @@ -82,7 +83,7 @@ *> *> \param[in] THRESH *> \verbatim -*> THRESH is COMPLEX*16 +*> THRESH is DOUBLE PRECISION *> The threshold value for the test ratios. A result is *> included in the output file if RESULT >= THRESH. To have *> every test ratio printed, use THRESH = 0. @@ -161,9 +162,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 -* -* @generated from LIN/dchksy_aa.f, fortran d -> z, Wed Nov 16 21:34:18 2016 +*> \date November 2017 * *> \ingroup complex16_lin * @@ -172,10 +171,10 @@ $ THRESH, TSTERR, NMAX, A, AFAC, AINV, B, $ X, XACT, WORK, RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * IMPLICIT NONE * @@ -198,7 +197,7 @@ DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CZERO - PARAMETER ( CZERO = 0.0E+0 ) + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER NTYPES PARAMETER ( NTYPES = 10 ) INTEGER NTESTS @@ -218,15 +217,10 @@ INTEGER ISEED( 4 ), ISEEDY( 4 ) DOUBLE PRECISION RESULT( NTESTS ) * .. -* .. External Functions .. - DOUBLE PRECISION DGET06, ZLANSY - EXTERNAL DGET06, ZLANSY -* .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, ZERRSY, ZGET04, ZLACPY, - $ ZLARHS, ZLATB4, ZLATMS, ZSYT02, DSYT03, DSYT05, - $ DSYCON, ZSYRFS, ZSYT01_AA, ZSYTRF_AA, - $ DSYTRI2, ZSYTRS_AA, XLAENV + EXTERNAL ALAERH, ALAHD, ALASUM, ZERRSY, ZLACPY, ZLARHS, + $ ZLATB4, ZLATMS, ZSYT02, ZSYT01_AA, ZSYTRF_AA, + $ ZSYTRS_AA, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -440,22 +434,22 @@ * Adjust the expected value of INFO to account for * pivoting. * - IF( IZERO.GT.0 ) THEN - J = 1 - K = IZERO - 100 CONTINUE - IF( J.EQ.K ) THEN - K = IWORK( J ) - ELSE IF( IWORK( J ).EQ.K ) THEN - K = J - END IF - IF( J.LT.K ) THEN - J = J + 1 - GO TO 100 - END IF - ELSE +c IF( IZERO.GT.0 ) THEN +c J = 1 +c K = IZERO +c 100 CONTINUE +c IF( J.EQ.K ) THEN +c K = IWORK( J ) +c ELSE IF( IWORK( J ).EQ.K ) THEN +c K = J +c END IF +c IF( J.LT.K ) THEN +c J = J + 1 +c GO TO 100 +c END IF +c ELSE K = 0 - END IF +c END IF * * Check error code from ZSYTRF and handle error. * @@ -519,31 +513,34 @@ * Check error code from ZSYTRS and handle error. * IF( INFO.NE.0 ) THEN - CALL ALAERH( PATH, 'ZSYTRS_AA', INFO, 0, - $ UPLO, N, N, -1, -1, NRHS, IMAT, - $ NFAIL, NERRS, NOUT ) - END IF -* - CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'ZSYTRS_AA', INFO, 0, + $ UPLO, N, N, -1, -1, NRHS, IMAT, + $ NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) * -* Compute the residual for the solution +* Compute the residual for the solution * - CALL ZSYT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, - $ LDA, RWORK, RESULT( 2 ) ) + CALL ZSYT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) * * -* Print information about the tests that did not pass -* the threshold. +* Print information about the tests that did not pass +* the threshold. * - DO 120 K = 2, 2 - IF( RESULT( K ).GE.THRESH ) THEN - IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) - $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, - $ IMAT, K, RESULT( K ) - NFAIL = NFAIL + 1 - END IF - 120 CONTINUE + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF NRUN = NRUN + 1 * * End do for each value of NRHS in NSVAL. diff --git a/lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f b/lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f new file mode 100644 index 000000000..d4d8c2939 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f @@ -0,0 +1,573 @@ +*> \brief \b ZCHKSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, +* NNS, NSVAL, THRESH, TSTERR, NMAX, A, +* AFAC, AINV, B, X, XACT, WORK, RWORK, +* IWORK, NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NNB, NNS, NOUT +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCHKSY_AA_2STAGE tests ZSYTRF_AA_2STAGE, -TRS_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NNB +*> \verbatim +*> NNB is INTEGER +*> The number of values of NB contained in the vector NBVAL. +*> \endverbatim +*> +*> \param[in] NBVAL +*> \verbatim +*> NBVAL is INTEGER array, dimension (NBVAL) +*> The values of the blocksize NB. +*> \endverbatim +*> +*> \param[in] NNS +*> \verbatim +*> NNS is INTEGER +*> The number of values of NRHS contained in the vector NSVAL. +*> \endverbatim +*> +*> \param[in] NSVAL +*> \verbatim +*> NSVAL is INTEGER array, dimension (NNS) +*> The values of the number of right hand sides NRHS. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (NMAX*NSMAX) +*> where NSMAX is the largest entry in NSVAL. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX*16 array, dimension (NMAX*NSMAX) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (NMAX*max(3,NSMAX)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is COMPLEX*16 array, dimension (max(NMAX,2*NSMAX)) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16_lin +* +* ===================================================================== + SUBROUTINE ZCHKSY_AA_2STAGE( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, + $ NSVAL, THRESH, TSTERR, NMAX, A, AFAC, AINV, + $ B, X, XACT, WORK, RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NN, NNB, NNS, NMAX, NOUT + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NBVAL( * ), NSVAL( * ), NVAL( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) + INTEGER NTYPES + PARAMETER ( NTYPES = 10 ) + INTEGER NTESTS + PARAMETER ( NTESTS = 9 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, TYPE, UPLO, XTYPE + CHARACTER*3 PATH, MATPATH + INTEGER I, I1, I2, IMAT, IN, INB, INFO, IOFF, IRHS, + $ IUPLO, IZERO, J, K, KL, KU, LDA, LWORK, MODE, + $ N, NB, NERRS, NFAIL, NIMAT, NRHS, NRUN, NT + DOUBLE PRECISION ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + DOUBLE PRECISION RESULT( NTESTS ) +* .. +* .. External Subroutines .. + EXTERNAL ALAERH, ALAHD, ALASUM, CERRSY, ZLACPY, ZLARHS, + $ CLATB4, ZLATMS, ZSYT02, ZSYT01, + $ ZSYTRF_AA_2STAGE, ZSYTRS_AA_2STAGE, + $ XLAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Zomplex precision' + PATH( 2: 3 ) = 'S2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Zomplex precision' + MATPATH( 2: 3 ) = 'SY' + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL ZERRSY( PATH, NOUT ) + INFOT = 0 +* +* Set the minimum block size for which the block routine should +* be used, which will be later returned by ILAENV +* + CALL XLAENV( 2, 2 ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + IF( N .GT. NMAX ) THEN + NFAIL = NFAIL + 1 + WRITE(NOUT, 9995) 'M ', N, NMAX + GO TO 180 + END IF + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + IZERO = 0 +* +* Do for each value of matrix type IMAT +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* +* Set up parameters with ZLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL ZLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, + $ ANORM, MODE, CNDNUM, DIST ) +* +* Generate a matrix with ZLATMS. +* + SRNAMT = 'ZLATMS' + CALL ZLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, WORK, + $ INFO ) +* +* Check error code from ZLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'ZLATMS', INFO, 0, UPLO, N, N, -1, + $ -1, -1, IMAT, NFAIL, NERRS, NOUT ) +* +* Skip all tests for this generated matrix +* + GO TO 160 + END IF +* +* For matrix types 3-6, zero one or more rows and +* columns of the matrix to test that INFO is returned +* correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = CZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = CZERO + 50 CONTINUE + END IF + ELSE + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = CZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the last IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = CZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* Do for each value of NB in NBVAL +* + DO 150 INB = 1, NNB +* +* Set the optimal blocksize, which will be later +* returned by ILAENV. +* + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Copy the test matrix A into matrix AFAC which +* will be factorized in place. This is needed to +* preserve the test matrix A for subsequent tests. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) +* +* Compute the L*D*L**T or U*D*U**T factorization of the +* matrix. IWORK stores details of the interchanges and +* the block structure of D. AINV is a work array for +* block factorization, LWORK is the length of AINV. +* + SRNAMT = 'ZSYTRF_AA_2STAGE' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL ZSYTRF_AA_2STAGE( UPLO, N, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ WORK, LWORK, + $ INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from ZSYTRF and handle error. +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'ZSYTRF_AA_2STAGE', INFO, K, + $ UPLO, N, N, -1, -1, NB, IMAT, NFAIL, + $ NERRS, NOUT ) + END IF +* +*+ TEST 1 +* Reconstruct matrix from factors and compute residual. +* +c CALL ZSYT01_AA( UPLO, N, A, LDA, AFAC, LDA, IWORK, +c $ AINV, LDA, RWORK, RESULT( 1 ) ) +c NT = 1 + NT = 0 +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )UPLO, N, NB, IMAT, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT +* +* Skip solver test if INFO is not 0. +* + IF( INFO.NE.0 ) THEN + GO TO 140 + END IF +* +* Do for each value of NRHS in NSVAL. +* + DO 130 IRHS = 1, NNS + NRHS = NSVAL( IRHS ) +* +*+ TEST 2 (Using TRS) +* Solve and compute residual for A * X = B. +* +* Choose a set of NRHS random solution vectors +* stored in XACT and set up the right hand side B +* + SRNAMT = 'ZLARHS' + CALL ZLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, + $ KL, KU, NRHS, A, LDA, XACT, LDA, + $ B, LDA, ISEED, INFO ) + CALL ZLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* + SRNAMT = 'ZSYTRS_AA_2STAGE' + LWORK = MAX( 1, 3*N-2 ) + CALL ZSYTRS_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, IWORK, IWORK( 1+N ), + $ X, LDA, INFO ) +* +* Check error code from ZSYTRS and handle error. +* + IF( INFO.NE.0 ) THEN + IF( IZERO.EQ.0 ) THEN + CALL ALAERH( PATH, 'ZSYTRS_AA_2STAGE', + $ INFO, 0, UPLO, N, N, -1, -1, + $ NRHS, IMAT, NFAIL, NERRS, NOUT ) + END IF + ELSE + CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA + $ ) +* +* Compute the residual for the solution +* + CALL ZSYT02( UPLO, N, NRHS, A, LDA, X, LDA, + $ WORK, LDA, RWORK, RESULT( 2 ) ) +* +* +* Print information about the tests that did not pass +* the threshold. +* + DO 120 K = 2, 2 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 )UPLO, N, NRHS, + $ IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 120 CONTINUE + END IF + NRUN = NRUN + 1 +* +* End do for each value of NRHS in NSVAL. +* + 130 CONTINUE + 140 CONTINUE + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NB =', I4, ', type ', + $ I2, ', test ', I2, ', ratio =', G12.5 ) + 9998 FORMAT( ' UPLO = ''', A1, ''', N =', I5, ', NRHS=', I3, ', type ', + $ I2, ', test(', I2, ') =', G12.5 ) + 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', + $ I6 ) + RETURN +* +* End of ZCHKSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/zchktsqr.f b/lapack-netlib/TESTING/LIN/zchktsqr.f index 236160923..e6e6ac556 100644 --- a/lapack-netlib/TESTING/LIN/zchktsqr.f +++ b/lapack-netlib/TESTING/LIN/zchktsqr.f @@ -94,7 +94,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup double_lin * @@ -103,10 +103,10 @@ $ NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -132,8 +132,8 @@ DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, DERRTSQR, - $ DTSQR01, XLAENV + EXTERNAL ALAERH, ALAHD, ALASUM, ZERRTSQR, + $ ZTSQR01, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN diff --git a/lapack-netlib/TESTING/LIN/zdrvhe_aa.f b/lapack-netlib/TESTING/LIN/zdrvhe_aa.f index 87ebdaa20..326627e8c 100644 --- a/lapack-netlib/TESTING/LIN/zdrvhe_aa.f +++ b/lapack-netlib/TESTING/LIN/zdrvhe_aa.f @@ -144,7 +144,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_lin * @@ -153,10 +153,10 @@ $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, $ RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -241,7 +241,6 @@ DO 10 I = 1, 4 ISEED( I ) = ISEEDY( I ) 10 CONTINUE - LWORK = MAX( 2*NMAX, NMAX*NRHS ) * * Test the error exits * @@ -260,6 +259,8 @@ * DO 180 IN = 1, NN N = NVAL( IN ) + LWORK = MAX( 3*N-2, N*(1+NB) ) + LWORK = MAX( LWORK, 1 ) LDA = MAX( N, 1 ) XTYPE = 'N' NIMAT = NTYPES diff --git a/lapack-netlib/TESTING/LIN/zdrvhe_aa_2stage.f b/lapack-netlib/TESTING/LIN/zdrvhe_aa_2stage.f new file mode 100644 index 000000000..655a8c788 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/zdrvhe_aa_2stage.f @@ -0,0 +1,491 @@ +*> \brief \b ZDRVHE_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZDRVHE_AA_2STAGE( +* DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, NMAX, +* A, AFAC, AINV, B, X, XACT, WORK, RWORK, IWORK, +* NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NOUT, NRHS +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NVAL( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZDRVHE_AA_2STAGE tests the driver routine ZHESV_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand side vectors to be generated for +*> each linear system. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX*16 array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (NMAX*max(2,NRHS)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (NMAX+2*NRHS) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16_lin +* +* ===================================================================== + SUBROUTINE ZDRVHE_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, + $ RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NMAX, NN, NOUT, NRHS + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NVAL( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + INTEGER NTYPES, NTESTS + PARAMETER ( NTYPES = 10, NTESTS = 3 ) + INTEGER NFACT + PARAMETER ( NFACT = 2 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, FACT, TYPE, UPLO, XTYPE + CHARACTER*3 MATPATH, PATH + INTEGER I, I1, I2, IFACT, IMAT, IN, INFO, IOFF, IUPLO, + $ IZERO, J, K, KL, KU, LDA, LWORK, MODE, N, + $ NB, NBMIN, NERRS, NFAIL, NIMAT, NRUN, NT + DOUBLE PRECISION ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER FACTS( NFACT ), UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + DOUBLE PRECISION RESULT( NTESTS ) +* .. +* .. External Functions .. + DOUBLE PRECISION DGET06, ZLANHE + EXTERNAL DGET06, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, ZERRVX, + $ ZGET04, ZLACPY, ZLARHS, ZLATB4, ZLATMS, + $ ZHESV_AA_2STAGE, ZHET01_AA, ZPOT02, + $ ZHETRF_AA_2STAGE +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, MAX, MIN +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / , FACTS / 'F', 'N' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Zomplex precision' + PATH( 2: 3 ) = 'H2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Zomplex precision' + MATPATH( 2: 3 ) = 'HE' +* + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL ZERRVX( PATH, NOUT ) + INFOT = 0 +* +* Set the block size and minimum block size for testing. +* + NB = 1 + NBMIN = 2 + CALL XLAENV( 1, NB ) + CALL XLAENV( 2, NBMIN ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* Set up parameters with ZLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL ZLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, ANORM, + $ MODE, CNDNUM, DIST ) +* +* Generate a matrix with ZLATMS. +* + SRNAMT = 'ZLATMS' + CALL ZLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, + $ WORK, INFO ) +* +* Check error code from ZLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'ZLATMS', INFO, 0, UPLO, N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + GO TO 160 + END IF +* +* For types 3-6, zero one or more rows and columns of +* the matrix to test that INFO is returned correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = ZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = ZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = ZERO + 50 CONTINUE + END IF + ELSE + IOFF = 0 + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = ZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = ZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* + DO 150 IFACT = 1, NFACT +* +* Do first for FACT = 'F', then for other values. +* + FACT = FACTS( IFACT ) +* +* Form an exact solution and set the right hand side. +* + SRNAMT = 'ZLARHS' + CALL ZLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, KL, KU, + $ NRHS, A, LDA, XACT, LDA, B, LDA, ISEED, + $ INFO ) + XTYPE = 'C' +* +* --- Test ZHESV_AA_2STAGE --- +* + IF( IFACT.EQ.2 ) THEN + CALL ZLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) + CALL ZLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* +* Factor the matrix and solve the system using ZHESV_AA. +* + SRNAMT = 'ZHESV_AA_2STAGE ' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL ZHESV_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ X, LDA, WORK, LWORK, INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from ZHESV_AA . +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'ZHESV_AA', INFO, K, + $ UPLO, N, N, -1, -1, NRHS, + $ IMAT, NFAIL, NERRS, NOUT ) + GO TO 120 + ELSE IF( INFO.NE.0 ) THEN + GO TO 120 + END IF +* +* Compute residual of the computed solution. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + CALL ZPOT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, + $ LDA, RWORK, RESULT( 1 ) ) +* +* Reconstruct matrix from factors and compute +* residual. +* +* NEED TO CREATE ZHET01_AA_2STAGE +* CALL ZHET01_AA( UPLO, N, A, LDA, AFAC, LDA, +* $ IWORK, AINV, LDA, RWORK, +* $ RESULT( 2 ) ) +* NT = 2 + NT = 1 +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALADHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )'ZHESV_AA_2STAGE', + $ UPLO, N, IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT + 120 CONTINUE + END IF +* + 150 CONTINUE +* + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASVM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( 1X, A, ', UPLO=''', A1, ''', N =', I5, ', type ', I2, + $ ', test ', I2, ', ratio =', G12.5 ) + RETURN +* +* End of ZDRVHE_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/zdrvls.f b/lapack-netlib/TESTING/LIN/zdrvls.f index c9485e45d..681852bc2 100644 --- a/lapack-netlib/TESTING/LIN/zdrvls.f +++ b/lapack-netlib/TESTING/LIN/zdrvls.f @@ -183,7 +183,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_lin * @@ -192,10 +192,10 @@ $ NBVAL, NXVAL, THRESH, TSTERR, A, COPYA, B, $ COPYB, C, S, COPYS, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -237,9 +237,9 @@ DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. - INTEGER ISEED( 4 ), ISEEDY( 4 ), IWORKQUERY - DOUBLE PRECISION RESULT( NTESTS ), RWORKQUERY - COMPLEX*16 WORKQUERY + INTEGER ISEED( 4 ), ISEEDY( 4 ), IWQ + DOUBLE PRECISION RESULT( NTESTS ), RWQ + COMPLEX*16 WQ * .. * .. Allocatable Arrays .. COMPLEX*16, ALLOCATABLE :: WORK (:) @@ -324,48 +324,85 @@ M = MMAX N = NMAX NRHS = NSMAX - LDA = MAX( 1, M ) - LDB = MAX( 1, M, N ) MNMIN = MAX( MIN( M, N ), 1 ) * * Compute workspace needed for routines * ZQRT14, ZQRT17 (two side cases), ZQRT15 and ZQRT12 * - LWORK = MAX( ( M+N )*NRHS, + LWORK = MAX( 1, ( M+N )*NRHS, $ ( N+NRHS )*( M+2 ), ( M+NRHS )*( N+2 ), $ MAX( M+MNMIN, NRHS*MNMIN,2*N+M ), $ MAX( M*N+4*MNMIN+MAX(M,N), M*N+2*MNMIN+4*N ) ) + LRWORK = 1 + LIWORK = 1 +* +* Iterate through all test cases and compute necessary workspace +* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* + DO IM = 1, NM + M = MVAL( IM ) + LDA = MAX( 1, M ) + DO IN = 1, NN + N = NVAL( IN ) + MNMIN = MAX(MIN( M, N ),1) + LDB = MAX( 1, M, N ) + DO INS = 1, NNS + NRHS = NSVAL( INS ) + DO IRANK = 1, 2 + DO ISCALE = 1, 3 + ITYPE = ( IRANK-1 )*3 + ISCALE + IF( DOTYPE( ITYPE ) ) THEN + IF( IRANK.EQ.1 ) THEN + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* +* Compute workspace needed for ZGELS + CALL ZGELS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_ZGELS = INT ( WQ ) +* Compute workspace needed for ZGETSLS + CALL ZGETSLS( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_ZGETSLS = INT( WQ ) + ENDDO + END IF +* Compute workspace needed for ZGELSY + CALL ZGELSY( M, N, NRHS, A, LDA, B, LDB, IWQ, + $ RCOND, CRANK, WQ, -1, RWORK, INFO ) + LWORK_ZGELSY = INT( WQ ) + LRWORK_ZGELSY = 2*N +* Compute workspace needed for ZGELSS + CALL ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1 , RWORK, + $ INFO ) + LWORK_ZGELSS = INT( WQ ) + LRWORK_ZGELSS = 5*MNMIN +* Compute workspace needed for ZGELSD + CALL ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WQ, -1, RWQ, IWQ, + $ INFO ) + LWORK_ZGELSD = INT( WQ ) + LRWORK_ZGELSD = INT( RWQ ) +* Compute LIWORK workspace needed for ZGELSY and ZGELSD + LIWORK = MAX( LIWORK, N, IWQ ) +* Compute LRWORK workspace needed for ZGELSY, ZGELSS and ZGELSD + LRWORK = MAX( LRWORK, LRWORK_ZGELSY, + $ LRWORK_ZGELSS, LRWORK_ZGELSD ) +* Compute LWORK workspace needed for all functions + LWORK = MAX( LWORK, LWORK_ZGELS, LWORK_ZGETSLS, + $ LWORK_ZGELSY, LWORK_ZGELSS, + $ LWORK_ZGELSD ) + END IF + ENDDO + ENDDO + ENDDO + ENDDO + ENDDO * -* Compute workspace needed for ZGELS - CALL ZGELS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_ZGELS = INT ( WORKQUERY ) -* Compute workspace needed for ZGETSLS - CALL ZGETSLS( 'N', M, N, NRHS, A, LDA, B, LDB, - $ WORKQUERY, -1, INFO ) - LWORK_ZGETSLS = INT( WORKQUERY ) -* Compute workspace needed for ZGELSY - CALL ZGELSY( M, N, NRHS, A, LDA, B, LDB, IWORKQUERY, - $ RCOND, CRANK, WORKQUERY, -1, RWORK, INFO ) - LWORK_ZGELSY = INT( WORKQUERY ) - LRWORK_ZGELSY = 2*N -* Compute workspace needed for ZGELSS - CALL ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, - $ RCOND, CRANK, WORKQUERY, -1 , RWORK, INFO ) - LWORK_ZGELSS = INT( WORKQUERY ) - LRWORK_ZGELSS = 5*MNMIN -* Compute workspace needed for ZGELSD - CALL ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, CRANK, - $ WORKQUERY, -1, RWORKQUERY, IWORKQUERY, INFO ) - LWORK_ZGELSD = INT( WORKQUERY ) - LRWORK_ZGELSD = INT( RWORKQUERY ) -* Compute LIWORK workspace needed for ZGELSY and ZGELSD - LIWORK = MAX( 1, N, IWORKQUERY ) -* Compute LRWORK workspace needed for ZGELSY, ZGELSS and ZGELSD - LRWORK = MAX( 1, LRWORK_ZGELSY, LRWORK_ZGELSS, LRWORK_ZGELSD ) -* Compute LWORK workspace needed for all functions - LWORK = MAX( 1, LWORK, LWORK_ZGELS, LWORK_ZGETSLS, LWORK_ZGELSY, - $ LWORK_ZGELSS, LWORK_ZGELSD ) LWLSY = LWORK * ALLOCATE( WORK( LWORK ) ) diff --git a/lapack-netlib/TESTING/LIN/zdrvrf3.f b/lapack-netlib/TESTING/LIN/zdrvrf3.f index e596b0b6a..8bb9f2b94 100644 --- a/lapack-netlib/TESTING/LIN/zdrvrf3.f +++ b/lapack-netlib/TESTING/LIN/zdrvrf3.f @@ -111,7 +111,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_lin * @@ -119,10 +119,10 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, + D_WORK_ZLANGE, Z_WORK_ZGEQRF, TAU ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, NN, NOUT @@ -227,7 +227,7 @@ * IF ( IALPHA.EQ. 1) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 2) THEN ALPHA = ONE ELSE ALPHA = ZLARND( 4, ISEED ) diff --git a/lapack-netlib/TESTING/LIN/zdrvrf4.f b/lapack-netlib/TESTING/LIN/zdrvrf4.f index 93e866b08..c264355a1 100644 --- a/lapack-netlib/TESTING/LIN/zdrvrf4.f +++ b/lapack-netlib/TESTING/LIN/zdrvrf4.f @@ -106,7 +106,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_lin * @@ -114,10 +114,10 @@ SUBROUTINE ZDRVRF4( NOUT, NN, NVAL, THRESH, C1, C2, LDC, CRF, A, + LDA, D_WORK_ZLANGE ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER LDA, LDC, NN, NOUT @@ -209,10 +209,10 @@ IF ( IALPHA.EQ. 1) THEN ALPHA = ZERO BETA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 2) THEN ALPHA = ONE BETA = ZERO - ELSE IF ( IALPHA.EQ. 1) THEN + ELSE IF ( IALPHA.EQ. 3) THEN ALPHA = ZERO BETA = ONE ELSE diff --git a/lapack-netlib/TESTING/LIN/zdrvsy_aa.f b/lapack-netlib/TESTING/LIN/zdrvsy_aa.f index 7b9626785..ae4c8a5aa 100644 --- a/lapack-netlib/TESTING/LIN/zdrvsy_aa.f +++ b/lapack-netlib/TESTING/LIN/zdrvsy_aa.f @@ -144,9 +144,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 -* -* @generated from LIN/ddrvsy_aa.f, fortran d -> z, Thu Nov 17 12:14:51 2016 +*> \date November 2017 * *> \ingroup complex16_lin * @@ -155,10 +153,10 @@ $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, $ RWORK, IWORK, NOUT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -204,8 +202,8 @@ EXTERNAL DGET06, ZLANSY * .. * .. External Subroutines .. - EXTERNAL ALADHD, ALAERH, ALASVM, DERRVX, ZGET04, ZLACPY, - $ ZLARHS, ZLASET, ZLATB4, ZLATMS, ZSYT02, DSYT05, + EXTERNAL ALADHD, ALAERH, ALASVM, ZERRVX, ZGET04, ZLACPY, + $ ZLARHS, ZLASET, ZLATB4, ZLATMS, ZSYT02, $ ZSYSV_AA, ZSYT01_AA, ZSYTRF_AA, XLAENV * .. * .. Scalars in Common .. @@ -244,7 +242,6 @@ DO 10 I = 1, 4 ISEED( I ) = ISEEDY( I ) 10 CONTINUE - LWORK = MAX( 2*NMAX, NMAX*NRHS ) * * Test the error exits * @@ -263,6 +260,8 @@ * DO 180 IN = 1, NN N = NVAL( IN ) + LWORK = MAX( 3*N-2, N*(1+NB) ) + LWORK = MAX( LWORK, 1 ) LDA = MAX( N, 1 ) XTYPE = 'N' NIMAT = NTYPES diff --git a/lapack-netlib/TESTING/LIN/zdrvsy_aa_2stage.f b/lapack-netlib/TESTING/LIN/zdrvsy_aa_2stage.f new file mode 100644 index 000000000..d93e44542 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/zdrvsy_aa_2stage.f @@ -0,0 +1,492 @@ +*> \brief \b ZDRVSY_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZDRVSY_AA_2STAGE( +* DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, NMAX, +* A, AFAC, AINV, B, X, XACT, WORK, RWORK, IWORK, +* NOUT ) +* +* .. Scalar Arguments .. +* LOGICAL TSTERR +* INTEGER NMAX, NN, NOUT, NRHS +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER IWORK( * ), NVAL( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), +* $ WORK( * ), X( * ), XACT( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZDRVSY_AA_2STAGE tests the driver routine ZSYSV_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> The matrix types to be used for testing. Matrices of type j +*> (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = +*> .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER +*> The number of values of N contained in the vector NVAL. +*> \endverbatim +*> +*> \param[in] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (NN) +*> The values of the matrix dimension N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand side vectors to be generated for +*> each linear system. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> The threshold value for the test ratios. A result is +*> included in the output file if RESULT >= THRESH. To have +*> every test ratio printed, use THRESH = 0. +*> \endverbatim +*> +*> \param[in] TSTERR +*> \verbatim +*> TSTERR is LOGICAL +*> Flag that indicates whether error exits are to be tested. +*> \endverbatim +*> +*> \param[in] NMAX +*> \verbatim +*> NMAX is INTEGER +*> The maximum value permitted for N, used in dimensioning the +*> work arrays. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AFAC +*> \verbatim +*> AFAC is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] AINV +*> \verbatim +*> AINV is COMPLEX*16 array, dimension (NMAX*NMAX) +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] XACT +*> \verbatim +*> XACT is COMPLEX*16 array, dimension (NMAX*NRHS) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (NMAX*max(2,NRHS)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is COMPLEX*16 array, dimension (NMAX+2*NRHS) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (NMAX) +*> \endverbatim +*> +*> \param[in] NOUT +*> \verbatim +*> NOUT is INTEGER +*> The unit number for output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16_lin +* +* ===================================================================== + SUBROUTINE ZDRVSY_AA_2STAGE( + $ DOTYPE, NN, NVAL, NRHS, THRESH, TSTERR, + $ NMAX, A, AFAC, AINV, B, X, XACT, WORK, + $ RWORK, IWORK, NOUT ) +* +* -- LAPACK test routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + LOGICAL TSTERR + INTEGER NMAX, NN, NOUT, NRHS + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER IWORK( * ), NVAL( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( * ), AFAC( * ), AINV( * ), B( * ), + $ WORK( * ), X( * ), XACT( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) + INTEGER NTYPES, NTESTS + PARAMETER ( NTYPES = 10, NTESTS = 3 ) + INTEGER NFACT + PARAMETER ( NFACT = 2 ) +* .. +* .. Local Scalars .. + LOGICAL ZEROT + CHARACTER DIST, FACT, TYPE, UPLO, XTYPE + CHARACTER*3 MATPATH, PATH + INTEGER I, I1, I2, IFACT, IMAT, IN, INFO, IOFF, IUPLO, + $ IZERO, J, K, KL, KU, LDA, LWORK, MODE, N, + $ NB, NBMIN, NERRS, NFAIL, NIMAT, NRUN, NT + DOUBLE PRECISION ANORM, CNDNUM +* .. +* .. Local Arrays .. + CHARACTER FACTS( NFACT ), UPLOS( 2 ) + INTEGER ISEED( 4 ), ISEEDY( 4 ) + DOUBLE PRECISION RESULT( NTESTS ) +* .. +* .. External Functions .. + DOUBLE PRECISION DGET06, ZLANSY + EXTERNAL DGET06, ZLANSY +* .. +* .. External Subroutines .. + EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, ZERRVX, + $ ZGET04, ZLACPY, ZLARHS, ZLATB4, ZLATMS, + $ ZSYSV_AA_2STAGE, ZSYT01_AA, ZSYT02, + $ ZSYTRF_AA_2STAGE +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, NUNIT +* .. +* .. Common blocks .. + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Data statements .. + DATA ISEEDY / 1988, 1989, 1990, 1991 / + DATA UPLOS / 'U', 'L' / , FACTS / 'F', 'N' / +* .. +* .. Executable Statements .. +* +* Initialize constants and the random number seed. +* +* Test path +* + PATH( 1: 1 ) = 'Zomplex precision' + PATH( 2: 3 ) = 'H2' +* +* Path to generate matrices +* + MATPATH( 1: 1 ) = 'Zomplex precision' + MATPATH( 2: 3 ) = 'SY' +* + NRUN = 0 + NFAIL = 0 + NERRS = 0 + DO 10 I = 1, 4 + ISEED( I ) = ISEEDY( I ) + 10 CONTINUE +* +* Test the error exits +* + IF( TSTERR ) + $ CALL ZERRVX( PATH, NOUT ) + INFOT = 0 +* +* Set the block size and minimum block size for testing. +* + NB = 1 + NBMIN = 2 + CALL XLAENV( 1, NB ) + CALL XLAENV( 2, NBMIN ) +* +* Do for each value of N in NVAL +* + DO 180 IN = 1, NN + N = NVAL( IN ) + LDA = MAX( N, 1 ) + XTYPE = 'N' + NIMAT = NTYPES + IF( N.LE.0 ) + $ NIMAT = 1 +* + DO 170 IMAT = 1, NIMAT +* +* Do the tests only if DOTYPE( IMAT ) is true. +* + IF( .NOT.DOTYPE( IMAT ) ) + $ GO TO 170 +* +* Skip types 3, 4, 5, or 6 if the matrix size is too small. +* + ZEROT = IMAT.GE.3 .AND. IMAT.LE.6 + IF( ZEROT .AND. N.LT.IMAT-2 ) + $ GO TO 170 +* +* Do first for UPLO = 'U', then for UPLO = 'L' +* + DO 160 IUPLO = 1, 2 + UPLO = UPLOS( IUPLO ) +* +* Begin generate the test matrix A. +* +* Set up parameters with ZLATB4 for the matrix generator +* based on the type of matrix to be generated. +* + CALL ZLATB4( MATPATH, IMAT, N, N, TYPE, KL, KU, ANORM, + $ MODE, CNDNUM, DIST ) +* +* Generate a matrix with ZLATMS. +* + SRNAMT = 'ZLATMS' + CALL ZLATMS( N, N, DIST, ISEED, TYPE, RWORK, MODE, + $ CNDNUM, ANORM, KL, KU, UPLO, A, LDA, + $ WORK, INFO ) +* +* Check error code from ZLATMS and handle error. +* + IF( INFO.NE.0 ) THEN + CALL ALAERH( PATH, 'ZLATMS', INFO, 0, UPLO, N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + GO TO 160 + END IF +* +* For types 3-6, zero one or more rows and columns of +* the matrix to test that INFO is returned correctly. +* + IF( ZEROT ) THEN + IF( IMAT.EQ.3 ) THEN + IZERO = 1 + ELSE IF( IMAT.EQ.4 ) THEN + IZERO = N + ELSE + IZERO = N / 2 + 1 + END IF +* + IF( IMAT.LT.6 ) THEN +* +* Set row and column IZERO to zero. +* + IF( IUPLO.EQ.1 ) THEN + IOFF = ( IZERO-1 )*LDA + DO 20 I = 1, IZERO - 1 + A( IOFF+I ) = CZERO + 20 CONTINUE + IOFF = IOFF + IZERO + DO 30 I = IZERO, N + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 30 CONTINUE + ELSE + IOFF = IZERO + DO 40 I = 1, IZERO - 1 + A( IOFF ) = CZERO + IOFF = IOFF + LDA + 40 CONTINUE + IOFF = IOFF - IZERO + DO 50 I = IZERO, N + A( IOFF+I ) = CZERO + 50 CONTINUE + END IF + ELSE + IOFF = 0 + IF( IUPLO.EQ.1 ) THEN +* +* Set the first IZERO rows and columns to zero. +* + DO 70 J = 1, N + I2 = MIN( J, IZERO ) + DO 60 I = 1, I2 + A( IOFF+I ) = CZERO + 60 CONTINUE + IOFF = IOFF + LDA + 70 CONTINUE + IZERO = 1 + ELSE +* +* Set the first IZERO rows and columns to zero. +* + IOFF = 0 + DO 90 J = 1, N + I1 = MAX( J, IZERO ) + DO 80 I = I1, N + A( IOFF+I ) = CZERO + 80 CONTINUE + IOFF = IOFF + LDA + 90 CONTINUE + END IF + END IF + ELSE + IZERO = 0 + END IF +* +* End generate the test matrix A. +* +* + DO 150 IFACT = 1, NFACT +* +* Do first for FACT = 'F', then for other values. +* + FACT = FACTS( IFACT ) +* +* Form an exact solution and set the right hand side. +* + SRNAMT = 'ZLARHS' + CALL ZLARHS( MATPATH, XTYPE, UPLO, ' ', N, N, KL, KU, + $ NRHS, A, LDA, XACT, LDA, B, LDA, ISEED, + $ INFO ) + XTYPE = 'C' +* +* --- Test ZSYSV_AA_2STAGE --- +* + IF( IFACT.EQ.2 ) THEN + CALL ZLACPY( UPLO, N, N, A, LDA, AFAC, LDA ) + CALL ZLACPY( 'Full', N, NRHS, B, LDA, X, LDA ) +* +* Factor the matrix and solve the system using ZSYSV_AA. +* + SRNAMT = 'ZSYSV_AA_2STAGE ' + LWORK = MIN(N*NB, 3*NMAX*NMAX) + CALL ZSYSV_AA_2STAGE( UPLO, N, NRHS, AFAC, LDA, + $ AINV, (3*NB+1)*N, + $ IWORK, IWORK( 1+N ), + $ X, LDA, WORK, LWORK, INFO ) +* +* Adjust the expected value of INFO to account for +* pivoting. +* + IF( IZERO.GT.0 ) THEN + J = 1 + K = IZERO + 100 CONTINUE + IF( J.EQ.K ) THEN + K = IWORK( J ) + ELSE IF( IWORK( J ).EQ.K ) THEN + K = J + END IF + IF( J.LT.K ) THEN + J = J + 1 + GO TO 100 + END IF + ELSE + K = 0 + END IF +* +* Check error code from ZSYSV_AA_2STAGE . +* + IF( INFO.NE.K ) THEN + CALL ALAERH( PATH, 'ZSYSV_AA_2STAGE', INFO, K, + $ UPLO, N, N, -1, -1, NRHS, + $ IMAT, NFAIL, NERRS, NOUT ) + GO TO 120 + ELSE IF( INFO.NE.0 ) THEN + GO TO 120 + END IF +* +* Compute residual of the computed solution. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDA, WORK, LDA ) + CALL ZSYT02( UPLO, N, NRHS, A, LDA, X, LDA, WORK, + $ LDA, RWORK, RESULT( 1 ) ) +* +* Reconstruct matrix from factors and compute +* residual. +* +c CALL ZSY01_AA( UPLO, N, A, LDA, AFAC, LDA, +c $ IWORK, AINV, LDA, RWORK, +c $ RESULT( 2 ) ) +c NT = 2 + NT = 1 +* +* Print information about the tests that did not pass +* the threshold. +* + DO 110 K = 1, NT + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALADHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9999 )'ZSYSV_AA_2STAGE ', + $ UPLO, N, IMAT, K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + 110 CONTINUE + NRUN = NRUN + NT + 120 CONTINUE + END IF +* + 150 CONTINUE +* + 160 CONTINUE + 170 CONTINUE + 180 CONTINUE +* +* Print a summary of the results. +* + CALL ALASVM( PATH, NOUT, NFAIL, NRUN, NERRS ) +* + 9999 FORMAT( 1X, A, ', UPLO=''', A1, ''', N =', I5, ', type ', I2, + $ ', test ', I2, ', ratio =', G12.5 ) + RETURN +* +* End of ZDRVSY_AA_2STAGE +* + END diff --git a/lapack-netlib/TESTING/LIN/zerrhe.f b/lapack-netlib/TESTING/LIN/zerrhe.f index 5f1465b29..5b2f30bdf 100644 --- a/lapack-netlib/TESTING/LIN/zerrhe.f +++ b/lapack-netlib/TESTING/LIN/zerrhe.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_lin * * ===================================================================== SUBROUTINE ZERRHE( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -90,11 +90,12 @@ * .. External Subroutines .. EXTERNAL ALAESM, CHKXER, ZHECON, ZHECON_3, ZHECON_ROOK, $ ZHERFS, ZHETF2, ZHETF2_RK, ZHETF2_ROOK, ZHETRF, - $ ZHETRF_RK, ZHETRF_ROOK, ZHETRF_AA, ZHETRI, + $ ZHETRF_RK, ZHETRF_ROOK, ZHETRF_AA, + $ ZHETRF_AA_2STAGE, ZHETRI, $ ZHETRI_3, ZHETRI_3X, ZHETRI_ROOK, ZHETRI2, $ ZHETRI2X, ZHETRS, ZHETRS_3, ZHETRS_ROOK, - $ ZHETRS_AA, ZHPCON, ZHPRFS, ZHPTRF, ZHPTRI, - $ ZHPTRS + $ ZHETRS_AA, ZHETRS_AA_2STAGE, ZHPCON, ZHPRFS, + $ ZHPTRF, ZHPTRI, ZHPTRS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -521,6 +522,63 @@ INFOT = 10 CALL ZHETRS_AA( 'U', 0, 1, A, 1, IP, B, 1, W, -2, INFO ) CALL CHKXER( 'ZHETRS_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* Test error exits of the routines that use factorization +* of a symmetric indefinite matrix with Aasen's algorithm. +* +* ZHETRF_AA_2STAGE +* + SRNAMT = 'ZHETRF_AA_2STAGE' + INFOT = 1 + CALL ZHETRF_AA_2STAGE( '/', 0, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRF_AA_2STAGE( 'U', -1, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZHETRF_AA_2STAGE( 'U', 2, A, 1, A, 2, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZHETRF_AA_2STAGE( 'U', 2, A, 2, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZHETRF_AA_2STAGE( 'U', 2, A, 2, A, 8, IP, IP, W, 0, + $ INFO ) + CALL CHKXER( 'ZHETRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) +* +* ZHETRS_AA_2STAGE +* + SRNAMT = 'ZHETRS_AA_2STAGE' + INFOT = 1 + CALL ZHETRS_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRS_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHETRS_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHETRS_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZHETRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZHETRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHETRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 8, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZHETRS_AA_STAGE', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'HP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/zerrsy.f b/lapack-netlib/TESTING/LIN/zerrsy.f index 4179e98f3..642d748da 100644 --- a/lapack-netlib/TESTING/LIN/zerrsy.f +++ b/lapack-netlib/TESTING/LIN/zerrsy.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_lin * * ===================================================================== SUBROUTINE ZERRSY( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -91,7 +91,7 @@ $ ZSPTRS, ZSYCON, ZSYCON_3, ZSYCON_ROOK, ZSYRFS, $ ZSYTF2, ZSYTF2_RK, ZSYTF2_ROOK, ZSYTRF, $ ZSYTRF_RK, ZSYTRF_ROOK, ZSYTRI, ZSYTRI_3, - $ ZSYTRI_3X, ZSYTRI_ROOK, ZSYTRI2, ZSYTRI2Z, + $ ZSYTRI_3X, ZSYTRI_ROOK, ZSYTRI2, ZSYTRI2X, $ ZSYTRS, ZSYTRS_3, ZSYTRS_ROOK * .. * .. Scalars in Common .. @@ -592,6 +592,63 @@ INFOT = 8 CALL ZSYTRS_AA( 'U', 2, 1, A, 2, IP, B, 1, W, 1, INFO ) CALL CHKXER( 'ZSYTRS_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'S2' ) ) THEN +* +* Test error exits of the routines that use factorization +* of a symmetric indefinite matrix with Aasen's algorithm. +* +* ZSYTRF_AA_2STAGE +* + SRNAMT = 'ZSYTRF_AA_2STAGE' + INFOT = 1 + CALL ZSYTRF_AA_2STAGE( '/', 0, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZSYTRF_AA_2STAGE( 'U', -1, A, 1, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZSYTRF_AA_2STAGE( 'U', 2, A, 1, A, 2, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 1, IP, IP, W, 1, + $ INFO ) + CALL CHKXER( 'ZSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZSYTRF_AA_2STAGE( 'U', 2, A, 2, A, 8, IP, IP, W, 0, + $ INFO ) + CALL CHKXER( 'ZSYTRF_AA_2STAGE', INFOT, NOUT, LERR, OK ) +* +* CHETRS_AA_2STAGE +* + SRNAMT = 'ZSYTRS_AA_2STAGE' + INFOT = 1 + CALL ZSYTRS_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZSYTRS_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZSYTRS_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZSYTRS_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZSYTRS_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZSYTRS_AA_2STAGE( 'U', 2, 1, A, 2, A, 8, IP, IP, + $ B, 1, INFO ) + CALL CHKXER( 'ZSYTRS_AA_STAGE', INFOT, NOUT, LERR, OK ) * END IF * diff --git a/lapack-netlib/TESTING/LIN/zerrvx.f b/lapack-netlib/TESTING/LIN/zerrvx.f index 6d64a9e72..29ba744ed 100644 --- a/lapack-netlib/TESTING/LIN/zerrvx.f +++ b/lapack-netlib/TESTING/LIN/zerrvx.f @@ -48,17 +48,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_lin * * ===================================================================== SUBROUTINE ZERRVX( PATH, NUNIT ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. CHARACTER*3 PATH @@ -94,7 +94,7 @@ $ ZHPSV, ZHPSVX, ZPBSV, ZPBSVX, ZPOSV, ZPOSVX, $ ZPPSV, ZPPSVX, ZPTSV, ZPTSVX, ZSPSV, ZSPSVX, $ ZSYSV, ZSYSV_AA, ZSYSV_RK, ZSYSV_ROOK, - $ ZSYSVX + $ ZSYSVX, ZSYSV_AA_2STAGE * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -718,6 +718,36 @@ INFOT = 8 CALL ZHESV_AA( 'U', 2, 0, A, 2, IP, B, 1, W, 1, INFO ) CALL CHKXER( 'ZHESV_AA', INFOT, NOUT, LERR, OK ) +* + ELSE IF( LSAMEN( 2, C2, 'H2' ) ) THEN +* +* CHESV_AASEN_2STAGE +* + SRNAMT = 'ZHESV_AA_2STAGE' + INFOT = 1 + CALL ZHESV_AA_2STAGE( '/', 0, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'ZHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHESV_AA_2STAGE( 'U', -1, 0, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'ZHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHESV_AA_2STAGE( 'U', 0, -1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'ZHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHESV_AA_2STAGE( 'U', 2, 1, A, 1, A, 1, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'ZHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHESV_AA_2STAGE( 'U', 2, 1, A, 2, A, 2, IP, IP, B, 1, + $ W, 1, INFO ) + CALL CHKXER( 'ZHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZHESV_AA_2STAGE( 'U', 2, 1, A, 2, A, 1, IP, IP, B, 2, + $ W, 1, INFO ) + CALL CHKXER( 'ZHESV_AA_2STAGE', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'HP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/zhet01_3.f b/lapack-netlib/TESTING/LIN/zhet01_3.f index 3499868cb..ba9955045 100644 --- a/lapack-netlib/TESTING/LIN/zhet01_3.f +++ b/lapack-netlib/TESTING/LIN/zhet01_3.f @@ -133,7 +133,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_lin * @@ -141,10 +141,10 @@ SUBROUTINE ZHET01_3( UPLO, N, A, LDA, AFAC, LDAFAC, E, IPIV, C, $ LDC, RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/TESTING/LIN/zlahilb.f b/lapack-netlib/TESTING/LIN/zlahilb.f index 98c0303db..a6dc79b20 100644 --- a/lapack-netlib/TESTING/LIN/zlahilb.f +++ b/lapack-netlib/TESTING/LIN/zlahilb.f @@ -8,7 +8,7 @@ * Definition: * =========== * -* SUBROUTINE ZLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, +* SUBROUTINE ZLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, * INFO, PATH) * * .. Scalar Arguments .. @@ -56,7 +56,7 @@ *> *> \param[in] NRHS *> \verbatim -*> NRHS is NRHS +*> NRHS is INTEGER *> The requested number of right-hand sides. *> \endverbatim *> @@ -126,18 +126,18 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_lin * * ===================================================================== - SUBROUTINE ZLAHILB(N, NRHS, A, LDA, X, LDX, B, LDB, WORK, + SUBROUTINE ZLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, $ INFO, PATH) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -220,7 +220,8 @@ END DO * * Generate the scaled Hilbert matrix in A -* If we are testing SY routines, take D1_i = D2_i, else, D1_i = D2_i* +* If we are testing SY routines, +* take D1_i = D2_i, else, D1_i = D2_i* IF ( LSAMEN( 2, C2, 'SY' ) ) THEN DO J = 1, N DO I = 1, N @@ -250,8 +251,9 @@ WORK(J) = ( ( (WORK(J-1)/(J-1)) * (J-1 - N) ) /(J-1) ) $ * (N +J -1) END DO -* -* If we are testing SY routines, take D1_i = D2_i, else, D1_i = D2_i* + +* If we are testing SY routines, +* take D1_i = D2_i, else, D1_i = D2_i* IF ( LSAMEN( 2, C2, 'SY' ) ) THEN DO J = 1, NRHS DO I = 1, N diff --git a/lapack-netlib/TESTING/LIN/zlarhs.f b/lapack-netlib/TESTING/LIN/zlarhs.f index a2f5f9b85..9d1452840 100644 --- a/lapack-netlib/TESTING/LIN/zlarhs.f +++ b/lapack-netlib/TESTING/LIN/zlarhs.f @@ -190,7 +190,7 @@ *> \verbatim *> INFO is INTEGER *> = 0: successful exit -*> < 0: if INFO = -k, the k-th argument had an illegal value +*> < 0: if INFO = -i, the i-th argument had an illegal value *> \endverbatim * * Authors: @@ -201,7 +201,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_lin * @@ -209,10 +209,10 @@ SUBROUTINE ZLARHS( PATH, XTYPE, UPLO, TRANS, M, N, KL, KU, NRHS, $ A, LDA, X, LDX, B, LDB, ISEED, INFO ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO, XTYPE diff --git a/lapack-netlib/TESTING/LIN/zsyt01_3.f b/lapack-netlib/TESTING/LIN/zsyt01_3.f index 892294075..8106a61b0 100644 --- a/lapack-netlib/TESTING/LIN/zsyt01_3.f +++ b/lapack-netlib/TESTING/LIN/zsyt01_3.f @@ -133,7 +133,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date June 2017 * *> \ingroup complex16_lin * @@ -141,10 +141,10 @@ SUBROUTINE ZSYT01_3( UPLO, N, A, LDA, AFAC, LDAFAC, E, IPIV, C, $ LDC, RWORK, RESID ) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.7.1) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* June 2017 * * .. Scalar Arguments .. CHARACTER UPLO diff --git a/lapack-netlib/TESTING/MATGEN/CMakeLists.txt b/lapack-netlib/TESTING/MATGEN/CMakeLists.txt index 09b6e3b4b..bc986da3a 100644 --- a/lapack-netlib/TESTING/MATGEN/CMakeLists.txt +++ b/lapack-netlib/TESTING/MATGEN/CMakeLists.txt @@ -2,74 +2,51 @@ # This is the makefile to create a library of the test matrix # generators used in LAPACK. The files are organized as follows: # -# SCATGEN -- Auxiliary routines called from both REAL and COMPLEX -# DZATGEN -- Auxiliary routines called from both DOUBLE PRECISION -# and COMPLEX*16 +# SCATGEN -- Auxiliary routines called from single precision +# DZATGEN -- Auxiliary routines called from double precision # SMATGEN -- Single precision real matrix generation routines # CMATGEN -- Single precision complex matrix generation routines # DMATGEN -- Double precision real matrix generation routines # ZMATGEN -- Double precision complex matrix generation routines # -# The library can be set up to include routines for any combination -# of the four precisions. To create or add to the library, enter make -# followed by one or more of the precisions desired. Some examples: -# make single -# make single complex -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates a library of all four precisions. -# The library is called -# tmglib.a -# and is created at the LAPACK directory level. -# -# To remove the object files after the library is created, enter -# make clean -# On some systems, you can force the source files to be recompiled by -# entering (for example) -# make single FRC=FRC -# ####################################################################### -set(SCATGEN slatm1.f slaran.f slarnd.f) +set(SCATGEN slatm1.f slatm7.f slaran.f slarnd.f) set(SMATGEN slatms.f slatme.f slatmr.f slatmt.f slagge.f slagsy.f slakf2.f slarge.f slaror.f slarot.f slatm2.f - slatm3.f slatm5.f slatm6.f slatm7.f slahilb.f) + slatm3.f slatm5.f slatm6.f slahilb.f) set(CMATGEN clatms.f clatme.f clatmr.f clatmt.f clagge.f claghe.f clagsy.f clakf2.f clarge.f claror.f clarot.f - clatm1.f clarnd.f clatm2.f clatm3.f clatm5.f clatm6.f clahilb.f slatm7.f) + clatm1.f clarnd.f clatm2.f clatm3.f clatm5.f clatm6.f clahilb.f) -set(DZATGEN dlatm1.f dlaran.f dlarnd.f) +set(DZATGEN dlatm1.f dlatm7.f dlaran.f dlarnd.f) set(DMATGEN dlatms.f dlatme.f dlatmr.f dlatmt.f dlagge.f dlagsy.f dlakf2.f dlarge.f dlaror.f dlarot.f dlatm2.f - dlatm3.f dlatm5.f dlatm6.f dlatm7.f dlahilb.f) + dlatm3.f dlatm5.f dlatm6.f dlahilb.f) set(ZMATGEN zlatms.f zlatme.f zlatmr.f zlatmt.f zlagge.f zlaghe.f zlagsy.f zlakf2.f zlarge.f zlaror.f zlarot.f - zlatm1.f zlarnd.f zlatm2.f zlatm3.f zlatm5.f zlatm6.f zlahilb.f dlatm7.f) + zlatm1.f zlarnd.f zlatm2.f zlatm3.f zlatm5.f zlatm6.f zlahilb.f) + +set(SOURCES) if(BUILD_SINGLE) - set(ALLOBJ ${SMATGEN} ${SCATGEN}) + list(APPEND SOURCES ${SMATGEN} ${SCATGEN}) endif() if(BUILD_DOUBLE) - set(ALLOBJ ${ALLOBJ} ${DMATGEN} ${DZATGEN}) + list(APPEND SOURCES ${DMATGEN} ${DZATGEN}) endif() if(BUILD_COMPLEX) - set(ALLOBJ ${ALLOBJ} ${CMATGEN} ${SCATGEN}) + list(APPEND SOURCES ${CMATGEN} ${SCATGEN}) endif() if(BUILD_COMPLEX16) - set(ALLOBJ ${ALLOBJ} ${ZMATGEN} ${DZATGEN}) + list(APPEND SOURCES ${ZMATGEN} ${DZATGEN}) endif() +list(REMOVE_DUPLICATES SOURCES) -if(NOT ALLOBJ) - set(ALLOBJ ${SMATGEN} ${CMATGEN} ${SCATGEN} ${DMATGEN} ${ZMATGEN} - ${DZATGEN}) -else() - list(REMOVE_DUPLICATES ALLOBJ) -endif() -add_library(tmglib ${ALLOBJ}) -target_link_libraries(tmglib ${LAPACK_LIBRARIES}) +add_library(tmglib ${SOURCES}) +target_link_libraries(tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) lapack_install_library(tmglib) diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index 34a6ff07e..e20004c2f 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -4,9 +4,8 @@ include ../../make.inc # This is the makefile to create a library of the test matrix # generators used in LAPACK. The files are organized as follows: # -# SCATGEN -- Auxiliary routines called from both REAL and COMPLEX -# DZATGEN -- Auxiliary routines called from both DOUBLE PRECISION -# and COMPLEX*16 +# SCATGEN -- Auxiliary routines called from single precision +# DZATGEN -- Auxiliary routines called from double precision # SMATGEN -- Single precision real matrix generation routines # CMATGEN -- Single precision complex matrix generation routines # DMATGEN -- Double precision real matrix generation routines @@ -26,28 +25,28 @@ include ../../make.inc # and is created at the LAPACK directory level. # # To remove the object files after the library is created, enter -# make clean +# make cleanobj # On some systems, you can force the source files to be recompiled by # entering (for example) # make single FRC=FRC # ####################################################################### -SCATGEN = slatm1.o slaran.o slarnd.o +SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \ slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \ - slatm3.o slatm5.o slatm6.o slatm7.o slahilb.o + slatm3.o slatm5.o slatm6.o slahilb.o CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \ clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \ clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o -DZATGEN = dlatm1.o dlaran.o dlarnd.o +DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \ dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \ - dlatm3.o dlatm5.o dlatm6.o dlatm7.o dlahilb.o + dlatm3.o dlatm5.o dlatm6.o dlahilb.o ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \ zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \ @@ -59,23 +58,23 @@ ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \ $(DZATGEN) ../../$(TMGLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ single: $(SMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(SMATGEN) $(SCATGEN) + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex: $(CMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(CMATGEN) $(SCATGEN) + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) double: $(DMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(DMATGEN) $(DZATGEN) + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex16: $(ZMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(ZMATGEN) $(DZATGEN) + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) $(SCATGEN): $(FRC) @@ -88,8 +87,11 @@ $(ZMATGEN): $(FRC) FRC: @FRC=$(FRC) -clean: +clean: cleanobj #cleanlib +cleanobj: rm -f *.o +cleanlib: + rm -f ../../$(TMGLIB) .f.o: $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/lapack-netlib/TESTING/MATGEN/clahilb.f b/lapack-netlib/TESTING/MATGEN/clahilb.f index 612c6c68f..13902872c 100644 --- a/lapack-netlib/TESTING/MATGEN/clahilb.f +++ b/lapack-netlib/TESTING/MATGEN/clahilb.f @@ -126,7 +126,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex_matgen * @@ -134,10 +134,10 @@ SUBROUTINE CLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, $ INFO, PATH) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -154,7 +154,7 @@ INTEGER I, J COMPLEX TMP CHARACTER*2 C2 - +* .. * .. Parameters .. * NMAX_EXACT the largest dimension where the generated data is * exact. @@ -163,7 +163,7 @@ * ??? complex uses how many bits ??? INTEGER NMAX_EXACT, NMAX_APPROX, SIZE_D PARAMETER (NMAX_EXACT = 6, NMAX_APPROX = 11, SIZE_D = 8) - +* * d's are generated from random permuation of those eight elements. COMPLEX D1(8), D2(8), INVD1(8), INVD2(8) DATA D1 /(-1,0),(0,1),(-1,-1),(0,-1),(1,0),(-1,1),(1,1),(1,-1)/ @@ -173,7 +173,9 @@ $ (-.5,-.5),(.5,-.5),(.5,.5)/ DATA INVD2 /(-1,0),(0,1),(-.5,-.5),(0,-1),(1,0), $ (-.5,.5),(.5,.5),(.5,-.5)/ - +* .. +* .. External Subroutines .. + EXTERNAL XERBLA * .. * .. External Functions EXTERNAL CLASET, LSAMEN @@ -204,7 +206,7 @@ IF (N .GT. NMAX_EXACT) THEN INFO = 1 END IF - +* * Compute M = the LCM of the integers [1, 2*N-1]. The largest * reasonable N is small enough that integers suffice (up to N = 11). M = 1 @@ -219,7 +221,7 @@ END DO M = (M / TI) * I END DO - +* * Generate the scaled Hilbert matrix in A * If we are testing SY routines, take * D1_i = D2_i, else, D1_i = D2_i* @@ -238,12 +240,12 @@ END DO END DO END IF - +* * Generate matrix B as simply the first NRHS columns of M * the * identity. TMP = REAL(M) CALL CLASET('Full', N, NRHS, (0.0,0.0), TMP, B, LDB) - +* * Generate the true solutions in X. Because B = the first NRHS * columns of M*I, the true solutions are just the first NRHS columns * of the inverse Hilbert matrix. diff --git a/lapack-netlib/TESTING/MATGEN/dlahilb.f b/lapack-netlib/TESTING/MATGEN/dlahilb.f index 7b2badabc..5d0351f46 100644 --- a/lapack-netlib/TESTING/MATGEN/dlahilb.f +++ b/lapack-netlib/TESTING/MATGEN/dlahilb.f @@ -1,4 +1,4 @@ -C> \brief \b DLAHILB +*> \brief \b DLAHILB * * =========== DOCUMENTATION =========== * @@ -117,17 +117,17 @@ C> \brief \b DLAHILB *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup double_matgen * * ===================================================================== SUBROUTINE DLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -140,7 +140,7 @@ C> \brief \b DLAHILB INTEGER TM, TI, R INTEGER M INTEGER I, J - +* .. * .. Parameters .. * NMAX_EXACT the largest dimension where the generated data is * exact. @@ -148,7 +148,9 @@ C> \brief \b DLAHILB * a small componentwise relative error. INTEGER NMAX_EXACT, NMAX_APPROX PARAMETER (NMAX_EXACT = 6, NMAX_APPROX = 11) - +* .. +* .. External Subroutines .. + EXTERNAL XERBLA * .. * .. External Functions EXTERNAL DLASET @@ -177,7 +179,7 @@ C> \brief \b DLAHILB IF (N .GT. NMAX_EXACT) THEN INFO = 1 END IF - +* * Compute M = the LCM of the integers [1, 2*N-1]. The largest * reasonable N is small enough that integers suffice (up to N = 11). M = 1 @@ -192,14 +194,14 @@ C> \brief \b DLAHILB END DO M = (M / TI) * I END DO - +* * Generate the scaled Hilbert matrix in A DO J = 1, N DO I = 1, N A(I, J) = DBLE(M) / (I + J - 1) END DO END DO - +* * Generate matrix B as simply the first NRHS columns of M * the * identity. CALL DLASET('Full', N, NRHS, 0.0D+0, DBLE(M), B, LDB) @@ -212,12 +214,12 @@ C> \brief \b DLAHILB WORK(J) = ( ( (WORK(J-1)/(J-1)) * (J-1 - N) ) /(J-1) ) $ * (N +J -1) END DO - +* DO J = 1, NRHS DO I = 1, N X(I, J) = (WORK(I)*WORK(J)) / (I + J - 1) END DO END DO - +* END diff --git a/lapack-netlib/TESTING/MATGEN/slahilb.f b/lapack-netlib/TESTING/MATGEN/slahilb.f index 170cce62f..cb92db6db 100644 --- a/lapack-netlib/TESTING/MATGEN/slahilb.f +++ b/lapack-netlib/TESTING/MATGEN/slahilb.f @@ -117,17 +117,17 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup real_matgen * * ===================================================================== SUBROUTINE SLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, INFO) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -140,7 +140,7 @@ INTEGER TM, TI, R INTEGER M INTEGER I, J - +* .. * .. Parameters .. * NMAX_EXACT the largest dimension where the generated data is * exact. @@ -148,7 +148,9 @@ * a small componentwise relative error. INTEGER NMAX_EXACT, NMAX_APPROX PARAMETER (NMAX_EXACT = 6, NMAX_APPROX = 11) - +* .. +* .. External Subroutines .. + EXTERNAL XERBLA * .. * .. External Functions EXTERNAL SLASET @@ -177,7 +179,7 @@ IF (N .GT. NMAX_EXACT) THEN INFO = 1 END IF - +* * Compute M = the LCM of the integers [1, 2*N-1]. The largest * reasonable N is small enough that integers suffice (up to N = 11). M = 1 @@ -192,18 +194,18 @@ END DO M = (M / TI) * I END DO - +* * Generate the scaled Hilbert matrix in A DO J = 1, N DO I = 1, N A(I, J) = REAL(M) / (I + J - 1) END DO END DO - +* * Generate matrix B as simply the first NRHS columns of M * the * identity. CALL SLASET('Full', N, NRHS, 0.0, REAL(M), B, LDB) - +* * Generate the true solutions in X. Because B = the first NRHS * columns of M*I, the true solutions are just the first NRHS columns * of the inverse Hilbert matrix. @@ -212,12 +214,12 @@ WORK(J) = ( ( (WORK(J-1)/(J-1)) * (J-1 - N) ) /(J-1) ) $ * (N +J -1) END DO - +* DO J = 1, NRHS DO I = 1, N X(I, J) = (WORK(I)*WORK(J)) / (I + J - 1) END DO END DO - +* END diff --git a/lapack-netlib/TESTING/MATGEN/zlahilb.f b/lapack-netlib/TESTING/MATGEN/zlahilb.f index 892109295..43057931d 100644 --- a/lapack-netlib/TESTING/MATGEN/zlahilb.f +++ b/lapack-netlib/TESTING/MATGEN/zlahilb.f @@ -126,7 +126,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date December 2016 +*> \date November 2017 * *> \ingroup complex16_matgen * @@ -134,10 +134,10 @@ SUBROUTINE ZLAHILB( N, NRHS, A, LDA, X, LDX, B, LDB, WORK, $ INFO, PATH) * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 +* November 2017 * * .. Scalar Arguments .. INTEGER N, NRHS, LDA, LDX, LDB, INFO @@ -154,7 +154,7 @@ INTEGER I, J COMPLEX*16 TMP CHARACTER*2 C2 - +* .. * .. Parameters .. * NMAX_EXACT the largest dimension where the generated data is * exact. @@ -163,7 +163,7 @@ * ??? complex uses how many bits ??? INTEGER NMAX_EXACT, NMAX_APPROX, SIZE_D PARAMETER (NMAX_EXACT = 6, NMAX_APPROX = 11, SIZE_D = 8) - +* * d's are generated from random permuation of those eight elements. COMPLEX*16 d1(8), d2(8), invd1(8), invd2(8) DATA D1 /(-1,0),(0,1),(-1,-1),(0,-1),(1,0),(-1,1),(1,1),(1,-1)/ @@ -174,6 +174,9 @@ DATA INVD2 /(-1,0),(0,1),(-.5,-.5),(0,-1),(1,0), $ (-.5,.5),(.5,.5),(.5,-.5)/ * .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. * .. External Functions EXTERNAL ZLASET, LSAMEN INTRINSIC DBLE @@ -203,7 +206,7 @@ IF (N .GT. NMAX_EXACT) THEN INFO = 1 END IF - +* * Compute M = the LCM of the integers [1, 2*N-1]. The largest * reasonable N is small enough that integers suffice (up to N = 11). M = 1 @@ -218,7 +221,7 @@ END DO M = (M / TI) * I END DO - +* * Generate the scaled Hilbert matrix in A * If we are testing SY routines, * take D1_i = D2_i, else, D1_i = D2_i* @@ -237,12 +240,12 @@ END DO END DO END IF - +* * Generate matrix B as simply the first NRHS columns of M * the * identity. TMP = DBLE(M) CALL ZLASET('Full', N, NRHS, (0.0D+0,0.0D+0), TMP, B, LDB) - +* * Generate the true solutions in X. Because B = the first NRHS * columns of M*I, the true solutions are just the first NRHS columns * of the inverse Hilbert matrix. diff --git a/lapack-netlib/TESTING/Makefile b/lapack-netlib/TESTING/Makefile index dfb5fc176..8b883c0fa 100644 --- a/lapack-netlib/TESTING/Makefile +++ b/lapack-netlib/TESTING/Makefile @@ -21,10 +21,9 @@ # The executable files are called: # xlintsts, xlintstd, xlintstc, and xlintstz for LIN # xeigtsts, xeigtstd, xeigtstc, and xeigtstz for EIG -# and exist in the current directory level. # # To remove the output files after the tests have been run, enter -# make clean +# make cleantest # # To re-run specific tests after a make, enter (for example): # 'rm ssvd.out; make' or: @@ -37,11 +36,6 @@ include ../make.inc -ifneq ($(strip $(VARLIB)),) - LAPACKLIB := $(VARLIB) ../$(LAPACKLIB) -endif - - all: single complex double complex16 singleproto doubleproto complexproto complex16proto SEIGTST= snep.out \ @@ -157,443 +151,441 @@ complex16proto: $(ZLINTSTPROTO) # # ======== SINGLE LIN TESTS =========================== -stest.out: stest.in xlintsts +stest.out: stest.in LIN/xlintsts @echo Testing REAL LAPACK linear equation routines - ./xlintsts < stest.in > $@ 2>&1 + ./LIN/xlintsts < $< > $@ 2>&1 # # ======== COMPLEX LIN TESTS ========================== -ctest.out: ctest.in xlintstc +ctest.out: ctest.in LIN/xlintstc @echo Testing COMPLEX LAPACK linear equation routines - ./xlintstc < ctest.in > $@ 2>&1 + ./LIN/xlintstc < $< > $@ 2>&1 # # ======== DOUBLE LIN TESTS =========================== -dtest.out: dtest.in xlintstd +dtest.out: dtest.in LIN/xlintstd @echo Testing DOUBLE PRECISION LAPACK linear equation routines - ./xlintstd < dtest.in > $@ 2>&1 + ./LIN/xlintstd < $< > $@ 2>&1 # # ======== COMPLEX16 LIN TESTS ======================== -ztest.out: ztest.in xlintstz +ztest.out: ztest.in LIN/xlintstz @echo Testing COMPLEX16 LAPACK linear equation routines - ./xlintstz < ztest.in > $@ 2>&1 + ./LIN/xlintstz < $< > $@ 2>&1 # # ======== SINGLE-DOUBLE PROTO LIN TESTS ============== -dstest.out: dstest.in xlintstds +dstest.out: dstest.in LIN/xlintstds @echo Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines - ./xlintstds < dstest.in > $@ 2>&1 + ./LIN/xlintstds < $< > $@ 2>&1 # # ======== COMPLEX-COMPLEX16 LIN TESTS ======================== -zctest.out: zctest.in xlintstzc +zctest.out: zctest.in LIN/xlintstzc @echo Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines - ./xlintstzc < zctest.in > $@ 2>&1 + ./LIN/xlintstzc < $< > $@ 2>&1 # # ======== SINGLE RFP LIN TESTS ======================== -stest_rfp.out: stest_rfp.in xlintstrfs +stest_rfp.out: stest_rfp.in LIN/xlintstrfs @echo Testing REAL LAPACK RFP prototype linear equation routines - ./xlintstrfs < stest_rfp.in > $@ 2>&1 + ./LIN/xlintstrfs < $< > $@ 2>&1 # # ======== COMPLEX16 RFP LIN TESTS ======================== -dtest_rfp.out: dtest_rfp.in xlintstrfd +dtest_rfp.out: dtest_rfp.in LIN/xlintstrfd @echo Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines - ./xlintstrfd < dtest_rfp.in > $@ 2>&1 + ./LIN/xlintstrfd < $< > $@ 2>&1 # # ======== COMPLEX16 RFP LIN TESTS ======================== -ctest_rfp.out: ctest_rfp.in xlintstrfc +ctest_rfp.out: ctest_rfp.in LIN/xlintstrfc @echo Testing COMPLEX LAPACK RFP prototype linear equation routines - ./xlintstrfc < ctest_rfp.in > $@ 2>&1 + ./LIN/xlintstrfc < $< > $@ 2>&1 # # ======== COMPLEX16 RFP LIN TESTS ======================== -ztest_rfp.out: ztest_rfp.in xlintstrfz +ztest_rfp.out: ztest_rfp.in LIN/xlintstrfz @echo Testing COMPLEX16 LAPACK RFP prototype linear equation routines - ./xlintstrfz < ztest_rfp.in > $@ 2>&1 + ./LIN/xlintstrfz < $< > $@ 2>&1 # # # ======== SINGLE EIG TESTS =========================== # -snep.out: nep.in xeigtsts +snep.out: nep.in EIG/xeigtsts @echo NEP: Testing Nonsymmetric Eigenvalue Problem routines - ./xeigtsts < nep.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -ssep.out: sep.in xeigtsts +ssep.out: sep.in EIG/xeigtsts @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtsts < sep.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sse2.out: se2.in xeigtsts +sse2.out: se2.in EIG/xeigtsts @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtsts < se2.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -ssvd.out: svd.in xeigtsts +ssvd.out: svd.in EIG/xeigtsts @echo SVD: Testing Singular Value Decomposition routines - ./xeigtsts < svd.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sec.out: sec.in xeigtsts +sec.out: sec.in EIG/xeigtsts @echo SEC: Testing REAL Eigen Condition Routines - ./xeigtsts < sec.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sed.out: sed.in xeigtsts +sed.out: sed.in EIG/xeigtsts @echo SEV: Testing REAL Nonsymmetric Eigenvalue Driver - ./xeigtsts < sed.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sgg.out: sgg.in xeigtsts +sgg.out: sgg.in EIG/xeigtsts @echo SGG: Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines - ./xeigtsts < sgg.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sgd.out: sgd.in xeigtsts +sgd.out: sgd.in EIG/xeigtsts @echo SGD: Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines - ./xeigtsts < sgd.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -ssb.out: ssb.in xeigtsts +ssb.out: ssb.in EIG/xeigtsts @echo SSB: Testing REAL Symmetric Eigenvalue Problem routines - ./xeigtsts < ssb.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -ssg.out: ssg.in xeigtsts +ssg.out: ssg.in EIG/xeigtsts @echo SSG: Testing REAL Symmetric Generalized Eigenvalue Problem routines - ./xeigtsts < ssg.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sbal.out: sbal.in xeigtsts +sbal.out: sbal.in EIG/xeigtsts @echo SGEBAL: Testing the balancing of a REAL general matrix - ./xeigtsts < sbal.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sbak.out: sbak.in xeigtsts +sbak.out: sbak.in EIG/xeigtsts @echo SGEBAK: Testing the back transformation of a REAL balanced matrix - ./xeigtsts < sbak.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sgbal.out: sgbal.in xeigtsts +sgbal.out: sgbal.in EIG/xeigtsts @echo SGGBAL: Testing the balancing of a pair of REAL general matrices - ./xeigtsts < sgbal.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sgbak.out: sgbak.in xeigtsts +sgbak.out: sgbak.in EIG/xeigtsts @echo SGGBAK: Testing the back transformation of a pair of REAL balanced matrices - ./xeigtsts < sgbak.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sbb.out: sbb.in xeigtsts +sbb.out: sbb.in EIG/xeigtsts @echo SBB: Testing banded Singular Value Decomposition routines - ./xeigtsts < sbb.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sglm.out: glm.in xeigtsts +sglm.out: glm.in EIG/xeigtsts @echo GLM: Testing Generalized Linear Regression Model routines - ./xeigtsts < glm.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sgqr.out: gqr.in xeigtsts +sgqr.out: gqr.in EIG/xeigtsts @echo GQR: Testing Generalized QR and RQ factorization routines - ./xeigtsts < gqr.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -sgsv.out: gsv.in xeigtsts +sgsv.out: gsv.in EIG/xeigtsts @echo GSV: Testing Generalized Singular Value Decomposition routines - ./xeigtsts < gsv.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -scsd.out: csd.in xeigtsts +scsd.out: csd.in EIG/xeigtsts @echo CSD: Testing CS Decomposition routines - ./xeigtsts < csd.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 -slse.out: lse.in xeigtsts +slse.out: lse.in EIG/xeigtsts @echo LSE: Testing Constrained Linear Least Squares routines - ./xeigtsts < lse.in > $@ 2>&1 + ./EIG/xeigtsts < $< > $@ 2>&1 # # ======== COMPLEX EIG TESTS =========================== -cnep.out: nep.in xeigtstc +cnep.out: nep.in EIG/xeigtstc @echo NEP: Testing Nonsymmetric Eigenvalue Problem routines - ./xeigtstc < nep.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -csep.out: sep.in xeigtstc +csep.out: sep.in EIG/xeigtstc @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtstc < sep.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cse2.out: se2.in xeigtstc +cse2.out: se2.in EIG/xeigtstc @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtstc < se2.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -csvd.out: svd.in xeigtstc +csvd.out: svd.in EIG/xeigtstc @echo SVD: Testing Singular Value Decomposition routines - ./xeigtstc < svd.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cec.out: cec.in xeigtstc +cec.out: cec.in EIG/xeigtstc @echo CEC: Testing COMPLEX Eigen Condition Routines - ./xeigtstc < cec.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -ced.out: ced.in xeigtstc +ced.out: ced.in EIG/xeigtstc @echo CES: Testing COMPLEX Nonsymmetric Schur Form Driver - ./xeigtstc < ced.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cgg.out: cgg.in xeigtstc +cgg.out: cgg.in EIG/xeigtstc @echo CGG: Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines - ./xeigtstc < cgg.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cgd.out: cgd.in xeigtstc +cgd.out: cgd.in EIG/xeigtstc @echo CGD: Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines - ./xeigtstc < cgd.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -csb.out: csb.in xeigtstc +csb.out: csb.in EIG/xeigtstc @echo CHB: Testing Hermitian Eigenvalue Problem routines - ./xeigtstc < csb.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -csg.out: csg.in xeigtstc +csg.out: csg.in EIG/xeigtstc @echo CSG: Testing Symmetric Generalized Eigenvalue Problem routines - ./xeigtstc < csg.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cbal.out: cbal.in xeigtstc +cbal.out: cbal.in EIG/xeigtstc @echo CGEBAL: Testing the balancing of a COMPLEX general matrix - ./xeigtstc < cbal.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cbak.out: cbak.in xeigtstc +cbak.out: cbak.in EIG/xeigtstc @echo CGEBAK: Testing the back transformation of a COMPLEX balanced matrix - ./xeigtstc < cbak.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cgbal.out: cgbal.in xeigtstc +cgbal.out: cgbal.in EIG/xeigtstc @echo CGGBAL: Testing the balancing of a pair of COMPLEX general matrices - ./xeigtstc < cgbal.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cgbak.out: cgbak.in xeigtstc +cgbak.out: cgbak.in EIG/xeigtstc @echo CGGBAK: Testing the back transformation of a pair of COMPLEX balanced matrices - ./xeigtstc < cgbak.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cbb.out: cbb.in xeigtstc +cbb.out: cbb.in EIG/xeigtstc @echo CBB: Testing banded Singular Value Decomposition routines - ./xeigtstc < cbb.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cglm.out: glm.in xeigtstc +cglm.out: glm.in EIG/xeigtstc @echo GLM: Testing Generalized Linear Regression Model routines - ./xeigtstc < glm.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cgqr.out: gqr.in xeigtstc +cgqr.out: gqr.in EIG/xeigtstc @echo GQR: Testing Generalized QR and RQ factorization routines - ./xeigtstc < gqr.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -cgsv.out: gsv.in xeigtstc +cgsv.out: gsv.in EIG/xeigtstc @echo GSV: Testing Generalized Singular Value Decomposition routines - ./xeigtstc < gsv.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -ccsd.out: csd.in xeigtstc +ccsd.out: csd.in EIG/xeigtstc @echo CSD: Testing CS Decomposition routines - ./xeigtstc < csd.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 -clse.out: lse.in xeigtstc +clse.out: lse.in EIG/xeigtstc @echo LSE: Testing Constrained Linear Least Squares routines - ./xeigtstc < lse.in > $@ 2>&1 + ./EIG/xeigtstc < $< > $@ 2>&1 # # ======== DOUBLE EIG TESTS =========================== -dnep.out: nep.in xeigtstd +dnep.out: nep.in EIG/xeigtstd @echo NEP: Testing Nonsymmetric Eigenvalue Problem routines - ./xeigtstd < nep.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dsep.out: sep.in xeigtstd +dsep.out: sep.in EIG/xeigtstd @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtstd < sep.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dse2.out: se2.in xeigtstd +dse2.out: se2.in EIG/xeigtstd @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtstd < se2.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dsvd.out: svd.in xeigtstd +dsvd.out: svd.in EIG/xeigtstd @echo SVD: Testing Singular Value Decomposition routines - ./xeigtstd < svd.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dec.out: dec.in xeigtstd +dec.out: dec.in EIG/xeigtstd @echo DEC: Testing DOUBLE PRECISION Eigen Condition Routines - ./xeigtstd < dec.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -ded.out: ded.in xeigtstd +ded.out: ded.in EIG/xeigtstd @echo DEV: Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver - ./xeigtstd < ded.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dgg.out: dgg.in xeigtstd +dgg.out: dgg.in EIG/xeigtstd @echo DGG: Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines - ./xeigtstd < dgg.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dgd.out: dgd.in xeigtstd +dgd.out: dgd.in EIG/xeigtstd @echo DGD: Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines - ./xeigtstd < dgd.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dsb.out: dsb.in xeigtstd +dsb.out: dsb.in EIG/xeigtstd @echo DSB: Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines - ./xeigtstd < dsb.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dsg.out: dsg.in xeigtstd +dsg.out: dsg.in EIG/xeigtstd @echo DSG: Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines - ./xeigtstd < dsg.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dbal.out: dbal.in xeigtstd +dbal.out: dbal.in EIG/xeigtstd @echo DGEBAL: Testing the balancing of a DOUBLE PRECISION general matrix - ./xeigtstd < dbal.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dbak.out: dbak.in xeigtstd +dbak.out: dbak.in EIG/xeigtstd @echo DGEBAK: Testing the back transformation of a DOUBLE PRECISION balanced matrix - ./xeigtstd < dbak.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dgbal.out: dgbal.in xeigtstd +dgbal.out: dgbal.in EIG/xeigtstd @echo DGGBAL: Testing the balancing of a pair of DOUBLE PRECISION general matrices - ./xeigtstd < dgbal.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dgbak.out: dgbak.in xeigtstd +dgbak.out: dgbak.in EIG/xeigtstd @echo DGGBAK: Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices - ./xeigtstd < dgbak.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dbb.out: dbb.in xeigtstd +dbb.out: dbb.in EIG/xeigtstd @echo DBB: Testing banded Singular Value Decomposition routines - ./xeigtstd < dbb.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dglm.out: glm.in xeigtstd +dglm.out: glm.in EIG/xeigtstd @echo GLM: Testing Generalized Linear Regression Model routines - ./xeigtstd < glm.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dgqr.out: gqr.in xeigtstd +dgqr.out: gqr.in EIG/xeigtstd @echo GQR: Testing Generalized QR and RQ factorization routines - ./xeigtstd < gqr.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dgsv.out: gsv.in xeigtstd +dgsv.out: gsv.in EIG/xeigtstd @echo GSV: Testing Generalized Singular Value Decomposition routines - ./xeigtstd < gsv.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dcsd.out: csd.in xeigtstd +dcsd.out: csd.in EIG/xeigtstd @echo CSD: Testing CS Decomposition routines - ./xeigtstd < csd.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 -dlse.out: lse.in xeigtstd +dlse.out: lse.in EIG/xeigtstd @echo LSE: Testing Constrained Linear Least Squares routines - ./xeigtstd < lse.in > $@ 2>&1 + ./EIG/xeigtstd < $< > $@ 2>&1 # # ======== COMPLEX16 EIG TESTS =========================== -znep.out: nep.in xeigtstz +znep.out: nep.in EIG/xeigtstz @echo NEP: Testing Nonsymmetric Eigenvalue Problem routines - ./xeigtstz < nep.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zsep.out: sep.in xeigtstz +zsep.out: sep.in EIG/xeigtstz @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtstz < sep.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zse2.out: se2.in xeigtstz +zse2.out: se2.in EIG/xeigtstz @echo SEP: Testing Symmetric Eigenvalue Problem routines - ./xeigtstz < se2.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zsvd.out: svd.in xeigtstz +zsvd.out: svd.in EIG/xeigtstz @echo SVD: Testing Singular Value Decomposition routines - ./xeigtstz < svd.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zec.out: zec.in xeigtstz +zec.out: zec.in EIG/xeigtstz @echo ZEC: Testing COMPLEX16 Eigen Condition Routines - ./xeigtstz < zec.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zed.out: zed.in xeigtstz +zed.out: zed.in EIG/xeigtstz @echo ZES: Testing COMPLEX16 Nonsymmetric Schur Form Driver - ./xeigtstz < zed.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zgg.out: zgg.in xeigtstz +zgg.out: zgg.in EIG/xeigtstz @echo ZGG: Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines - ./xeigtstz < zgg.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zgd.out: zgd.in xeigtstz +zgd.out: zgd.in EIG/xeigtstz @echo ZGD: Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines - ./xeigtstz < zgd.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zsb.out: zsb.in xeigtstz +zsb.out: zsb.in EIG/xeigtstz @echo ZHB: Testing Hermitian Eigenvalue Problem routines - ./xeigtstz < zsb.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zsg.out: zsg.in xeigtstz +zsg.out: zsg.in EIG/xeigtstz @echo ZSG: Testing Symmetric Generalized Eigenvalue Problem routines - ./xeigtstz < zsg.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zbal.out: zbal.in xeigtstz +zbal.out: zbal.in EIG/xeigtstz @echo ZGEBAL: Testing the balancing of a COMPLEX16 general matrix - ./xeigtstz < zbal.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zbak.out: zbak.in xeigtstz +zbak.out: zbak.in EIG/xeigtstz @echo ZGEBAK: Testing the back transformation of a COMPLEX16 balanced matrix - ./xeigtstz < zbak.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zgbal.out: zgbal.in xeigtstz +zgbal.out: zgbal.in EIG/xeigtstz @echo ZGGBAL: Testing the balancing of a pair of COMPLEX general matrices - ./xeigtstz < zgbal.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zgbak.out: zgbak.in xeigtstz +zgbak.out: zgbak.in EIG/xeigtstz @echo ZGGBAK: Testing the back transformation of a pair of COMPLEX16 balanced matrices - ./xeigtstz < zgbak.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zbb.out: zbb.in xeigtstz +zbb.out: zbb.in EIG/xeigtstz @echo ZBB: Testing banded Singular Value Decomposition routines - ./xeigtstz < zbb.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zglm.out: glm.in xeigtstz +zglm.out: glm.in EIG/xeigtstz @echo GLM: Testing Generalized Linear Regression Model routines - ./xeigtstz < glm.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zgqr.out: gqr.in xeigtstz +zgqr.out: gqr.in EIG/xeigtstz @echo GQR: Testing Generalized QR and RQ factorization routines - ./xeigtstz < gqr.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zgsv.out: gsv.in xeigtstz +zgsv.out: gsv.in EIG/xeigtstz @echo GSV: Testing Generalized Singular Value Decomposition routines - ./xeigtstz < gsv.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zcsd.out: csd.in xeigtstz +zcsd.out: csd.in EIG/xeigtstz @echo CSD: Testing CS Decomposition routines - ./xeigtstz < csd.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 -zlse.out: lse.in xeigtstz +zlse.out: lse.in EIG/xeigtstz @echo LSE: Testing Constrained Linear Least Squares routines - ./xeigtstz < lse.in > $@ 2>&1 + ./EIG/xeigtstz < $< > $@ 2>&1 # ============================================================================== -xlintsts: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) single +LIN/xlintsts: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintsts -xlintstc: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) complex +LIN/xlintstc: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstc -xlintstd: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) double +LIN/xlintstd: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstd -xlintstz: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) complex16 +LIN/xlintstz: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstz -xlintstrfs: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) proto-single +LIN/xlintstrfs: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstrfs -xlintstrfc: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) proto-complex +LIN/xlintstrfc: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstrfc -xlintstrfd: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) proto-double +LIN/xlintstrfd: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstrfd -xlintstrfz: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) proto-complex16 +LIN/xlintstrfz: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstrfz -xlintstds: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) proto-double +LIN/xlintstds: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstds -xlintstzc: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCLIN) $(FRC) - cd LIN ; $(MAKE) proto-complex16 +LIN/xlintstzc: $(FRCLIN) $(FRC) + $(MAKE) -C LIN xlintstzc -xeigtsts: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCEIG) $(FRC) - cd EIG ; $(MAKE) single +EIG/xeigtsts: $(FRCEIG) $(FRC) + $(MAKE) -C EIG xeigtsts -xeigtstc: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCEIG) $(FRC) - cd EIG ; $(MAKE) complex +EIG/xeigtstc: $(FRCEIG) $(FRC) + $(MAKE) -C EIG xeigtstc -xeigtstd: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCEIG) $(FRC) - cd EIG ; $(MAKE) double +EIG/xeigtstd: $(FRCEIG) $(FRC) + $(MAKE) -C EIG xeigtstd -xeigtstz: ../$(LAPACKLIB) ../$(TMGLIB) $(FRCEIG) $(FRC) - cd EIG ; $(MAKE) complex16 +EIG/xeigtstz: $(FRCEIG) $(FRC) + $(MAKE) -C EIG xeigtstz -clean: +clean: cleantest +cleantest: rm -f *.out core -cleanup: - rm -f x* *.out core - FRCLIN: @FRCLIN=$(FRCLIN) diff --git a/lapack-netlib/TESTING/ctest.in b/lapack-netlib/TESTING/ctest.in index c5ed21fde..2f3853a03 100644 --- a/lapack-netlib/TESTING/ctest.in +++ b/lapack-netlib/TESTING/ctest.in @@ -26,6 +26,9 @@ CHE 10 List types on next line if 0 < NTYPES < 10 CHR 10 List types on next line if 0 < NTYPES < 10 CHK 10 List types on next line if 0 < NTYPES < 10 CHA 10 List types on next line if 0 < NTYPES < 10 +CH2 10 List types on next line if 0 < NTYPES < 10 +CSA 11 List types on next line if 0 < NTYPES < 10 +CS2 11 List types on next line if 0 < NTYPES < 10 CHP 10 List types on next line if 0 < NTYPES < 10 CSY 11 List types on next line if 0 < NTYPES < 11 CSR 11 List types on next line if 0 < NTYPES < 11 diff --git a/lapack-netlib/TESTING/dbal.in b/lapack-netlib/TESTING/dbal.in index 94268a117..c00fe3e83 100644 --- a/lapack-netlib/TESTING/dbal.in +++ b/lapack-netlib/TESTING/dbal.in @@ -87,13 +87,13 @@ DBL: Tests DGEBAL 0.0000D+00 0.8192D+04 0.0000D+00 0.0000D+00 0.8000D+01 1 5 - 1.0000D+00 0.0000D-03 0.0000D-03 0.0000D-03 250.0000D-03 + 1.0000D+00 0.0000D-03 0.0000D-03 0.0000D-03 2.0000D+00 0.0000D-03 2.0000D+00 1.0240D+03 16.0000D+00 16.0000D+00 - 256.0000D-03 1.0000D-03 4.0000D+00 0.0000D-03 2.0480D+03 + 3.2000D-02 1.0000D-03 4.0000D+00 0.0000D-03 2.0480D+03 0.0000D-03 250.0000D-03 16.0000D+00 4.0000D+00 4.0000D+00 0.0000D-03 2.0480D+03 0.0000D-03 0.0000D-03 8.0000D+00 - 64.0000D+00 500.0000D-03 62.5000D-03 4.0000D+00 2.0000D+00 + 8.0000D+00 500.0000D-03 62.5000D-03 4.0000D+00 2.0000D+00 4 0.1000D+01 0.1000D+07 0.1000D+07 0.1000D+07 @@ -167,15 +167,15 @@ DBL: Tests DGEBAL 0.0000D+00 0.8000D+01 0.0000D+00 0.4000D-02 0.1250D+00 -0.2000D+00 0.3000D+01 2 5 - 6.4000D+01 2.5000D-01 5.00000D-01 0.0000D+00 0.0000D+00 1.0000D+00 -2.0000D+00 - 0.0000D+00 4.0000D+00 2.00000D+00 4.0960D+00 1.6000D+00 0.0000D+00 1.0240D+01 - 0.0000D+00 5.0000D-01 3.00000D+00 4.0960D+00 1.0000D+00 0.0000D+00 -6.4000D+00 - 0.0000D+00 1.0000D+00 -3.90625D+00 1.0000D+00 -3.1250D+00 0.0000D+00 8.0000D+00 - 0.0000D+00 -2.0000D+00 4.00000D+00 1.6000D+00 2.0000D+00 -8.0000D+00 8.0000D+00 + 6.4000D+01 1.0000D+00 5.00000D-01 0.0000D+00 0.0000D+00 1.0000D+00 -2.0000D+00 + 0.0000D+00 4.0000D+00 5.00000D-01 1.0240D+00 8.0000D-01 0.0000D+00 2.5600D+00 + 0.0000D+00 2.0000D+00 3.00000D+00 4.0960D+00 2.0000D+00 0.0000D+00 -6.4000D+00 + 0.0000D+00 4.0000D+00 -3.90625D+00 1.0000D+00 -6.2500D+00 0.0000D+00 8.0000D+00 + 0.0000D+00 -4.0000D+00 2.00000D+00 8.0000D-01 2.0000D+00 -4.0000D+00 4.0000D+00 0.0000D+00 0.0000D+00 0.00000D+00 0.0000D+00 0.0000D+00 6.0000D+00 1.0000D+00 0.0000D+00 0.0000D+00 0.00000D+00 0.0000D+00 0.0000D+00 0.0000D+00 0.0000D+00 - 3.0000D+00 1.953125D-03 3.1250D-02 3.2000D+01 2.5000D-01 1.0000D+00 6.0000D+00 + 3.0000D+00 7.812500D-03 3.1250D-02 3.2000D+01 5.0000D-01 1.0000D+00 6.0000D+00 5 0.1000D+04 0.2000D+01 0.3000D+01 0.4000D+01 0.5000D+06 @@ -185,13 +185,13 @@ DBL: Tests DGEBAL 0.6000D+01 0.2000D+03 0.1000D+01 0.6000D+03 0.3000D+01 1 5 - 1.0000D+03 3.1250D-02 3.7500D-01 6.2500D-02 3.90625D+03 - 5.7600D+02 0.0000D+00 1.6000D-03 1.0000D+00 1.5000D+00 - 0.0000D+00 -3.7500D+01 2.0000D+00 1.2500D-01 6.2500D-02 - 5.7600D+02 2.0000D-03 8.0000D+00 1.0000D+00 -5.0000D+02 - 7.6800D+02 4.0000D+02 1.6000D+01 1.2000D+03 3.0000D+00 + 1.0000D+03 3.1250D-02 3.7500D-01 3.1250D-02 1.95312500D+03 + 5.7600D+02 0.0000D+00 1.6000D-03 5.0000D-01 7.50000000D-01 + 0.0000D+00 -3.7500D+01 2.0000D+00 6.2500D-02 3.12500000D-02 + 1.1520D+03 4.0000D-03 1.6000D+01 1.0000D+00 -5.00000000D+02 + 1.5360D+03 8.0000D+02 3.2000D+01 1.2000D+03 3.00000000D+00 - 1.2800D+02 2.0000D+00 1.6000D+01 2.0000D+00 1.0000D+00 + 3.2000D+01 5.0000D-01 4.0000D+00 2.5000D-01 1.2500D-01 6 1.0000D+00 1.0000D+120 0.0000D+00 0.0000D+00 0.0000D+00 0.0000D+00 @@ -202,14 +202,13 @@ DBL: Tests DGEBAL 0.0000D+00 0.0000D+00 0.0000D+00 0.0000D+00 1.0000D-120 1.0000D+00 1 6 - 1.000000000000000000D+00 6.344854593289122931D+03 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 - 1.576080247855779135D-04 1.000000000000000000D+00 6.344854593289122931D+03 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 - 0.000000000000000000D+00 1.576080247855779135D-04 1.000000000000000000D+00 3.172427296644561466D+03 0.000000000000000000D+00 0.000000000000000000D+00 - 0.000000000000000000D+00 0.000000000000000000D+00 3.152160495711558270D-04 1.000000000000000000D+00 1.586213648322280733D+03 0.000000000000000000D+00 - 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 6.304320991423116539D-04 1.000000000000000000D+00 1.586213648322280733D+03 - 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 6.304320991423116539D-04 1.000000000000000000D+00 - - 2.494800386918399765D+291 1.582914569427869018D+175 1.004336277661868922D+59 3.186183822264904554D-58 5.053968264940243633D-175 8.016673440035891112D-292 - +0.10000000000000000000D+01 0.63448545932891229313D+04 0.00000000000000000000D+00 0.00000000000000000000D+00 0.00000000000000000000D+00 0.00000000000000000000D+00 +0.15760802478557791348D-03 0.10000000000000000000D+01 0.63448545932891229313D+04 0.00000000000000000000D+00 0.00000000000000000000D+00 0.00000000000000000000D+00 +0.00000000000000000000D+00 0.15760802478557791348D-03 0.10000000000000000000D+01 0.31724272966445614657D+04 0.00000000000000000000D+00 0.00000000000000000000D+00 +0.00000000000000000000D+00 0.00000000000000000000D+00 0.31521604957115582695D-03 0.10000000000000000000D+01 0.15862136483222807328D+04 0.00000000000000000000D+00 +0.00000000000000000000D+00 0.00000000000000000000D+00 0.00000000000000000000D+00 0.63043209914231165391D-03 0.10000000000000000000D+01 0.79310682416114036641D+03 +0.00000000000000000000D+00 0.00000000000000000000D+00 0.00000000000000000000D+00 0.00000000000000000000D+00 0.12608641982846233078D-02 0.10000000000000000000D+01 + + 2.494800386918399765D+291 1.582914569427869018D+175 1.004336277661868922D+59 3.186183822264904554D-58 5.053968264940243633D-175 0.40083367200179455560D-291; 0 diff --git a/lapack-netlib/TESTING/dtest.in b/lapack-netlib/TESTING/dtest.in index d05a27ca7..a7a16ee41 100644 --- a/lapack-netlib/TESTING/dtest.in +++ b/lapack-netlib/TESTING/dtest.in @@ -26,6 +26,7 @@ DSY 10 List types on next line if 0 < NTYPES < 10 DSR 10 List types on next line if 0 < NTYPES < 10 DSK 10 List types on next line if 0 < NTYPES < 10 DSA 10 List types on next line if 0 < NTYPES < 10 +DS2 10 List types on next line if 0 < NTYPES < 10 DSP 10 List types on next line if 0 < NTYPES < 10 DTR 18 List types on next line if 0 < NTYPES < 18 DTP 18 List types on next line if 0 < NTYPES < 18 diff --git a/lapack-netlib/TESTING/stest.in b/lapack-netlib/TESTING/stest.in index 30f1c4704..d32047047 100644 --- a/lapack-netlib/TESTING/stest.in +++ b/lapack-netlib/TESTING/stest.in @@ -26,6 +26,7 @@ SSY 10 List types on next line if 0 < NTYPES < 10 SSR 10 List types on next line if 0 < NTYPES < 10 SSK 10 List types on next line if 0 < NTYPES < 10 SSA 10 List types on next line if 0 < NTYPES < 10 +SS2 10 List types on next line if 0 < NTYPES < 10 SSP 10 List types on next line if 0 < NTYPES < 10 STR 18 List types on next line if 0 < NTYPES < 18 STP 18 List types on next line if 0 < NTYPES < 18 diff --git a/lapack-netlib/TESTING/ztest.in b/lapack-netlib/TESTING/ztest.in index aba4a3d55..520253941 100644 --- a/lapack-netlib/TESTING/ztest.in +++ b/lapack-netlib/TESTING/ztest.in @@ -26,6 +26,9 @@ ZHE 10 List types on next line if 0 < NTYPES < 10 ZHR 10 List types on next line if 0 < NTYPES < 10 ZHK 10 List types on next line if 0 < NTYPES < 10 ZHA 10 List types on next line if 0 < NTYPES < 10 +ZH2 10 List types on next line if 0 < NTYPES < 10 +ZSA 11 List types on next line if 0 < NTYPES < 10 +ZS2 11 List types on next line if 0 < NTYPES < 10 ZHP 10 List types on next line if 0 < NTYPES < 10 ZSY 11 List types on next line if 0 < NTYPES < 11 ZSR 11 List types on next line if 0 < NTYPES < 11 diff --git a/lapack-netlib/appveyor.yml b/lapack-netlib/appveyor.yml new file mode 100644 index 000000000..7fc3fbdd7 --- /dev/null +++ b/lapack-netlib/appveyor.yml @@ -0,0 +1,64 @@ +# Windows testing. +# Syntax for this file: +# http://www.appveyor.com/docs/appveyor-yml + +shallow_clone: true + +platform: x64 + +cache: + - x86_64-4.9.2-release-win32-seh-rt_v4-rev4.7z + - i686-4.9.2-release-win32-dwarf-rt_v4-rev4.7z + +environment: + CTEST_OUTPUT_ON_FAILURE: 1 + matrix: + - MINGW_DIR: mingw64 + MINGW_URL: https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win64/Personal%20Builds/mingw-builds/4.9.2/threads-win32/seh/x86_64-4.9.2-release-win32-seh-rt_v4-rev4.7z/download + MINGW_ARCHIVE: x86_64-4.9.2-release-win32-seh-rt_v4-rev4.7z + - MINGW_DIR: mingw32 + MINGW_URL: https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/4.9.2/threads-win32/dwarf/i686-4.9.2-release-win32-dwarf-rt_v4-rev4.7z/download + MINGW_ARCHIVE: i686-4.9.2-release-win32-dwarf-rt_v4-rev4.7z + +install: + - if not exist "%MINGW_ARCHIVE%" appveyor DownloadFile "%MINGW_URL%" -FileName "%MINGW_ARCHIVE%" + - 7z x -y "%MINGW_ARCHIVE%" > nul + # CMake refuses to generate MinGW Makefiles if sh.exe is in the Path + - ps: Get-Command sh.exe -All | Remove-Item + +build_script: + - echo "NUMBER_OF_PROCESSORS=%NUMBER_OF_PROCESSORS%" + - set PATH=%CD%\%MINGW_DIR%\bin;%PATH% + - g++ --version + - mingw32-make --version + - cmake --version + - if "%APPVEYOR_REPO_TAG%"=="true" (set CMAKE_BUILD_TYPE=Release) else (set CMAKE_BUILD_TYPE=Debug) + - set SRC_DIR=%CD% + - echo %SRC_DIR% + - set BLD_DIR=%SRC_DIR%\..\lapack-appveyor-bld + - set INST_DIR=%SRC_DIR%\..\lapack-appveyor-install + - mkdir -p %BLD_DIR% + - cd %BLD_DIR% + # See issue #17 on github dashboard. Once resolved, use -DCBLAS=ON + # - cmake -DCMAKE_INSTALL_PREFIX=${INST_DIR} -DLAPACKE=ON ${SRC_DIR} + - cmake + -G "MinGW Makefiles" + -DBUILDNAME:STRING="appveyor-%MINGW_DIR%-%APPVEYOR_REPO_BRANCH%" + -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% + -DCMAKE_INSTALL_PREFIX=%INST_DIR% + -DCBLAS:BOOL=ON + -DLAPACKE:BOOL=ON + -DBUILD_TESTING=ON + -DLAPACKE_WITH_TMG:BOOL=ON + %SRC_DIR% + - mingw32-make -j%NUMBER_OF_PROCESSORS% + +test_script: + - ctest -D ExperimentalStart + - ctest -D ExperimentalConfigure + - ctest -D ExperimentalBuild -j%NUMBER_OF_PROCESSORS% + - ctest -D ExperimentalTest --schedule-random -j%NUMBER_OF_PROCESSORS% --output-on-failure --timeout 100 -E "CBLAS\-.*cblat1" + - ctest -D ExperimentalSubmit + +after_test: + - mingw32-make install -j%NUMBER_OF_PROCESSORS% diff --git a/lapack-netlib/lapack.pc.in b/lapack-netlib/lapack.pc.in index 878efc2ee..316c87101 100644 --- a/lapack-netlib/lapack.pc.in +++ b/lapack-netlib/lapack.pc.in @@ -1,9 +1,9 @@ -prefix=@prefix@ -libdir=@libdir@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: LAPACK Description: FORTRAN reference implementation of LAPACK Linear Algebra PACKage Version: @LAPACK_VERSION@ URL: http://www.netlib.org/lapack/ Libs: -L${libdir} -llapack -Requires: blas +Requires.private: blas diff --git a/lapack-netlib/lapack_testing.py b/lapack-netlib/lapack_testing.py index 70783fee9..3c917482d 100755 --- a/lapack-netlib/lapack_testing.py +++ b/lapack-netlib/lapack_testing.py @@ -232,12 +232,12 @@ for dtype in range_prec: letter+"bb","glm","gqr", "gsv","csd","lse", letter+"test", letter+dtypes[0][dtype-1]+"test",letter+"test_rfp"), - ("Nonsymmetric Eigenvalue Problem", "Symmetric Eigenvalue Problem", "Symmetric Eigenvalue Problem 2 stage", "Singular Value Decomposition", - "Eigen Condition","Nonsymmetric Eigenvalue","Nonsymmetric Generalized Eigenvalue Problem", - "Nonsymmetric Generalized Eigenvalue Problem driver", "Symmetric Eigenvalue Problem", "Symmetric Eigenvalue Generalized Problem", - "Banded Singular Value Decomposition routines", "Generalized Linear Regression Model routines", "Generalized QR and RQ factorization routines", - "Generalized Singular Value Decomposition routines", "CS Decomposition routines", "Constrained Linear Least Squares routines", - "Linear Equation routines", "Mixed Precision linear equation routines","RFP linear equation routines"), + ("Nonsymmetric-Eigenvalue-Problem", "Symmetric-Eigenvalue-Problem", "Symmetric-Eigenvalue-Problem-2-stage", "Singular-Value-Decomposition", + "Eigen-Condition","Nonsymmetric-Eigenvalue","Nonsymmetric-Generalized-Eigenvalue-Problem", + "Nonsymmetric-Generalized-Eigenvalue-Problem-driver", "Symmetric-Eigenvalue-Problem", "Symmetric-Eigenvalue-Generalized-Problem", + "Banded-Singular-Value-Decomposition-routines", "Generalized-Linear-Regression-Model-routines", "Generalized-QR-and-RQ-factorization-routines", + "Generalized-Singular-Value-Decomposition-routines", "CS-Decomposition-routines", "Constrained-Linear-Least-Squares-routines", + "Linear-Equation-routines", "Mixed-Precision-linear-equation-routines","RFP-linear-equation-routines"), (letter+"nep", letter+"sep", letter+"se2", letter+"svd", letter+"ec",letter+"ed",letter+"gg", letter+"gd",letter+"sb",letter+"sg", @@ -268,7 +268,7 @@ for dtype in range_prec: # EIG TESTS cmdbase="xeigtst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" if (not just_errors and not short_summary): - print("--> Testing "+name+" "+dtests[1][dtest]+" [ "+cmdbase+" ]") + print("Testing "+name+" "+dtests[1][dtest]+"-"+cmdbase, end=' ') # Run the process: either to read the file or run the LAPACK testing nb_test = run_summary_test(f, cmdbase, short_summary) list_results[0][dtype]+=nb_test[0] @@ -279,13 +279,13 @@ for dtype in range_prec: if (not short_summary): if (nb_test[0]>0 and just_errors==0): - print("--> Tests passed: "+str(nb_test[0])) + print("passed: "+str(nb_test[0])) if (nb_test[1]>0): - print("--> Tests failing to pass the threshold: "+str(nb_test[1])) + print("failing to pass the threshold: "+str(nb_test[1])) if (nb_test[2]>0): - print("--> Illegal Error: "+str(nb_test[2])) + print("Illegal Error: "+str(nb_test[2])) if (nb_test[3]>0): - print("--> Info Error: "+str(nb_test[3])) + print("Info Error: "+str(nb_test[3])) if (got_error>0 and just_errors==1): print("ERROR IS LOCATED IN "+name+" "+dtests[1][dtest]+" [ "+cmdbase+" ]") print("") diff --git a/lapack-netlib/make.inc.example b/lapack-netlib/make.inc.example index 7f66018e8..d780c3a23 100644 --- a/lapack-netlib/make.inc.example +++ b/lapack-netlib/make.inc.example @@ -1,75 +1,79 @@ #################################################################### # LAPACK make include file. # -# LAPACK, Version 3.7.0 # -# December 2016 # +# LAPACK, Version 3.8.0 # +# November 2017 # #################################################################### -# + SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. # -# Modify the FORTRAN and OPTS definitions to refer to the -# compiler and desired compiler options for your machine. NOOPT -# refers to the compiler options desired when NO OPTIMIZATION is -# selected. Define LOADER and LOADOPTS to refer to the loader and -# desired load options for your machine. +CC = gcc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. # # Note: During a regular execution, LAPACK might create NaN and Inf # and handle these quantities appropriately. As a consequence, one # should not compile LAPACK with flags such as -ffpe-trap=overflow. # -FORTRAN = gfortran -OPTS = -O2 -frecursive -DRVOPTS = $(OPTS) -NOOPT = -O0 -frecursive +FORTRAN = gfortran +OPTS = -O2 -frecursive +DRVOPTS = $(OPTS) +NOOPT = -O0 -frecursive + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# LOADER = gfortran LOADOPTS = -# -# Comment out the following line to include deprecated routines to the -# LAPACK library. + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. # #BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. # -# Timer for the SECOND and DSECND routines -# -# Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME -# TIMER = EXT_ETIME -# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ -# TIMER = EXT_ETIME_ -# For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME -TIMER = INT_ETIME -# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) -# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME -# TIMER = INT_CPU_TIME -# If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 -# TIMER = NONE -# -# Configuration LAPACKE: Native C interface to LAPACK -# To generate LAPACKE library: type 'make lapackelib' -# Configuration file: turned off (default) -# Complex types: C99 (default) -# Name pattern: mixed case (default) -# (64-bit) Data model: LP64 (default) -# -# CC is the C compiler, normally invoked with options CFLAGS. -# -CC = gcc -CFLAGS = -O3 -# -# The archiver and the flag(s) to use when building archive (library) -# If you system has no ranlib, set RANLIB = echo. -# -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib -# +#LAPACKE_WITH_TMG = Yes + # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The -# relevant routines will be compiled and XBLAS will be linked only if -# USEXBLAS is defined. -# -# USEXBLAS = Yes -XBLASLIB = -# XBLASLIB = -lxblas +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. # +#USEXBLAS = Yes +#XBLASLIB = -lxblas + # The location of the libraries to which you will link. (The # machine-specific, optimized BLAS library should be used whenever # possible.) From 2d52f0f4c3f1941a2b34486fd22e0c0d5a452498 Mon Sep 17 00:00:00 2001 From: martin Date: Thu, 23 Nov 2017 21:22:01 +0100 Subject: [PATCH 063/122] update cmakefiles for lapack 3.8.0 --- cmake/lapack.cmake | 18 +++++----- cmake/lapacke.cmake | 80 +++++++++++++++++++++++++++++++++++++++++++++ exports/gensymbol | 6 ---- 3 files changed, 89 insertions(+), 15 deletions(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index a4ac00eff..327e7696c 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -1,6 +1,6 @@ # Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. -set(ALLAUX ilaenv.f ieeeck.f lsamen.f iparmq.f iparam2stage.F +set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f ../INSTALL/ilaver.f xerbla_array.f ../INSTALL/slamch.f) @@ -89,7 +89,7 @@ set(SLASRC ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f ssyconvf.f ssyconvf_rook.f - ssysv_aa.f ssytrf_aa.f ssytrs_aa.f + ssysv_aa.f ssysv_aa_2stage.f ssytrf_aa.f ssytrf_aa_2stage.f ssytrs_aa.f ssytrs_aa_2stage.f ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f ssytri_rook.f ssycon_rook.f ssysv_rook.f ssytf2_rk.f ssytrf_rk.f ssytrs_3.f @@ -151,7 +151,7 @@ set(CLASRC chetrs_rook.f checon_rook.f chesv_rook.f chetf2_rk.f chetrf_rk.f chetri_3.f chetri_3x.f chetrs_3.f checon_3.f chesv_rk.f - chesv_aa.f chetrf_aa.f chetrs_aa.f + chesv_aa.f chesv_aa_2stage.f chetrf_aa.f chetrf_aa_2stage.f chetrs_aa.f chetrs_aa_2stage.f chgeqz.f chpcon.f chpev.f chpevd.f chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f chpsvx.f @@ -186,8 +186,8 @@ set(CLASRC csyconv.f csyconvf.f csyconvf_rook.f csytf2_rook.f csytrf_rook.f csytrs_rook.f csytri_rook.f csycon_rook.f csysv_rook.f - csytf2_rk.f csytrf_rk.f csytrf_aa.f csytrs_3.f csytrs_aa.f - csytri_3.f csytri_3x.f csycon_3.f csysv_rk.f csysv_aa.f + csytf2_rk.f csytrf_rk.f csytrf_aa.f csytrf_aa_2stage.f csytrs_3.f csytrs_aa.f csytrs_aa_2stage.f + csytri_3.f csytri_3x.f csycon_3.f csysv_rk.f csysv_aa.f csysv_aa_2stage.f ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f ctprfs.f ctptri.f @@ -277,7 +277,7 @@ set(DLASRC dsytri_rook.f dsycon_rook.f dsysv_rook.f dsytf2_rk.f dsytrf_rk.f dsytrs_3.f dsytri_3.f dsytri_3x.f dsycon_3.f dsysv_rk.f - dsysv_aa.f dsytrf_aa.f dsytrs_aa.f + dsysv_aa.f dsysv_aa_2stage.f dsytrf_aa.f dsytrf_aa_2stage.f dsytrs_aa.f dsytrs_aa_2stage.f dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f @@ -335,7 +335,7 @@ set(ZLASRC zhetrs_rook.f zhecon_rook.f zhesv_rook.f zhetf2_rk.f zhetrf_rk.f zhetri_3.f zhetri_3x.f zhetrs_3.f zhecon_3.f zhesv_rk.f - zhesv_aa.f zhetrf_aa.f zhetrs_aa.f + zhesv_aa.f zhesv_aa_2stage.f zhetrf_aa.f zhetrf_aa_2stage.f zhetrs_aa.f zhetrs_aa_2stage.f zhgeqz.f zhpcon.f zhpev.f zhpevd.f zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f zhpsvx.f @@ -370,9 +370,9 @@ set(ZLASRC zsytri2.f zsytri2x.f zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f zsyconvf.f zsyconvf_rook.f - zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f + zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f zsytrs_aa_2stage.f zsytri_rook.f zsycon_rook.f zsysv_rook.f - zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrs_3.f + zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrf_aa_2stage.f zsytrs_3.f zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index 93e2824a1..b7b121948 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -46,6 +46,8 @@ set(CSRC lapacke_cgehrd_work.c lapacke_cgejsv.c lapacke_cgejsv_work.c + lapacke_cgelq.c + lapacke_cgelq_work.c lapacke_cgelq2.c lapacke_cgelq2_work.c lapacke_cgelqf.c @@ -60,12 +62,16 @@ set(CSRC lapacke_cgelsy_work.c lapacke_cgemqr.c lapacke_cgemqr_work.c + lapacke_cgemlq.c + lapacke_cgemlq_work.c lapacke_cgemqrt.c lapacke_cgemqrt_work.c lapacke_cgeqlf.c lapacke_cgeqlf_work.c lapacke_cgeqp3.c lapacke_cgeqp3_work.c + lapacke_cgeqr.c + lapacke_cgeqr_work.c lapacke_cgeqr2.c lapacke_cgeqr2_work.c lapacke_cgeqrf.c @@ -210,6 +216,8 @@ set(CSRC lapacke_chesv_work.c lapacke_chesv_aa.c lapacke_chesv_aa_work.c + lapacke_chesv_aa_2stage.c + lapacke_chesv_aa_2stage_work.c lapacke_chesv_rk.c lapacke_chesv_rk_work.c lapacke_chesvx.c @@ -224,6 +232,8 @@ set(CSRC lapacke_chetrf_rook_work.c lapacke_chetrf_aa.c lapacke_chetrf_aa_work.c + lapacke_chetrf_aa_2stage.c + lapacke_chetrf_aa_2stage_work.c lapacke_chetrf_rk.c lapacke_chetrf_rk_work.c lapacke_chetri.c @@ -242,6 +252,9 @@ set(CSRC lapacke_chetrs_rook_work.c lapacke_chetrs_aa.c lapacke_chetrs_aa_work.c + lapacke_chetrs_aa_2stage.c + lapacke_chetrs_aa_2stage_work.c + lapacke_chetrf_rk.c lapacke_chetrs_3.c lapacke_chetrs_3_work.c lapacke_chfrk.c @@ -290,6 +303,11 @@ set(CSRC lapacke_clacp2_work.c lapacke_clacpy.c lapacke_clacpy_work.c + lapacke_clacrm.c + lapacke_clacrm_work.c + lapacke_clarcm.c + lapacke_clarcm_work.c + lapacke_clacn2.c lapacke_clag2z.c lapacke_clag2z_work.c lapacke_clange.c @@ -318,6 +336,8 @@ set(CSRC lapacke_clascl_work.c lapacke_claset.c lapacke_claset_work.c + lapacke_classq.c + lapacke_classq_work.c lapacke_claswp.c lapacke_claswp_work.c lapacke_clauum.c @@ -436,6 +456,8 @@ set(CSRC lapacke_csysv_work.c lapacke_csysv_aa.c lapacke_csysv_aa_work.c + lapacke_csysv_aa_2stage.c + lapacke_csysv_aa_2stage_work.c lapacke_csysv_rk.c lapacke_csysv_rk_work.c lapacke_csysvx.c @@ -448,6 +470,8 @@ set(CSRC lapacke_csytrf_rook_work.c lapacke_csytrf_aa.c lapacke_csytrf_aa_work.c + lapacke_csytrf_aa_2stage.c + lapacke_csytrf_aa_2stage_work.c lapacke_csytrf_rk.c lapacke_csytrf_rk_work.c lapacke_csytri.c @@ -466,6 +490,8 @@ set(CSRC lapacke_csytrs_rook_work.c lapacke_csytrs_aa.c lapacke_csytrs_aa_work.c + lapacke_csytrs_aa_2stage.c + lapacke_csytrs_aa_2stage_work.c lapacke_csytrs_3.c lapacke_csytrs_3_work.c lapacke_ctbcon.c @@ -633,6 +659,8 @@ set(DSRC lapacke_dgehrd_work.c lapacke_dgejsv.c lapacke_dgejsv_work.c + lapacke_dgelq.c + lapacke_dgelq_work.c lapacke_dgelq2.c lapacke_dgelq2_work.c lapacke_dgelqf.c @@ -645,6 +673,8 @@ set(DSRC lapacke_dgelss_work.c lapacke_dgelsy.c lapacke_dgelsy_work.c + lapacke_dgemlq.c + lapacke_dgemlq_work.c lapacke_dgemqr.c lapacke_dgemqr_work.c lapacke_dgemqrt.c @@ -653,6 +683,8 @@ set(DSRC lapacke_dgeqlf_work.c lapacke_dgeqp3.c lapacke_dgeqp3_work.c + lapacke_dgeqr.c + lapacke_dgeqr_work.c lapacke_dgeqr2.c lapacke_dgeqr2_work.c lapacke_dgeqrf.c @@ -785,6 +817,9 @@ set(DSRC lapacke_dlaset_work.c lapacke_dlasrt.c lapacke_dlasrt_work.c + lapacke_dlassq.c + lapacke_dlassq_work.c + lapacke_dlaswp.c lapacke_dlaswp.c lapacke_dlaswp_work.c lapacke_dlauum.c @@ -1025,6 +1060,8 @@ set(DSRC lapacke_dsysv_work.c lapacke_dsysv_aa.c lapacke_dsysv_aa_work.c + lapacke_dsysv_aa_2stage.c + lapacke_dsysv_aa_2stage_work.c lapacke_dsysv_rk.c lapacke_dsysv_rk_work.c lapacke_dsysvx.c @@ -1039,6 +1076,8 @@ set(DSRC lapacke_dsytrf_rook_work.c lapacke_dsytrf_aa.c lapacke_dsytrf_aa_work.c + lapacke_dsytrf_aa_2stage.c + lapacke_dsytrf_aa_2stage_work.c lapacke_dsytrf_rk.c lapacke_dsytrf_rk_work.c lapacke_dsytri.c @@ -1055,6 +1094,8 @@ set(DSRC lapacke_dsytrs2_work.c lapacke_dsytrs_aa.c lapacke_dsytrs_aa_work.c + lapacke_dsytrs_aa_2stage.c + lapacke_dsytrs_aa_2stage_work.c lapacke_dsytrs_3.c lapacke_dsytrs_3_work.c lapacke_dsytrs_work.c @@ -1184,6 +1225,8 @@ set(SSRC lapacke_sgehrd_work.c lapacke_sgejsv.c lapacke_sgejsv_work.c + lapacke_sgelq.c + lapacke_sgelq_work.c lapacke_sgelq2.c lapacke_sgelq2_work.c lapacke_sgelqf.c @@ -1196,6 +1239,8 @@ set(SSRC lapacke_sgelss_work.c lapacke_sgelsy.c lapacke_sgelsy_work.c + lapacke_sgemlq.c + lapacke_sgemlq_work.c lapacke_sgemqr.c lapacke_sgemqr_work.c lapacke_sgemqrt.c @@ -1204,6 +1249,8 @@ set(SSRC lapacke_sgeqlf_work.c lapacke_sgeqp3.c lapacke_sgeqp3_work.c + lapacke_sgeqr.c + lapacke_sgeqr_work.c lapacke_sgeqr2.c lapacke_sgeqr2_work.c lapacke_sgeqrf.c @@ -1336,6 +1383,8 @@ set(SSRC lapacke_slaset_work.c lapacke_slasrt.c lapacke_slasrt_work.c + lapacke_slassq.c + lapacke_slassq_work.c lapacke_slaswp.c lapacke_slaswp_work.c lapacke_slauum.c @@ -1572,6 +1621,8 @@ set(SSRC lapacke_ssysv_work.c lapacke_ssysv_aa.c lapacke_ssysv_aa_work.c + lapacke_ssysv_aa_2stage.c + lapacke_ssysv_aa_2stage_work.c lapacke_ssysv_rk.c lapacke_ssysv_rk_work.c lapacke_ssysvx.c @@ -1586,6 +1637,9 @@ set(SSRC lapacke_ssytrf_rook_work.c lapacke_ssytrf_aa.c lapacke_ssytrf_aa_work.c + lapacke_ssytrf_aa_2stage.c + lapacke_ssytrf_aa_2stage_work.c + lapacke_ssytrf_rook.c lapacke_ssytrf_rk.c lapacke_ssytrf_rk_work.c lapacke_ssytri.c @@ -1602,6 +1656,8 @@ set(SSRC lapacke_ssytrs2_work.c lapacke_ssytrs_aa.c lapacke_ssytrs_aa_work.c + lapacke_ssytrs_aa_2stage.c + lapacke_ssytrs_aa_2stage_work.c lapacke_ssytrs_3.c lapacke_ssytrs_3_work.c lapacke_ssytrs_work.c @@ -1729,6 +1785,8 @@ set(ZSRC lapacke_zgehrd_work.c lapacke_zgejsv.c lapacke_zgejsv_work.c + lapacke_zgelq.c + lapacke_zgelq_work.c lapacke_zgelq2.c lapacke_zgelq2_work.c lapacke_zgelqf.c @@ -1741,6 +1799,8 @@ set(ZSRC lapacke_zgelss_work.c lapacke_zgelsy.c lapacke_zgelsy_work.c + lapacke_zgemlq.c + lapacke_zgemlq_work.c lapacke_zgemqr.c lapacke_zgemqr_work.c lapacke_zgemqrt.c @@ -1749,6 +1809,8 @@ set(ZSRC lapacke_zgeqlf_work.c lapacke_zgeqp3.c lapacke_zgeqp3_work.c + lapacke_zgeqr.c + lapacke_zgeqr_work.c lapacke_zgeqr2.c lapacke_zgeqr2_work.c lapacke_zgeqrf.c @@ -1893,6 +1955,8 @@ set(ZSRC lapacke_zhesv_work.c lapacke_zhesv_aa.c lapacke_zhesv_aa_work.c + lapacke_zhesv_aa_2stage.c + lapacke_zhesv_aa_2stage_work.c lapacke_zhesv_rk.c lapacke_zhesv_rk_work.c lapacke_zhesvx.c @@ -1907,6 +1971,8 @@ set(ZSRC lapacke_zhetrf_rook_work.c lapacke_zhetrf_aa.c lapacke_zhetrf_aa_work.c + lapacke_zhetrf_aa_2stage.c + lapacke_zhetrf_aa_2stage_work.c lapacke_zhetrf_rk.c lapacke_zhetrf_rk_work.c lapacke_zhetri.c @@ -1924,6 +1990,8 @@ set(ZSRC lapacke_zhetrs_work.c lapacke_zhetrs_aa.c lapacke_zhetrs_aa_work.c + lapacke_zhetrs_aa_2stage.c + lapacke_zhetrs_aa_2stage_work.c lapacke_zhetrs_3.c lapacke_zhetrs_3_work.c lapacke_zhetrs_rook_work.c @@ -1973,6 +2041,8 @@ set(ZSRC lapacke_zlacp2_work.c lapacke_zlacpy.c lapacke_zlacpy_work.c + lapacke_zlacrm.c + lapacke_zlacrm_work.c lapacke_zlag2c.c lapacke_zlag2c_work.c lapacke_zlange.c @@ -1987,6 +2057,8 @@ set(ZSRC lapacke_zlapmr_work.c lapacke_zlapmt.c lapacke_zlapmt_work.c + lapacke_zlarcm.c + lapacke_zlarcm_work.c lapacke_zlarfb.c lapacke_zlarfb_work.c lapacke_zlarfg.c @@ -2001,6 +2073,8 @@ set(ZSRC lapacke_zlascl_work.c lapacke_zlaset.c lapacke_zlaset_work.c + lapacke_zlassq.c + lapacke_zlassq_work.c lapacke_zlaswp.c lapacke_zlaswp_work.c lapacke_zlauum.c @@ -2119,6 +2193,8 @@ set(ZSRC lapacke_zsysv_work.c lapacke_zsysv_aa.c lapacke_zsysv_aa_work.c + lapacke_zsysv_aa_2stage.c + lapacke_zsysv_aa_2stage_work.c lapacke_zsysv_rk.c lapacke_zsysv_rk_work.c lapacke_zsysvx.c @@ -2131,6 +2207,8 @@ set(ZSRC lapacke_zsytrf_rook_work.c lapacke_zsytrf_aa.c lapacke_zsytrf_aa_work.c + lapacke_zsytrf_aa_2stage.c + lapacke_zsytrf_aa_2stage_work.c lapacke_zsytrf_rk.c lapacke_zsytrf_rk_work.c lapacke_zsytri.c @@ -2149,6 +2227,8 @@ set(ZSRC lapacke_zsytrs_rook_work.c lapacke_zsytrs_aa.c lapacke_zsytrs_aa_work.c + lapacke_zsytrs_aa_2stage.c + lapacke_zsytrs_aa_2stage_work.c lapacke_zsytrs_3.c lapacke_zsytrs_3_work.c lapacke_ztbcon.c diff --git a/exports/gensymbol b/exports/gensymbol index b8c4b444b..21a1b703d 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -3471,12 +3471,6 @@ LAPACKE_ssytrf_aa_2stage_work, LAPACKE_ssytrs_aa_2stage, LAPACKE_ssytrs_aa_2stage_work, - LAPACKE_zhbev_2stage, - LAPACKE_zhbev_2stage_work, - LAPACKE_zhbevd_2stage, - LAPACKE_zhbevd_2stage_work, - LAPACKE_zhbevx_2stage, - LAPACKE_zhbevx_2stage_work, LAPACKE_zhesv_aa_2stage, LAPACKE_zhesv_aa_2stage_work, LAPACKE_zhetrf_aa_2stage, From 4054d32def49c8a764ac7bfd1e8cf0ea8e1937e6 Mon Sep 17 00:00:00 2001 From: martin Date: Fri, 24 Nov 2017 08:15:40 +0100 Subject: [PATCH 064/122] update cmake files --- cmake/lapack.cmake | 2 +- cmake/lapacke.cmake | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 327e7696c..d1d2cdd3b 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -373,7 +373,7 @@ set(ZLASRC zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f zsytrs_aa_2stage.f zsytri_rook.f zsycon_rook.f zsysv_rook.f zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrf_aa_2stage.f zsytrs_3.f - zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f + zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f zsysv_aa_2stage.f ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f ztprfs.f ztptri.f diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index b7b121948..ef11e0d05 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -2434,6 +2434,7 @@ set(Utils_SRC lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c + lapacke_nancheck.c ) set(LAPACKE_REL_SRC "") From 5056a044b2704183fe7cefb90840ebf2421dc20d Mon Sep 17 00:00:00 2001 From: martin Date: Fri, 24 Nov 2017 09:15:20 +0100 Subject: [PATCH 065/122] fix location of lapacke_nancheck --- cmake/lapacke.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index ef11e0d05..7c7c0d8a9 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -2394,6 +2394,7 @@ set(MATGEN lapacke_slagsy_work.c lapacke_zlagsy.c lapacke_zlagsy_work.c + lapacke_nancheck.c ) set(Utils_SRC @@ -2434,7 +2435,6 @@ set(Utils_SRC lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c - lapacke_nancheck.c ) set(LAPACKE_REL_SRC "") From 7e9b29b9b80ab82282eea5fde837d16849636826 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 24 Nov 2017 18:36:37 +0100 Subject: [PATCH 066/122] fix spurious compiler warning (no code change) --- kernel/x86_64/zgemv_t_4.c | 4 ++-- kernel/zarch/zgemv_t_4.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/x86_64/zgemv_t_4.c b/kernel/x86_64/zgemv_t_4.c index 20ccf06f7..f5e5572f9 100644 --- a/kernel/x86_64/zgemv_t_4.c +++ b/kernel/x86_64/zgemv_t_4.c @@ -313,7 +313,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, for( i = 0; i < n1 ; i++) { - memset(ybuffer,0,64); + memset(ybuffer,0,sizeoff(ybuffer)); zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer,alpha); ap[0] += lda4; ap[1] += lda4; @@ -338,7 +338,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, for( i = 0; i < n2 ; i++) { - memset(ybuffer,0,64); + memset(ybuffer,0,sizeof(ybuffer)); zgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer,alpha); a_ptr += lda; y_ptr[0] += ybuffer[0]; diff --git a/kernel/zarch/zgemv_t_4.c b/kernel/zarch/zgemv_t_4.c index ba992b767..94a056cab 100644 --- a/kernel/zarch/zgemv_t_4.c +++ b/kernel/zarch/zgemv_t_4.c @@ -518,7 +518,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, for( i = 0; i < n1 ; i++) { - memset(ybuffer,0,64); + memset(ybuffer,0,sizeof(ybuffer)); zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer,alpha); ap[0] += lda4; ap[1] += lda4; @@ -543,7 +543,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, for( i = 0; i < n2 ; i++) { - memset(ybuffer,0,64); + memset(ybuffer,0,sizeof(ybuffer)); zgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer,alpha); a_ptr += lda; y_ptr[0] += ybuffer[0]; From e89b979b2ce88b589fe2a9805dd64338e67b1ae2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 24 Nov 2017 18:39:04 +0100 Subject: [PATCH 067/122] fix spurious compiler warning fix (no code change) --- kernel/x86_64/zgemv_t_4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/zgemv_t_4.c b/kernel/x86_64/zgemv_t_4.c index f5e5572f9..c4a38202b 100644 --- a/kernel/x86_64/zgemv_t_4.c +++ b/kernel/x86_64/zgemv_t_4.c @@ -313,7 +313,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, for( i = 0; i < n1 ; i++) { - memset(ybuffer,0,sizeoff(ybuffer)); + memset(ybuffer,0,sizeof(ybuffer)); zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer,alpha); ap[0] += lda4; ap[1] += lda4; From e14d50d86ec66651494d3577bab8c24956d91453 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 24 Nov 2017 19:13:24 +0100 Subject: [PATCH 068/122] eliminate Wunused-const gcc7 warning --- driver/level2/tbmv_L.c | 2 +- driver/level2/tbmv_U.c | 2 +- driver/level2/tbsv_L.c | 2 +- driver/level2/tbsv_U.c | 2 +- driver/level2/tpmv_L.c | 2 +- driver/level2/tpmv_U.c | 2 +- driver/level2/ztbmv_L.c | 2 +- driver/level2/ztbmv_U.c | 2 +- driver/level2/ztbsv_L.c | 2 +- driver/level2/ztbsv_U.c | 2 +- driver/level2/ztpsv_L.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/driver/level2/tbmv_L.c b/driver/level2/tbmv_L.c index e40e79396..03aa22f2f 100644 --- a/driver/level2/tbmv_L.c +++ b/driver/level2/tbmv_L.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/tbmv_U.c b/driver/level2/tbmv_U.c index 529fd863f..2fdf120d1 100644 --- a/driver/level2/tbmv_U.c +++ b/driver/level2/tbmv_U.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/tbsv_L.c b/driver/level2/tbsv_L.c index f62400b5e..db8782ac0 100644 --- a/driver/level2/tbsv_L.c +++ b/driver/level2/tbsv_L.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/tbsv_U.c b/driver/level2/tbsv_U.c index 1dc7f2006..1d5a6c42d 100644 --- a/driver/level2/tbsv_U.c +++ b/driver/level2/tbsv_U.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/tpmv_L.c b/driver/level2/tpmv_L.c index d01478c66..89618077b 100644 --- a/driver/level2/tpmv_L.c +++ b/driver/level2/tpmv_L.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/tpmv_U.c b/driver/level2/tpmv_U.c index 5d311f8bd..4f21c5318 100644 --- a/driver/level2/tpmv_U.c +++ b/driver/level2/tpmv_U.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/ztbmv_L.c b/driver/level2/ztbmv_L.c index e7bd35796..f50ba8141 100644 --- a/driver/level2/ztbmv_L.c +++ b/driver/level2/ztbmv_L.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/ztbmv_U.c b/driver/level2/ztbmv_U.c index c2d810a04..85d95d474 100644 --- a/driver/level2/ztbmv_U.c +++ b/driver/level2/ztbmv_U.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/ztbsv_L.c b/driver/level2/ztbsv_L.c index 44329f5c7..e7394cef4 100644 --- a/driver/level2/ztbsv_L.c +++ b/driver/level2/ztbsv_L.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/ztbsv_U.c b/driver/level2/ztbsv_U.c index 530194aa3..0b374159f 100644 --- a/driver/level2/ztbsv_U.c +++ b/driver/level2/ztbsv_U.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dp1 = 1.; +// const static FLOAT dp1 = 1.; int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ diff --git a/driver/level2/ztpsv_L.c b/driver/level2/ztpsv_L.c index 5ce07f43b..2c5bdd756 100644 --- a/driver/level2/ztpsv_L.c +++ b/driver/level2/ztpsv_L.c @@ -40,7 +40,7 @@ #include #include "common.h" -const static FLOAT dm1 = -1.; +// const static FLOAT dm1 = -1.; int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ From f0333333d1379aba40e3461743ee68dd2fe88064 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Nov 2017 19:59:28 +0100 Subject: [PATCH 069/122] Add trivially optimized dsdot based on sdot --- kernel/x86_64/KERNEL.NEHALEM | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/x86_64/KERNEL.NEHALEM b/kernel/x86_64/KERNEL.NEHALEM index 8feef5c31..aa8ace29e 100644 --- a/kernel/x86_64/KERNEL.NEHALEM +++ b/kernel/x86_64/KERNEL.NEHALEM @@ -2,6 +2,7 @@ SAXPYKERNEL = saxpy.c DAXPYKERNEL = daxpy.c SDOTKERNEL = sdot.c +DSDOTKERNEL = sdot.c DDOTKERNEL = ddot.c DSYMV_U_KERNEL = dsymv_U.c From 6bd163f37af08d77eb16d6a936accc9e34646459 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Nov 2017 20:00:23 +0100 Subject: [PATCH 070/122] Add trivially optimized dsdot based on sdot --- kernel/x86_64/KERNEL.SANDYBRIDGE | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/x86_64/KERNEL.SANDYBRIDGE b/kernel/x86_64/KERNEL.SANDYBRIDGE index 355d1e2f1..328538cb0 100644 --- a/kernel/x86_64/KERNEL.SANDYBRIDGE +++ b/kernel/x86_64/KERNEL.SANDYBRIDGE @@ -10,6 +10,7 @@ SGEMVTKERNEL = sgemv_t_4.c ZGEMVNKERNEL = zgemv_n_4.c SDOTKERNEL = sdot.c +DSDOTKERNEL = sdot.c DDOTKERNEL = ddot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c From 1bb6a96ebca26ded675b289c44524a23f2081bcf Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Nov 2017 20:01:42 +0100 Subject: [PATCH 071/122] Add trivially optimized dsdot based on sdot --- kernel/x86_64/KERNEL.STEAMROLLER | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/x86_64/KERNEL.STEAMROLLER b/kernel/x86_64/KERNEL.STEAMROLLER index 568319ca6..e0479cc44 100644 --- a/kernel/x86_64/KERNEL.STEAMROLLER +++ b/kernel/x86_64/KERNEL.STEAMROLLER @@ -8,6 +8,7 @@ CAXPYKERNEL = caxpy.c ZAXPYKERNEL = zaxpy.c SDOTKERNEL = sdot.c +DSDOTKERNEL = sdot.c DDOTKERNEL = ddot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c From 4fbdcfa8238d107e856b17aed2228244a74d95b2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Nov 2017 20:02:28 +0100 Subject: [PATCH 072/122] Add trivially optimized dsdot based on sdot --- kernel/x86_64/KERNEL.BULLDOZER | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/x86_64/KERNEL.BULLDOZER b/kernel/x86_64/KERNEL.BULLDOZER index eb91118e2..26a119fd3 100644 --- a/kernel/x86_64/KERNEL.BULLDOZER +++ b/kernel/x86_64/KERNEL.BULLDOZER @@ -7,6 +7,7 @@ CAXPYKERNEL = caxpy.c ZAXPYKERNEL = zaxpy.c SDOTKERNEL = sdot.c +DSDOTKERNEL = sdot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c From 3d891c31061d4ff1bf07882ecc69cd67d709b4bd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Nov 2017 20:03:40 +0100 Subject: [PATCH 073/122] Add trivially optimized dsdot based on sdot --- kernel/x86_64/KERNEL.EXCAVATOR | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/x86_64/KERNEL.EXCAVATOR b/kernel/x86_64/KERNEL.EXCAVATOR index 568319ca6..e0479cc44 100644 --- a/kernel/x86_64/KERNEL.EXCAVATOR +++ b/kernel/x86_64/KERNEL.EXCAVATOR @@ -8,6 +8,7 @@ CAXPYKERNEL = caxpy.c ZAXPYKERNEL = zaxpy.c SDOTKERNEL = sdot.c +DSDOTKERNEL = sdot.c DDOTKERNEL = ddot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c From cae5d9a20b6c3738a922868c34ee15c4b81c8e77 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Nov 2017 20:04:29 +0100 Subject: [PATCH 074/122] Add trivially optimized dsdot based on sdot --- kernel/x86_64/KERNEL.PILEDRIVER | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER index 39891a67b..1e8112d94 100644 --- a/kernel/x86_64/KERNEL.PILEDRIVER +++ b/kernel/x86_64/KERNEL.PILEDRIVER @@ -18,6 +18,7 @@ DGEMVNKERNEL = dgemv_n_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S SDOTKERNEL = sdot.c +DSDOTKERNEL = sdot.c DDOTKERNEL = ddot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c From c92cd6d1623f66e895aab67e4f8731223cccfc95 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Nov 2017 20:05:27 +0100 Subject: [PATCH 075/122] Add trivially optimized dsdot based on sdot --- kernel/x86_64/KERNEL.ZEN | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/x86_64/KERNEL.ZEN b/kernel/x86_64/KERNEL.ZEN index f2e1374d3..be4503d47 100644 --- a/kernel/x86_64/KERNEL.ZEN +++ b/kernel/x86_64/KERNEL.ZEN @@ -20,6 +20,7 @@ DSYMV_L_KERNEL = dsymv_L.c DSYMV_U_KERNEL = dsymv_U.c SDOTKERNEL = sdot.c +DSDOTKERNEL = sdot.c DDOTKERNEL = ddot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c From ef95cd471f21ce79c9720798cf7c147eae5fa462 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 25 Nov 2017 02:54:37 +0100 Subject: [PATCH 076/122] elminate unread variable, after reiteration 3 of them (clang4) --- driver/level3/level3_gemm3m_thread.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/driver/level3/level3_gemm3m_thread.c b/driver/level3/level3_gemm3m_thread.c index 340066625..bfd991ffb 100644 --- a/driver/level3/level3_gemm3m_thread.c +++ b/driver/level3/level3_gemm3m_thread.c @@ -974,7 +974,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){ BLASLONG m = args -> m; - BLASLONG n = args -> n; + // BLASLONG n = args -> n; BLASLONG nthreads = args -> nthreads; BLASLONG divN, divT; int mode; @@ -985,13 +985,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO m = m_to - m_from; } - +/* if (range_n) { BLASLONG n_from = *(((BLASLONG *)range_n) + 0); BLASLONG n_to = *(((BLASLONG *)range_n) + 1); n = n_to - n_from; } +*/ if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) { GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0); From 1236dbe5a6d2f6efe2f3a831fabbb81f5c1dd4b3 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 26 Nov 2017 13:26:11 +0100 Subject: [PATCH 077/122] Eliminate 2-8 dead increments code --- kernel/generic/trmm_lncopy_2.c | 12 ++++++------ kernel/generic/trmm_lncopy_8.c | 8 ++++---- kernel/generic/trmm_ltcopy_2.c | 10 +++++----- kernel/generic/trmm_ltcopy_8.c | 16 ++++++++-------- kernel/generic/trmm_uncopy_2.c | 8 ++++---- kernel/generic/trmm_uncopy_8.c | 8 ++++---- kernel/generic/trmm_utcopy_2.c | 8 ++++---- kernel/generic/trmm_utcopy_8.c | 16 ++++++++-------- kernel/generic/trsm_ltcopy_8.c | 8 ++++---- kernel/generic/trsm_uncopy_8.c | 6 +++--- kernel/generic/trsm_utcopy_8.c | 8 ++++---- kernel/generic/zgemm3m_tcopy_8.c | 4 ++-- kernel/generic/zgemm_ncopy_4.c | 8 ++++---- kernel/generic/zgemm_tcopy_2.c | 4 ++-- kernel/generic/zgemm_tcopy_4.c | 10 +++++----- kernel/generic/ztrmm_lncopy_2.c | 8 ++++---- kernel/generic/ztrmm_lncopy_4.c | 18 +++++++++--------- kernel/generic/ztrmm_ltcopy_2.c | 8 ++++---- kernel/generic/ztrmm_ltcopy_4.c | 20 ++++++++++---------- kernel/generic/ztrmm_uncopy_2.c | 6 +++--- kernel/generic/ztrmm_uncopy_4.c | 22 +++++++++++----------- kernel/generic/ztrmm_utcopy_2.c | 6 +++--- kernel/generic/ztrmm_utcopy_4.c | 12 ++++++------ kernel/generic/ztrsm_lncopy_4.c | 16 ++++++++-------- kernel/generic/ztrsm_ltcopy_4.c | 14 +++++++------- kernel/generic/ztrsm_uncopy_4.c | 16 ++++++++-------- kernel/generic/ztrsm_utcopy_4.c | 14 +++++++------- 27 files changed, 147 insertions(+), 147 deletions(-) diff --git a/kernel/generic/trmm_lncopy_2.c b/kernel/generic/trmm_lncopy_2.c index ed28b661b..616ae2508 100644 --- a/kernel/generic/trmm_lncopy_2.c +++ b/kernel/generic/trmm_lncopy_2.c @@ -121,12 +121,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data03; - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } else if (X < posY) { - ao1 += lda; + // ao1 += lda; b += 2; } else { #ifdef UNIT @@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data03; #endif - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } } @@ -191,7 +191,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/trmm_lncopy_8.c b/kernel/generic/trmm_lncopy_8.c index 8f5fbce87..69429411e 100644 --- a/kernel/generic/trmm_lncopy_8.c +++ b/kernel/generic/trmm_lncopy_8.c @@ -608,16 +608,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } else if (X < posY) { if (m & 4) { - ao1 += 4 * lda; + /* ao1 += 4 * lda; ao2 += 4 * lda; ao3 += 4 * lda; - ao4 += 4 * lda; + ao4 += 4 * lda; */ b += 32; } if (m & 2) { - ao1 += 2 * lda; + // ao1 += 2 * lda; b += 16; } @@ -1018,7 +1018,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } else if (X < posY) { if (m & 2) { - ao1 += 2 * lda; + // ao1 += 2 * lda; b += 8; } diff --git a/kernel/generic/trmm_ltcopy_2.c b/kernel/generic/trmm_ltcopy_2.c index e9ad45fa0..13a3bc53c 100644 --- a/kernel/generic/trmm_ltcopy_2.c +++ b/kernel/generic/trmm_ltcopy_2.c @@ -116,8 +116,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (m & 1) { if (X > posY) { - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } else if (X < posY) { @@ -126,7 +126,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data02; - ao1 += lda; + // ao1 += lda; b += 2; } else { #ifdef UNIT @@ -141,7 +141,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data02; #endif - ao1 += 2; + // ao1 += 2; b += 2; } } @@ -190,7 +190,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/trmm_ltcopy_8.c b/kernel/generic/trmm_ltcopy_8.c index 101272829..71597d4e5 100644 --- a/kernel/generic/trmm_ltcopy_8.c +++ b/kernel/generic/trmm_ltcopy_8.c @@ -443,27 +443,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X > posY) { if (m & 4) { - ao1 += 4; + /* ao1 += 4; ao2 += 4; ao3 += 4; ao4 += 4; ao5 += 4; ao6 += 4; ao7 += 4; - ao8 += 4; + ao8 += 4; */ b += 32; } if (m & 2) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; ao4 += 2; ao5 += 2; ao6 += 2; ao7 += 2; - ao8 += 2; + ao8 += 2; */ b += 16; } @@ -548,8 +548,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON ao1 += 4 * lda; ao2 += 4 * lda; - ao3 += 4 * lda; - ao4 += 4 * lda; + /* ao3 += 4 * lda; + ao4 += 4 * lda; */ b += 32; } @@ -964,10 +964,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X > posY) { if (m & 2) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } diff --git a/kernel/generic/trmm_uncopy_2.c b/kernel/generic/trmm_uncopy_2.c index 61303a2ba..d4f107d66 100644 --- a/kernel/generic/trmm_uncopy_2.c +++ b/kernel/generic/trmm_uncopy_2.c @@ -122,12 +122,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data03; - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } else if (X > posY) { - ao1 += lda; + // ao1 += lda; b += 2; } else { #ifdef UNIT @@ -142,7 +142,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data03; #endif - ao1 += lda; + // ao1 += lda; b += 2; } } diff --git a/kernel/generic/trmm_uncopy_8.c b/kernel/generic/trmm_uncopy_8.c index ecfefd041..d54dae996 100644 --- a/kernel/generic/trmm_uncopy_8.c +++ b/kernel/generic/trmm_uncopy_8.c @@ -610,16 +610,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } else if (X > posY) { if (m & 4) { - ao1 += 4 * lda; + /* ao1 += 4 * lda; ao2 += 4 * lda; ao3 += 4 * lda; - ao4 += 4 * lda; + ao4 += 4 * lda; */ b += 32; } if (m & 2) { - ao1 += 2 * lda; + // ao1 += 2 * lda; b += 16; } @@ -1019,7 +1019,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } else if (X > posY) { if (m & 2) { - ao1 += 2 * lda; + // ao1 += 2 * lda; b += 8; } diff --git a/kernel/generic/trmm_utcopy_2.c b/kernel/generic/trmm_utcopy_2.c index ae4a19e32..efa68162c 100644 --- a/kernel/generic/trmm_utcopy_2.c +++ b/kernel/generic/trmm_utcopy_2.c @@ -117,8 +117,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (m & 1) { if (X < posY) { - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } else if (X > posY) { @@ -127,7 +127,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data02; - ao1 += lda; + // ao1 += lda; b += 2; } else { #ifdef UNIT @@ -139,7 +139,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = ZERO; #endif - ao1 += lda; + // ao1 += lda; b += 2; } } diff --git a/kernel/generic/trmm_utcopy_8.c b/kernel/generic/trmm_utcopy_8.c index 65fee357b..63106ac72 100644 --- a/kernel/generic/trmm_utcopy_8.c +++ b/kernel/generic/trmm_utcopy_8.c @@ -501,27 +501,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X < posY) { if (m & 4) { - ao1 += 4; + /* ao1 += 4; ao2 += 4; ao3 += 4; ao4 += 4; ao5 += 4; ao6 += 4; ao7 += 4; - ao8 += 4; + ao8 += 4; */ b += 32; } if (m & 2) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; ao4 += 2; ao5 += 2; ao6 += 2; ao7 += 2; - ao8 += 2; + ao8 += 2; */ b += 16; } @@ -606,8 +606,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON ao1 += 4 * lda; ao2 += 4 * lda; - ao3 += 4 * lda; - ao4 += 4 * lda; + /* ao3 += 4 * lda; + ao4 += 4 * lda; */ b += 32; } @@ -1022,10 +1022,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X < posY) { if (m & 2) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } diff --git a/kernel/generic/trsm_ltcopy_8.c b/kernel/generic/trsm_ltcopy_8.c index 35179d185..581578161 100644 --- a/kernel/generic/trsm_ltcopy_8.c +++ b/kernel/generic/trsm_ltcopy_8.c @@ -487,8 +487,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT a1 += 4 * lda; a2 += 4 * lda; - a3 += 4 * lda; - a4 += 4 * lda; + /* a3 += 4 * lda; + a4 += 4 * lda; */ b += 32; ii += 4; @@ -574,7 +574,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 16; ii += 2; @@ -779,7 +779,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 8; ii += 2; } diff --git a/kernel/generic/trsm_uncopy_8.c b/kernel/generic/trsm_uncopy_8.c index 40903d44f..ec71f3f8d 100644 --- a/kernel/generic/trsm_uncopy_8.c +++ b/kernel/generic/trsm_uncopy_8.c @@ -646,7 +646,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 7) = data57; } b += 8; - ii += 1; + // ii += 1; } a += 8 * lda; @@ -835,7 +835,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 3) = data25; } b += 4; - ii += 1; + // ii += 1; } a += 4 * lda; @@ -908,7 +908,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 1) = data09; } b += 2; - ii += 1; + // ii += 1; } a += 2 * lda; diff --git a/kernel/generic/trsm_utcopy_8.c b/kernel/generic/trsm_utcopy_8.c index 97da66f87..47feb5974 100644 --- a/kernel/generic/trsm_utcopy_8.c +++ b/kernel/generic/trsm_utcopy_8.c @@ -453,8 +453,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT a1 += 4 * lda; a2 += 4 * lda; - a3 += 4 * lda; - a4 += 4 * lda; + /* a3 += 4 * lda; + a4 += 4 * lda; */ b += 32; ii += 4; } @@ -513,7 +513,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 16; ii += 2; } @@ -680,7 +680,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 8; ii += 2; } diff --git a/kernel/generic/zgemm3m_tcopy_8.c b/kernel/generic/zgemm3m_tcopy_8.c index fddbdd8cc..5f7160253 100644 --- a/kernel/generic/zgemm3m_tcopy_8.c +++ b/kernel/generic/zgemm3m_tcopy_8.c @@ -1044,7 +1044,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, *(b_offset2 + 3) = CMULT(a7, a8); a_offset1 += 8; - b_offset2 += 4; + // b_offset2 += 4; } if (n & 2){ @@ -1057,7 +1057,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, *(b_offset3 + 1) = CMULT(a3, a4); a_offset1 += 4; - b_offset3 += 2; + // b_offset3 += 2; } if (n & 1){ diff --git a/kernel/generic/zgemm_ncopy_4.c b/kernel/generic/zgemm_ncopy_4.c index 0c2959b5f..fde96807a 100644 --- a/kernel/generic/zgemm_ncopy_4.c +++ b/kernel/generic/zgemm_ncopy_4.c @@ -225,10 +225,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset + 6) = ctemp07; *(boffset + 7) = ctemp08; - aoffset1 += 2; + /* aoffset1 += 2; aoffset2 += 2; aoffset3 += 2; - aoffset4 += 2; + aoffset4 += 2; */ boffset += 8; } j--; @@ -323,8 +323,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset + 2) = ctemp03; *(boffset + 3) = ctemp04; - aoffset1 += 2; - aoffset2 += 2; + /* aoffset1 += 2; + aoffset2 += 2; */ boffset += 4; } } diff --git a/kernel/generic/zgemm_tcopy_2.c b/kernel/generic/zgemm_tcopy_2.c index 70e202b71..2b12bb7dd 100644 --- a/kernel/generic/zgemm_tcopy_2.c +++ b/kernel/generic/zgemm_tcopy_2.c @@ -140,7 +140,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(b_offset1 + 6) = ctemp11; *(b_offset1 + 7) = ctemp12; - b_offset1 += m * 4; + // b_offset1 += m * 4; a_offset1 += 4; a_offset2 += 4; } @@ -204,7 +204,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(b_offset + 2) = ctemp3; *(b_offset + 3) = ctemp4; - b_offset += m * 4; + // b_offset += m * 4; a_offset += 4; } diff --git a/kernel/generic/zgemm_tcopy_4.c b/kernel/generic/zgemm_tcopy_4.c index 969928d80..9b02e0eed 100644 --- a/kernel/generic/zgemm_tcopy_4.c +++ b/kernel/generic/zgemm_tcopy_4.c @@ -233,10 +233,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset3 + 6) = ctemp07; *(boffset3 + 7) = ctemp08; - aoffset1 += 2; + /* aoffset1 += 2; aoffset2 += 2; aoffset3 += 2; - aoffset4 += 2; + aoffset4 += 2; */ boffset3 += 8; } @@ -338,8 +338,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset3 + 2) = ctemp03; *(boffset3 + 3) = ctemp04; - aoffset1 += 2; - aoffset2 += 2; + /* aoffset1 += 2; + aoffset2 += 2; */ boffset3 += 4; } } @@ -387,7 +387,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset2 + 3) = ctemp04; aoffset1 += 4; - boffset2 += 4; + // boffset2 += 4; } if (n & 1){ diff --git a/kernel/generic/ztrmm_lncopy_2.c b/kernel/generic/ztrmm_lncopy_2.c index c620c78c1..dcd1f8c07 100644 --- a/kernel/generic/ztrmm_lncopy_2.c +++ b/kernel/generic/ztrmm_lncopy_2.c @@ -148,12 +148,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data03; b[ 3] = data04; - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else if (X < posY) { - ao1 += lda; + // ao1 += lda; b += 4; } else { #ifdef UNIT @@ -224,7 +224,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; } diff --git a/kernel/generic/ztrmm_lncopy_4.c b/kernel/generic/ztrmm_lncopy_4.c index 5442105c5..7cb562ba3 100644 --- a/kernel/generic/ztrmm_lncopy_4.c +++ b/kernel/generic/ztrmm_lncopy_4.c @@ -355,23 +355,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 6] = data25; b[ 7] = data26; - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } } else if (X < posY) { if (m & 2) { - ao1 += 2 * lda; - ao2 += 2 * lda; + /* ao1 += 2 * lda; + ao2 += 2 * lda; */ b += 16; } if (m & 1) { - ao1 += lda; + // ao1 += lda; b += 8; } @@ -586,12 +586,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data09; b[ 3] = data10; - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else if (X < posY) { - ao1 += lda; + // ao1 += lda; b += 4; } else { #ifdef UNIT @@ -657,7 +657,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/ztrmm_ltcopy_2.c b/kernel/generic/ztrmm_ltcopy_2.c index 457890ceb..deb675f73 100644 --- a/kernel/generic/ztrmm_ltcopy_2.c +++ b/kernel/generic/ztrmm_ltcopy_2.c @@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (m & 1) { if (X > posY) { - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else @@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data3; b[ 3] = data4; - ao1 += lda; + // ao1 += lda; b += 4; } else { #ifdef UNIT @@ -233,7 +233,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/ztrmm_ltcopy_4.c b/kernel/generic/ztrmm_ltcopy_4.c index 42a809ba4..246a82641 100644 --- a/kernel/generic/ztrmm_ltcopy_4.c +++ b/kernel/generic/ztrmm_ltcopy_4.c @@ -292,18 +292,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X > posY) { if (m & 2) { - ao1 += 4; + /* ao1 += 4; ao2 += 4; ao3 += 4; - ao4 += 4; + ao4 += 4; */ b += 16; } if (m & 1) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } @@ -347,7 +347,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[15] = data16; ao1 += 2 * lda; - ao2 += 2 * lda; + // ao2 += 2 * lda; b += 16; } @@ -371,7 +371,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 6] = data07; b[ 7] = data08; - ao1 += lda; + // ao1 += lda; b += 8; } @@ -588,8 +588,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (i) { if (X > posY) { - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else @@ -604,7 +604,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data03; b[ 3] = data04; - ao1 += lda; + // ao1 += lda; b += 4; } else { @@ -678,7 +678,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/ztrmm_uncopy_2.c b/kernel/generic/ztrmm_uncopy_2.c index c2521d3c3..5cb0ffc54 100644 --- a/kernel/generic/ztrmm_uncopy_2.c +++ b/kernel/generic/ztrmm_uncopy_2.c @@ -153,12 +153,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data03; b[ 3] = data04; - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else if (X > posY) { - ao1 += lda; + // ao1 += lda; b += 4; } else { #ifdef UNIT diff --git a/kernel/generic/ztrmm_uncopy_4.c b/kernel/generic/ztrmm_uncopy_4.c index 249faac1d..e282bd1c7 100644 --- a/kernel/generic/ztrmm_uncopy_4.c +++ b/kernel/generic/ztrmm_uncopy_4.c @@ -354,23 +354,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 6] = data25; b[ 7] = data26; - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } } else if (X > posY) { if (m & 2) { - ao1 += 2 * lda; - ao2 += 2 * lda; + /* ao1 += 2 * lda; + ao2 += 2 * lda; */ b += 16; } if (m & 1) { - ao1 += lda; + // ao1 += lda; b += 8; } @@ -596,13 +596,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 1] = data02; b[ 2] = data09; b[ 3] = data10; - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else if (X > posY) { - ao1 += 2 * lda; - ao2 += 2 * lda; + /* ao1 += 2 * lda; + ao2 += 2 * lda; */ b += 4; } else { #ifdef UNIT @@ -624,8 +624,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data09; b[ 3] = data10; #endif - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } } diff --git a/kernel/generic/ztrmm_utcopy_2.c b/kernel/generic/ztrmm_utcopy_2.c index 840821e16..6c7288ae9 100644 --- a/kernel/generic/ztrmm_utcopy_2.c +++ b/kernel/generic/ztrmm_utcopy_2.c @@ -142,8 +142,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (m & 1) { if (X < posY) { - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else if (X > posY) { @@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data3; b[ 3] = data4; - ao1 += lda; + // ao1 += lda; b += 4; } else { diff --git a/kernel/generic/ztrmm_utcopy_4.c b/kernel/generic/ztrmm_utcopy_4.c index 9a5c8c362..e6b89e7bf 100644 --- a/kernel/generic/ztrmm_utcopy_4.c +++ b/kernel/generic/ztrmm_utcopy_4.c @@ -294,18 +294,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X < posY) { if (m & 2) { - ao1 += 4; + /* ao1 += 4; ao2 += 4; ao3 += 4; - ao4 += 4; + ao4 += 4; */ b += 16; } if (m & 1) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } @@ -349,7 +349,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[15] = data16; ao1 += 2 * lda; - ao2 += 2 * lda; + // ao2 += 2 * lda; b += 16; } @@ -372,7 +372,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 6] = data07; b[ 7] = data08; - ao1 += lda; + // ao1 += lda; b += 8; } diff --git a/kernel/generic/ztrsm_lncopy_4.c b/kernel/generic/ztrsm_lncopy_4.c index 2ad05401a..2a1302e53 100644 --- a/kernel/generic/ztrsm_lncopy_4.c +++ b/kernel/generic/ztrsm_lncopy_4.c @@ -313,13 +313,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 7) = data26; } - a1 += 2; + /* a1 += 2; a2 += 2; a3 += 2; - a4 += 2; + a4 += 2; */ b += 8; - ii += 1; + // ii += 1; } a += 4 * lda; jj += 4; @@ -410,11 +410,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 3) = data10; } - a1 += 2; - a2 += 2; + /* a1 += 2; + a2 += 2; */ b += 4; - ii += 1; + // ii += 1; } a += 2 * lda; jj += 2; @@ -451,8 +451,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT ii += 1; } - a += lda; - jj += 1; + // a += lda; + // jj += 1; } return 0; diff --git a/kernel/generic/ztrsm_ltcopy_4.c b/kernel/generic/ztrsm_ltcopy_4.c index c1152710e..be28ba646 100644 --- a/kernel/generic/ztrsm_ltcopy_4.c +++ b/kernel/generic/ztrsm_ltcopy_4.c @@ -286,7 +286,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 16; ii += 2; @@ -335,9 +335,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 7) = data08; } - a1 += lda; + // a1 += lda; b += 8; - ii += 1; + // ii += 1; } a += 8; @@ -430,9 +430,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 3) = data04; } - a1 += lda; + // a1 += lda; b += 4; - ii += 1; + // ii += 1; } a += 4; @@ -471,8 +471,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT ii += 1; } - a += 2; - jj += 1; + // a += 2; + // jj += 1; } return 0; diff --git a/kernel/generic/ztrsm_uncopy_4.c b/kernel/generic/ztrsm_uncopy_4.c index 9d0e2438d..fe33e41c7 100644 --- a/kernel/generic/ztrsm_uncopy_4.c +++ b/kernel/generic/ztrsm_uncopy_4.c @@ -344,13 +344,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 7) = data26; } - a1 += 2; + /* a1 += 2; a2 += 2; a3 += 2; - a4 += 2; + a4 += 2; */ b += 8; - ii += 1; + // ii += 1; } a += 4 * lda; @@ -444,11 +444,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 3) = data10; } - a1 += 2; - a2 += 2; + /* a1 += 2; + a2 += 2; */ b += 4; - ii += 1; + // ii += 1; } a += 2 *lda; @@ -488,8 +488,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT ii += 1; } - a += lda; - jj += 1; + // a += lda; + // jj += 1; } return 0; diff --git a/kernel/generic/ztrsm_utcopy_4.c b/kernel/generic/ztrsm_utcopy_4.c index f19badd33..b419f6b90 100644 --- a/kernel/generic/ztrsm_utcopy_4.c +++ b/kernel/generic/ztrsm_utcopy_4.c @@ -266,7 +266,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 16; ii += 2; @@ -303,10 +303,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 7) = data08; } - a1 += lda; + // a1 += lda; b += 8; - ii += 1; + // ii += 1; } a += 8; @@ -394,10 +394,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *(b + 3) = data04; } - a1 += lda; + // a1 += lda; b += 4; - ii += 1; + // ii += 1; } a += 4; @@ -436,8 +436,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT ii += 1; } - a += 2; - jj += 1; + // a += 2; + // jj += 1; } return 0; From 441a9c8385b1a55f879deb8a4ad8bc85b62cc5d0 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 26 Nov 2017 17:24:08 +0100 Subject: [PATCH 078/122] more dead increments clang4 scan-build deadcode.deadstores --- kernel/generic/gemm_tcopy_8.c | 8 ++++---- kernel/generic/trmm_lncopy_4.c | 18 +++++++++--------- kernel/generic/trmm_ltcopy_4.c | 20 ++++++++++---------- kernel/generic/trmm_uncopy_4.c | 22 +++++++++++----------- kernel/generic/trmm_utcopy_4.c | 16 ++++++++-------- kernel/generic/trsm_ltcopy_4.c | 2 +- kernel/generic/trsm_utcopy_4.c | 2 +- kernel/generic/zgemm_tcopy_8.c | 4 ++-- kernel/generic/ztrmm_lncopy_8.c | 16 ++++++++-------- kernel/generic/ztrmm_ltcopy_8.c | 18 +++++++++--------- kernel/generic/ztrmm_uncopy_8.c | 16 ++++++++-------- kernel/generic/ztrmm_utcopy_8.c | 8 ++++---- 12 files changed, 75 insertions(+), 75 deletions(-) diff --git a/kernel/generic/gemm_tcopy_8.c b/kernel/generic/gemm_tcopy_8.c index 9770d110d..b28f3d219 100644 --- a/kernel/generic/gemm_tcopy_8.c +++ b/kernel/generic/gemm_tcopy_8.c @@ -719,10 +719,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ if (m & 1){ aoffset1 = aoffset; - aoffset += lda; + // aoffset += lda; boffset1 = boffset; - boffset += 8; + // boffset += 8; i = (n >> 3); if (i > 0){ @@ -762,7 +762,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset2 + 1) = ctemp02; *(boffset2 + 2) = ctemp03; *(boffset2 + 3) = ctemp04; - boffset2 += 4; + // boffset2 += 4; } if (n & 2){ @@ -772,7 +772,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset3 + 0) = ctemp01; *(boffset3 + 1) = ctemp02; - boffset3 += 2; + // boffset3 += 2; } if (n & 1){ diff --git a/kernel/generic/trmm_lncopy_4.c b/kernel/generic/trmm_lncopy_4.c index 0dcfb965a..25632d242 100644 --- a/kernel/generic/trmm_lncopy_4.c +++ b/kernel/generic/trmm_lncopy_4.c @@ -237,24 +237,24 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data03; b[ 3] = data04; - ao1 += 1; + /* ao1 += 1; ao2 += 1; ao3 += 1; - ao4 += 1; + ao4 += 1; */ b += 4; } } else if (X < posY) { if (m & 2) { - ao1 += 2 * lda; - ao2 += 2 * lda; + /* ao1 += 2 * lda; + ao2 += 2 * lda; */ b += 8; } if (m & 1) { - ao1 += lda; + // ao1 += lda; b += 4; } @@ -414,12 +414,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data02; - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } else if (X < posY) { - ao1 += lda; + // ao1 += lda; b += 2; } else { #ifdef UNIT @@ -477,7 +477,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/trmm_ltcopy_4.c b/kernel/generic/trmm_ltcopy_4.c index 66a7325bb..128536aad 100644 --- a/kernel/generic/trmm_ltcopy_4.c +++ b/kernel/generic/trmm_ltcopy_4.c @@ -204,18 +204,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X > posY) { if (m & 2) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } if (m & 1) { - ao1 += 1; + /* ao1 += 1; ao2 += 1; ao3 += 1; - ao4 += 1; + ao4 += 1; */ b += 4; } @@ -241,7 +241,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 7] = data08; ao1 += 2 * lda; - ao2 += 2 * lda; + // ao2 += 2 * lda; b += 8; } @@ -257,7 +257,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data03; b[ 3] = data04; - ao1 += lda; + // ao1 += lda; b += 4; } @@ -412,8 +412,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (i) { if (X > posY) { - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } else @@ -423,7 +423,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data02; - ao1 += lda; + // ao1 += lda; b += 2; } else { #ifdef UNIT @@ -481,7 +481,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/trmm_uncopy_4.c b/kernel/generic/trmm_uncopy_4.c index 0218a0e31..9ec040d5d 100644 --- a/kernel/generic/trmm_uncopy_4.c +++ b/kernel/generic/trmm_uncopy_4.c @@ -241,23 +241,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data05; b[ 3] = data07; - ao1 += 1; + /* ao1 += 1; ao2 += 1; ao3 += 1; - ao4 += 1; + ao4 += 1; */ b += 4; } } else if (X > posY) { if (m & 2) { - ao1 += 2 * lda; - ao2 += 2 * lda; + /* ao1 += 2 * lda; + ao2 += 2 * lda; */ b += 8; } if (m & 1) { - ao1 += lda; + // ao1 += lda; b += 4; } @@ -418,13 +418,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data05; - ao1 += 1; - ao2 += 1; + /* ao1 += 1; + ao2 += 1; */ b += 2; } else if (X > posY) { - ao1 += lda; - ao2 += lda; + /* ao1 += lda; + ao2 += lda; */ b += 2; } else { #ifdef UNIT @@ -438,8 +438,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data05; #endif - ao1 += lda; - ao2 += lda; + /* ao1 += lda; + ao2 += lda; */ b += 2; } } diff --git a/kernel/generic/trmm_utcopy_4.c b/kernel/generic/trmm_utcopy_4.c index 441f7338b..3e7726b61 100644 --- a/kernel/generic/trmm_utcopy_4.c +++ b/kernel/generic/trmm_utcopy_4.c @@ -201,18 +201,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X < posY) { if (m & 2) { - ao1 += 2; + /* ao1 += 2; ao2 += 2; ao3 += 2; - ao4 += 2; + ao4 += 2; */ b += 8; } if (m & 1) { - ao1 += 1; + /* ao1 += 1; ao2 += 1; ao3 += 1; - ao4 += 1; + ao4 += 1; */ b += 4; } @@ -238,7 +238,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 7] = data08; ao1 += 2 * lda; - ao2 += 2 * lda; + // ao2 += 2 * lda; b += 8; } @@ -253,7 +253,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = data03; b[ 3] = data04; - ao1 += lda; + // ao1 += lda; b += 4; } @@ -401,7 +401,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (i) { if (X < posY) { - ao1 += 2; + // ao1 += 2; b += 2; } else if (X > posY) { @@ -411,7 +411,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 0] = data01; b[ 1] = data02; - ao1 += lda; + // ao1 += lda; b += 2; } else { #ifdef UNIT diff --git a/kernel/generic/trsm_ltcopy_4.c b/kernel/generic/trsm_ltcopy_4.c index 12043eb33..07bb137d4 100644 --- a/kernel/generic/trsm_ltcopy_4.c +++ b/kernel/generic/trsm_ltcopy_4.c @@ -206,7 +206,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 8; ii += 2; diff --git a/kernel/generic/trsm_utcopy_4.c b/kernel/generic/trsm_utcopy_4.c index f83617224..bd1b839cf 100644 --- a/kernel/generic/trsm_utcopy_4.c +++ b/kernel/generic/trsm_utcopy_4.c @@ -194,7 +194,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT } a1 += 2 * lda; - a2 += 2 * lda; + // a2 += 2 * lda; b += 8; ii += 2; diff --git a/kernel/generic/zgemm_tcopy_8.c b/kernel/generic/zgemm_tcopy_8.c index bad835bb8..8d8988716 100644 --- a/kernel/generic/zgemm_tcopy_8.c +++ b/kernel/generic/zgemm_tcopy_8.c @@ -324,7 +324,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ if (n & 1){ aoffset1 = aoffset; aoffset2 = aoffset + lda; - aoffset += 2; + // aoffset += 2; i = (m >> 1); if (i > 0){ @@ -353,7 +353,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset + 0) = ctemp01; *(boffset + 1) = ctemp02; - boffset += 2; + // boffset += 2; } } diff --git a/kernel/generic/ztrmm_lncopy_8.c b/kernel/generic/ztrmm_lncopy_8.c index 71d3bf1ce..0f173a17e 100644 --- a/kernel/generic/ztrmm_lncopy_8.c +++ b/kernel/generic/ztrmm_lncopy_8.c @@ -350,14 +350,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X < posY) { - ao1 += i * lda; + /* ao1 += i * lda; ao2 += i * lda; ao3 += i * lda; ao4 += i * lda; ao5 += i * lda; ao6 += i * lda; ao7 += i * lda; - ao8 += i * lda; + ao8 += i * lda; */ b += 16 * i; } else { #ifdef UNIT @@ -675,10 +675,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X < posY) { - ao1 += i * lda; + /* ao1 += i * lda; ao2 += i * lda; ao3 += i * lda; - ao4 += i * lda; + ao4 += i * lda; */ b += 8 * i; } else { #ifdef UNIT @@ -804,13 +804,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 1] = *(ao1 + 1); b[ 2] = *(ao2 + 0); b[ 3] = *(ao2 + 1); - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else if (X < posY) { - ao1 += 2 * lda; - ao2 += 2 * lda; + /* ao1 += 2 * lda; + ao2 += 2 * lda; */ b += 4; } else { #ifdef UNIT diff --git a/kernel/generic/ztrmm_ltcopy_8.c b/kernel/generic/ztrmm_ltcopy_8.c index 09cb8037e..0af2420c3 100644 --- a/kernel/generic/ztrmm_ltcopy_8.c +++ b/kernel/generic/ztrmm_ltcopy_8.c @@ -317,14 +317,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 7); if (i > 0) { if (X > posY) { - a01 += 2 * i; + /* a01 += 2 * i; a02 += 2 * i; a03 += 2 * i; a04 += 2 * i; a05 += 2 * i; a06 += 2 * i; a07 += 2 * i; - a08 += 2 * i; + a08 += 2 * i; */ b += 16 * i; } else if (X < posY) { @@ -661,10 +661,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 3); if (i > 0) { if (X > posY) { - a01 += 2 * i; + /* a01 += 2 * i; a02 += 2 * i; a03 += 2 * i; - a04 += 2 * i; + a04 += 2 * i; */ b += 8 * i; } else if (X < posY) { @@ -802,8 +802,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 1); if (i > 0) { if (X > posY) { - a01 += 2; - a02 += 2; + /* a01 += 2; + a02 += 2; */ b += 4; } else if (X < posY) { @@ -812,8 +812,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 2] = *(a01 + 2); b[ 3] = *(a01 + 3); - a01 += lda; - a02 += lda; + /* a01 += lda; + a02 += lda; */ b += 4; } else { #ifdef UNIT @@ -869,7 +869,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i --; } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/ztrmm_uncopy_8.c b/kernel/generic/ztrmm_uncopy_8.c index faadd2196..c3ea0c85e 100644 --- a/kernel/generic/ztrmm_uncopy_8.c +++ b/kernel/generic/ztrmm_uncopy_8.c @@ -350,14 +350,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X > posY) { - ao1 += i * lda; + /* ao1 += i * lda; ao2 += i * lda; ao3 += i * lda; ao4 += i * lda; ao5 += i * lda; ao6 += i * lda; ao7 += i * lda; - ao8 += i * lda; + ao8 += i * lda; */ b += 16 * i; } else { #ifdef UNIT @@ -677,10 +677,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X > posY) { - ao1 += i * lda; + /* ao1 += i * lda; ao2 += i * lda; ao3 += i * lda; - ao4 += i * lda; + ao4 += i * lda; */ b += 8 * i; } else { #ifdef UNIT @@ -807,13 +807,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b[ 1] = *(ao1 + 1); b[ 2] = *(ao2 + 0); b[ 3] = *(ao2 + 1); - ao1 += 2; - ao2 += 2; + /* ao1 += 2; + ao2 += 2; */ b += 4; } else if (X > posY) { - ao1 += 2 * lda; - ao2 += 2 * lda; + /* ao1 += 2 * lda; + ao2 += 2 * lda; */ b += 4; } else { #ifdef UNIT diff --git a/kernel/generic/ztrmm_utcopy_8.c b/kernel/generic/ztrmm_utcopy_8.c index fb286d0e6..ed4578579 100644 --- a/kernel/generic/ztrmm_utcopy_8.c +++ b/kernel/generic/ztrmm_utcopy_8.c @@ -320,14 +320,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X < posY) { - a01 += 2 * i; + /* a01 += 2 * i; a02 += 2 * i; a03 += 2 * i; a04 += 2 * i; a05 += 2 * i; a06 += 2 * i; a07 += 2 * i; - a08 += 2 * i; + a08 += 2 * i; */ b += 16 * i; } else if (X > posY) { @@ -664,10 +664,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (i) { if (X < posY) { - a01 += 2 * i; + /* a01 += 2 * i; a02 += 2 * i; a03 += 2 * i; - a04 += 2 * i; + a04 += 2 * i; */ b += 8 * i; } else if (X > posY) { From 7a4b3cfbf8666267a2e7466818d880076c163b2f Mon Sep 17 00:00:00 2001 From: martin Date: Tue, 28 Nov 2017 18:38:07 +0100 Subject: [PATCH 079/122] Add trivially optimized DSDOT for POWER8 --- kernel/power/KERNEL.POWER8 | 1 + kernel/power/sdot.c | 55 +++++++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index b9f44db91..594abf795 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -122,6 +122,7 @@ ZCOPYKERNEL = zcopy.c # SDOTKERNEL = sdot.c DDOTKERNEL = ddot.c +DSDOTKERNEL = sdot.c #CDOTKERNEL = ../arm/zdot.c ZDOTKERNEL = zdot.c # diff --git a/kernel/power/sdot.c b/kernel/power/sdot.c index 31f473485..4fdc2f5b5 100644 --- a/kernel/power/sdot.c +++ b/kernel/power/sdot.c @@ -1,5 +1,5 @@ /*************************************************************************** -Copyright (c) 2013-2016, The OpenBLAS Project +Copyright (c) 2013-2017, The OpenBLAS Project All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -66,42 +66,76 @@ static FLOAT sdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) #endif +#if defined (DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif { BLASLONG i=0; BLASLONG ix=0,iy=0; + double dot = 0.0 ; - FLOAT dot = 0.0 ; +#if defined (DSDOT) + double mydot = 0.0; + FLOAT asmdot = 0.0; +#else + FLOAT mydot=0.0; +#endif + BLASLONG n1; if ( n <= 0 ) return(dot); if ( (inc_x == 1) && (inc_y == 1) ) { - BLASLONG n1 = n & -32; + n1 = n & (BLASLONG)(-32); if ( n1 ) - dot = sdot_kernel_16(n1, x, y); - +#if defined(DSDOT) + { + FLOAT *x1=x; + FLOAT *y1=y; + BLASLONG n2 = 32; + while (i Date: Wed, 29 Nov 2017 19:57:35 +0100 Subject: [PATCH 080/122] Correct zgeadd_k prototype --- common_param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common_param.h b/common_param.h index 0513ace9f..8f162c01f 100644 --- a/common_param.h +++ b/common_param.h @@ -888,7 +888,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); - int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG); + int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); } gotoblas_t; From f017e169dcd3ad9aa5886650efad156aa89e1473 Mon Sep 17 00:00:00 2001 From: Kevin Ji Date: Wed, 29 Nov 2017 15:21:12 -0800 Subject: [PATCH 081/122] README: Use the SVG Travis badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 562f6d17f..ec32c1f60 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) +Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.svg?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop) ## Introduction From c917278d232929a843bdec82309d89478955375c Mon Sep 17 00:00:00 2001 From: xoviat Date: Thu, 30 Nov 2017 15:30:10 -0600 Subject: [PATCH 082/122] [appveyor] Use out-of-tree build and cache --- appveyor.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index bb4e06fcb..10fc3b8b9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -23,6 +23,9 @@ skip_commits: # Add [av skip] to commit messages message: /\[av skip\]/ +cache: + - %APPVEYOR_BUILD_FOLDER%\build + environment: global: CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 @@ -44,12 +47,12 @@ install: - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" before_build: - - echo Running cmake... - - cd c:\projects\OpenBLAS - - if [%COMPILER%]==[cl] cmake -G "Visual Studio 12 Win64" . - - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl . - - if [%WITH_FORTRAN%]==[yes] cmake -G "NMake Makefiles" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 . - - if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON . + - ps: if (-Not (Test-Path .\build)) { mkdir build } + - cd build + - if [%COMPILER%]==[cl] cmake -G "Visual Studio 12 Win64" .. + - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl .. + - if [%WITH_FORTRAN%]==[yes] cmake -G "NMake Makefiles" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. + - if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON .. build_script: - cmake --build . From c567e34e6be533d5622c59c8722e6de5c2f8a8c5 Mon Sep 17 00:00:00 2001 From: xoviat Date: Thu, 30 Nov 2017 15:33:32 -0600 Subject: [PATCH 083/122] [appveyor] fix syntax --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 10fc3b8b9..15661ee63 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,7 +24,7 @@ skip_commits: message: /\[av skip\]/ cache: - - %APPVEYOR_BUILD_FOLDER%\build + - '%APPVEYOR_BUILD_FOLDER%\build' environment: global: From 7fce11a5b83f92c89a13da0a0e7cf968c2b37900 Mon Sep 17 00:00:00 2001 From: xoviat Date: Thu, 30 Nov 2017 16:31:09 -0600 Subject: [PATCH 084/122] [appveyor] fix test directory --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 15661ee63..ceb6d7f59 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -59,5 +59,5 @@ build_script: test_script: - echo Running Test - - cd c:\projects\OpenBLAS\utest + - cd utest - openblas_utest From e0ddd7d12411320c16026e741863ccdea8df3b52 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 1 Dec 2017 01:39:46 -0600 Subject: [PATCH 085/122] Allow overriding NUM_THREADS --- cmake/system.cmake | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 236a7e888..064e7e4f2 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -94,16 +94,17 @@ if (NOT CMAKE_CROSSCOMPILING) ProcessorCount(NUM_CORES) endif() +endif() + +if (NOT DEFINED NUM_THREADS) if (NOT NUM_CORES EQUAL 0) # HT? set(NUM_THREADS ${NUM_CORES}) + else () + set(NUM_THREADS 0) endif () endif() -if (NOT DEFINED NUM_THREADS) - set(NUM_THREADS 0) -endif() - if (${NUM_THREADS} LESS 2) set(USE_THREAD 0) elseif(NOT DEFINED USE_THREAD) From 1d2da67841851960f686ab1a137f2770668565b3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 2 Dec 2017 12:59:27 +0100 Subject: [PATCH 086/122] Prefix make jobs with travis_wait (#1378) * Prefix make with travis_wait to prevent it getting killed for producing no output * Extend travis_wait to 30mins for the windows build * Trying 45 mins wait time * Increase travis_wait time to 45 minutes for linux builds as well --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index c6d43cd92..0b280c2fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ jobs: - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" script: - set -e - - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - make -C test $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE @@ -67,7 +67,7 @@ jobs: - gfortran-mingw-w64-x86-64 before_script: *common-before script: - - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE env: - TARGET_BOX=WIN64 - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran" From 8213385ab88f13a75d91d2e2d69204259627aceb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 2 Dec 2017 22:51:58 +0100 Subject: [PATCH 087/122] Work around compiler warnings for unused variables in the generic zgemm3m_Xcopy kernels --- kernel/generic/zgemm3m_ncopy_2.c | 9 +++++++++ kernel/generic/zgemm3m_ncopy_4.c | 13 +++++++++++++ kernel/generic/zgemm3m_ncopy_8.c | 21 +++++++++++++++++++++ kernel/generic/zgemm3m_tcopy_2.c | 13 +++++++++++++ kernel/generic/zgemm3m_tcopy_4.c | 13 +++++++++++++ kernel/generic/zgemm3m_tcopy_8.c | 21 +++++++++++++++++++++ 6 files changed, 90 insertions(+) diff --git a/kernel/generic/zgemm3m_ncopy_2.c b/kernel/generic/zgemm3m_ncopy_2.c index dd5a732f2..1dee78098 100644 --- a/kernel/generic/zgemm3m_ncopy_2.c +++ b/kernel/generic/zgemm3m_ncopy_2.c @@ -67,6 +67,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b_offset; FLOAT a1, a2, a3, a4; +/* silence compiler warnings about unused-but-set variables: + depending on compile-time arguments either the odd or the + even-numbered variables will not be used */ + + (void)a1; + (void)a2; + (void)a3; + (void)a4; + lda *= 2; a_offset = a; diff --git a/kernel/generic/zgemm3m_ncopy_4.c b/kernel/generic/zgemm3m_ncopy_4.c index b4d23e236..a975e67ee 100644 --- a/kernel/generic/zgemm3m_ncopy_4.c +++ b/kernel/generic/zgemm3m_ncopy_4.c @@ -67,6 +67,19 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b_offset; FLOAT a1, a2, a3, a4, a5, a6, a7, a8; +/* silence compiler warnings about unused-but-set variables: + depending on compile-time arguments either the odd or the + even-numbered variables will not be used */ + + (void)a1; + (void)a2; + (void)a3; + (void)a4; + (void)a5; + (void)a6; + (void)a7; + (void)a8; + lda *= 2; a_offset = a; diff --git a/kernel/generic/zgemm3m_ncopy_8.c b/kernel/generic/zgemm3m_ncopy_8.c index d3e5da8fa..5067656c3 100644 --- a/kernel/generic/zgemm3m_ncopy_8.c +++ b/kernel/generic/zgemm3m_ncopy_8.c @@ -69,6 +69,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT a1, a2, a3, a4, a5, a6, a7, a8; FLOAT a9, a10, a11, a12, a13, a14, a15, a16; +/* silence compiler warnings about unused-but-set variables: + depending on compile-time arguments either the odd or the + even-numbered variables will not be used */ + + (void)a1; + (void)a2; + (void)a3; + (void)a4; + (void)a5; + (void)a6; + (void)a7; + (void)a8; + (void)a9; + (void)a10; + (void)a11; + (void)a12; + (void)a13; + (void)a14; + (void)a15; + (void)a16; + #if 0 #ifdef REAL_ONLY fprintf(stderr, "NON Real "); diff --git a/kernel/generic/zgemm3m_tcopy_2.c b/kernel/generic/zgemm3m_tcopy_2.c index b8a2626ef..0ef3bf9c6 100644 --- a/kernel/generic/zgemm3m_tcopy_2.c +++ b/kernel/generic/zgemm3m_tcopy_2.c @@ -67,6 +67,19 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b_offset, *b_offset1, *b_offset2; FLOAT a1, a2, a3, a4, a5, a6, a7, a8; +/* silence compiler warnings about unused-but-set variables: + depending on compile-time arguments either the odd or the + even-numbered variables will not be used */ + + (void)a1; + (void)a2; + (void)a3; + (void)a4; + (void)a5; + (void)a6; + (void)a7; + (void)a8; + a_offset = a; b_offset = b; diff --git a/kernel/generic/zgemm3m_tcopy_4.c b/kernel/generic/zgemm3m_tcopy_4.c index 2c071ff91..86ec255ae 100644 --- a/kernel/generic/zgemm3m_tcopy_4.c +++ b/kernel/generic/zgemm3m_tcopy_4.c @@ -67,6 +67,19 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b_offset, *b_offset1, *b_offset2, *b_offset3; FLOAT a1, a2, a3, a4, a5, a6, a7, a8; +/* silence compiler warnings about unused-but-set variables: + depending on compile-time arguments either the odd or the + even-numbered variables will not be used */ + + (void)a1; + (void)a2; + (void)a3; + (void)a4; + (void)a5; + (void)a6; + (void)a7; + (void)a8; + a_offset = a; b_offset = b; diff --git a/kernel/generic/zgemm3m_tcopy_8.c b/kernel/generic/zgemm3m_tcopy_8.c index 5f7160253..3a9eb8c1d 100644 --- a/kernel/generic/zgemm3m_tcopy_8.c +++ b/kernel/generic/zgemm3m_tcopy_8.c @@ -69,6 +69,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT a1, a2, a3, a4, a5, a6, a7, a8; FLOAT a9, a10, a11, a12, a13, a14, a15, a16; +/* silence compiler warnings about unused-but-set variables: + depending on compile-time arguments either the odd or the + even-numbered variables will not be used */ + + (void)a1; + (void)a2; + (void)a3; + (void)a4; + (void)a5; + (void)a6; + (void)a7; + (void)a8; + (void)a9; + (void)a10; + (void)a11; + (void)a12; + (void)a13; + (void)a14; + (void)a15; + (void)a16; + #if 0 #ifdef REAL_ONLY fprintf(stderr, "TNS Real "); From 8c8313983b6999d4afd44f2896c02daddd8ad197 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 3 Dec 2017 18:19:30 +0100 Subject: [PATCH 088/122] Fix compiler warnings in ctest Various fixes for const correctness, stray tab characters and unused labels --- ctest/c_cblas1.c | 6 +++--- ctest/c_cblas2.c | 6 +++--- ctest/c_cblat2.f | 6 +++--- ctest/c_cblat3.f | 52 ++++++++++++++++++++++---------------------- ctest/c_dblas1.c | 12 +++++------ ctest/c_dblat1.f | 4 ++-- ctest/c_dblat2.f | 56 ++++++++++++++++++++++++------------------------ ctest/c_dblat3.f | 50 +++++++++++++++++++++--------------------- ctest/c_sblas1.c | 14 ++++++------ ctest/c_sblat1.f | 4 ++-- ctest/c_sblat2.f | 56 ++++++++++++++++++++++++------------------------ ctest/c_sblat3.f | 28 ++++++++++++------------ ctest/c_xerbla.c | 2 +- ctest/c_zblas1.c | 10 ++++----- ctest/c_zblas2.c | 6 +++--- ctest/c_zblat2.f | 4 ++-- ctest/c_zblat3.f | 52 ++++++++++++++++++++++---------------------- 17 files changed, 184 insertions(+), 184 deletions(-) diff --git a/ctest/c_cblas1.c b/ctest/c_cblas1.c index d723fd682..83925ad5e 100644 --- a/ctest/c_cblas1.c +++ b/ctest/c_cblas1.c @@ -9,7 +9,7 @@ #include "common.h" #include "cblas_test.h" -void F77_caxpy(const int *N, const void *alpha, void *X, +void F77_caxpy(const int *N, OPENBLAS_CONST void *alpha, void *X, const int *incX, void *Y, const int *incY) { cblas_caxpy(*N, alpha, X, *incX, Y, *incY); @@ -58,13 +58,13 @@ void F77_cswap( const int *N, void *X, const int *incX, return; } -int F77_icamax(const int *N, const void *X, const int *incX) +int F77_icamax(const int *N, OPENBLAS_CONST void *X, const int *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_icamax(*N, X, *incX)+1); } -float F77_scnrm2(const int *N, const void *X, const int *incX) +float F77_scnrm2(const int *N, OPENBLAS_CONST void *X, const int *incX) { return cblas_scnrm2(*N, X, *incX); } diff --git a/ctest/c_cblas2.c b/ctest/c_cblas2.c index 8fbe3b089..057096f32 100644 --- a/ctest/c_cblas2.c +++ b/ctest/c_cblas2.c @@ -9,9 +9,9 @@ #include "cblas_test.h" void F77_cgemv(int *order, char *transp, int *m, int *n, - const void *alpha, - CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx, - const void *beta, void *y, int *incy) { + OPENBLAS_CONST void *alpha, + CBLAS_TEST_COMPLEX *a, int *lda, OPENBLAS_CONST void *x, int *incx, + OPENBLAS_CONST void *beta, void *y, int *incy) { CBLAS_TEST_COMPLEX *A; int i,j,LDA; diff --git a/ctest/c_cblat2.f b/ctest/c_cblat2.f index d934ebb49..9252339d4 100644 --- a/ctest/c_cblat2.f +++ b/ctest/c_cblat2.f @@ -349,13 +349,13 @@ CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 1 ) + $ 1 ) END IF GO TO 200 * Test CGERC, 12, CGERU, 13. @@ -2660,7 +2660,7 @@ 50 CONTINUE END IF * - 60 CONTINUE +C 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/ctest/c_cblat3.f b/ctest/c_cblat3.f index 94144b875..96f190352 100644 --- a/ctest/c_cblat3.f +++ b/ctest/c_cblat3.f @@ -329,13 +329,13 @@ CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 1 ) + $ 1 ) END IF GO TO 190 * Test CHERK, 06, CSYRK, 07. @@ -357,13 +357,13 @@ CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 1 ) + $ 1 ) END IF GO TO 190 * @@ -707,9 +707,9 @@ 9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', - $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, - $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', +C $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, +C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) 9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1033,9 +1033,9 @@ 9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, - $ ',', F4.1, '), C,', I3, ') .' ) +C 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, +C $ ',', F4.1, '), C,', I3, ') .' ) 9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1385,9 +1385,9 @@ 9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT(' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', - $ ' .' ) +C 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', +C $ ' .' ) 9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1768,12 +1768,12 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ', - $ ' .' ) - 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1, - $ '), C,', I3, ') .' ) +C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ', +C $ ' .' ) +C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1, +C $ '), C,', I3, ') .' ) 9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -2221,12 +2221,12 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1, - $ ', C,', I3, ') .' ) - 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, - $ ',', F4.1, '), C,', I3, ') .' ) +C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1, +C $ ', C,', I3, ') .' ) +C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, +C $ ',', F4.1, '), C,', I3, ') .' ) 9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -2702,7 +2702,7 @@ 50 CONTINUE END IF * - 60 CONTINUE +C 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/ctest/c_dblas1.c b/ctest/c_dblas1.c index 764a75cdf..a288154c2 100644 --- a/ctest/c_dblas1.c +++ b/ctest/c_dblas1.c @@ -14,7 +14,7 @@ double F77_dasum(const int *N, double *X, const int *incX) return cblas_dasum(*N, X, *incX); } -void F77_daxpy(const int *N, const double *alpha, const double *X, +void F77_daxpy(const int *N, const double *alpha, OPENBLAS_CONST double *X, const int *incX, double *Y, const int *incY) { cblas_daxpy(*N, *alpha, X, *incX, Y, *incY); @@ -28,13 +28,13 @@ void F77_dcopy(const int *N, double *X, const int *incX, return; } -double F77_ddot(const int *N, const double *X, const int *incX, - const double *Y, const int *incY) +double F77_ddot(const int *N, OPENBLAS_CONST double *X, const int *incX, + OPENBLAS_CONST double *Y, const int *incY) { return cblas_ddot(*N, X, *incX, Y, *incY); } -double F77_dnrm2(const int *N, const double *X, const int *incX) +double F77_dnrm2(const int *N, OPENBLAS_CONST double *X, const int *incX) { return cblas_dnrm2(*N, X, *incX); } @@ -72,12 +72,12 @@ double F77_dzasum(const int *N, void *X, const int *incX) return cblas_dzasum(*N, X, *incX); } -double F77_dznrm2(const int *N, const void *X, const int *incX) +double F77_dznrm2(const int *N, OPENBLAS_CONST void *X, const int *incX) { return cblas_dznrm2(*N, X, *incX); } -int F77_idamax(const int *N, const double *X, const int *incX) +int F77_idamax(const int *N, OPENBLAS_CONST double *X, const int *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_idamax(*N, X, *incX)+1); diff --git a/ctest/c_dblat1.f b/ctest/c_dblat1.f index 0aeba45b1..c570a9140 100644 --- a/ctest/c_dblat1.f +++ b/ctest/c_dblat1.f @@ -211,11 +211,11 @@ IF (ICASE.EQ.7) THEN * .. DNRM2TEST .. STEMP(1) = DTRUE1(NP1) - CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.8) THEN * .. DASUMTEST .. STEMP(1) = DTRUE3(NP1) - CALL STEST1(DASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(DASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.9) THEN * .. DSCALTEST .. CALL DSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX) diff --git a/ctest/c_dblat2.f b/ctest/c_dblat2.f index 27ceda622..0c7801d77 100644 --- a/ctest/c_dblat2.f +++ b/ctest/c_dblat2.f @@ -345,13 +345,13 @@ CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 1 ) + $ 1 ) END IF GO TO 200 * Test DGER, 12. @@ -797,9 +797,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', 4( I3, ',' ), F4.1, $ ', A,', I3, ',',/ 10x,'X,', I2, ',', F4.1, ', Y,', @@ -1004,7 +1004,7 @@ $ REWIND NTRA CALL CDSBMV( IORDER, UPLO, N, K, ALPHA, $ AA, LDA, XX, INCX, BETA, YY, - $ INCY ) + $ INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, @@ -1156,9 +1156,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', AP', $ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) @@ -1191,7 +1191,7 @@ * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -1216,7 +1216,7 @@ EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, CDTBMV, CDTBSV, CDTPMV, - $ CDTPSV, CDTRMV, CDTRSV + $ CDTPSV, CDTRMV, CDTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. @@ -1544,9 +1544,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ',A12, '(', 3( A14,',' ),/ 10x, I3, ', AP, ', $ 'X,', I2, ') .' ) @@ -1579,7 +1579,7 @@ * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -1819,9 +1819,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ',A12, '(', 2( I3, ',' ), F4.1, ', X,', I2, @@ -1851,7 +1851,7 @@ * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -1973,7 +1973,7 @@ IF( REWI ) $ REWIND NTRA CALL CDSYR( IORDER, UPLO, N, ALPHA, XX, INCX, - $ AA, LDA ) + $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, CUPLO, N, @@ -2113,9 +2113,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,', @@ -2147,7 +2147,7 @@ * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -2445,9 +2445,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,', @@ -2833,7 +2833,7 @@ 50 CONTINUE END IF * - 60 CONTINUE +C 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/ctest/c_dblat3.f b/ctest/c_dblat3.f index 72ad80c92..252fe3b71 100644 --- a/ctest/c_dblat3.f +++ b/ctest/c_dblat3.f @@ -56,7 +56,7 @@ * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NTRA, - $ LAYOUT + $ LAYOUT LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR, CORDER, RORDER CHARACTER*1 TRANSA, TRANSB @@ -78,7 +78,7 @@ EXTERNAL DDIFF, LDE * .. External Subroutines .. EXTERNAL DCHK1, DCHK2, DCHK3, DCHK4, DCHK5, CD3CHKE, - $ DMMCH + $ DMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. @@ -323,13 +323,13 @@ CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 1 ) + $ 1 ) END IF GO TO 190 * Test DSYRK, 05. @@ -351,13 +351,13 @@ CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 1 ) + $ 1 ) END IF GO TO 190 * @@ -588,7 +588,7 @@ $ REWIND NTRA CALL CDGEMM( IORDER, TRANSA, TRANSB, M, N, $ K, ALPHA, AA, LDA, BB, LDB, - $ BETA, CC, LDC ) + $ BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * @@ -694,9 +694,9 @@ 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', - $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', - $ 'C,', I3, ').' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', +C $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', +C $ 'C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1007,9 +1007,9 @@ 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', - $ ' .' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', +C $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1201,7 +1201,7 @@ $ REWIND NTRA CALL CDTRMM( IORDER, SIDE, UPLO, TRANSA, $ DIAG, M, N, ALPHA, AA, LDA, - $ BB, LDB ) + $ BB, LDB ) ELSE IF( SNAME( 10: 11 ).EQ.'sm' )THEN IF( TRACE ) $ CALL DPRCN3( NTRA, NC, SNAME, IORDER, @@ -1211,7 +1211,7 @@ $ REWIND NTRA CALL CDTRSM( IORDER, SIDE, UPLO, TRANSA, $ DIAG, M, N, ALPHA, AA, LDA, - $ BB, LDB ) + $ BB, LDB ) END IF * * Check if error-exit was taken incorrectly. @@ -1355,8 +1355,8 @@ 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ', B,', I3, ') .' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ', B,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1681,8 +1681,8 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) +C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1726,7 +1726,7 @@ SUBROUTINE DCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ IORDER ) + $ IORDER ) * * Tests DSYR2K. * @@ -1888,7 +1888,7 @@ $ REWIND NTRA CALL CDSYR2K( IORDER, UPLO, TRANS, N, K, $ ALPHA, AA, LDA, BB, LDB, BETA, - $ CC, LDC ) + $ CC, LDC ) * * Check if error-exit was taken incorrectly. * @@ -2037,9 +2037,9 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', - $ ' .' ) +C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', +C $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -2399,7 +2399,7 @@ 50 CONTINUE END IF * - 60 CONTINUE +C 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/ctest/c_sblas1.c b/ctest/c_sblas1.c index f1b18b6c8..1f301a693 100644 --- a/ctest/c_sblas1.c +++ b/ctest/c_sblas1.c @@ -14,7 +14,7 @@ float F77_sasum(blasint *N, float *X, blasint *incX) return cblas_sasum(*N, X, *incX); } -void F77_saxpy(blasint *N, const float *alpha, const float *X, +void F77_saxpy(blasint *N, const float *alpha, OPENBLAS_CONST float *X, blasint *incX, float *Y, blasint *incY) { cblas_saxpy(*N, *alpha, X, *incX, Y, *incY); @@ -26,25 +26,25 @@ float F77_scasum(blasint *N, float *X, blasint *incX) return cblas_scasum(*N, X, *incX); } -float F77_scnrm2(blasint *N, const float *X, blasint *incX) +float F77_scnrm2(blasint *N, OPENBLAS_CONST float *X, blasint *incX) { return cblas_scnrm2(*N, X, *incX); } -void F77_scopy(blasint *N, const float *X, blasint *incX, +void F77_scopy(blasint *N, OPENBLAS_CONST float *X, blasint *incX, float *Y, blasint *incY) { cblas_scopy(*N, X, *incX, Y, *incY); return; } -float F77_sdot(blasint *N, const float *X, blasint *incX, - const float *Y, blasint *incY) +float F77_sdot(blasint *N, OPENBLAS_CONST float *X, blasint *incX, + OPENBLAS_CONST float *Y, blasint *incY) { return cblas_sdot(*N, X, *incX, Y, *incY); } -float F77_snrm2(blasint *N, const float *X, blasint *incX) +float F77_snrm2(blasint *N, OPENBLAS_CONST float *X, blasint *incX) { return cblas_snrm2(*N, X, *incX); } @@ -76,7 +76,7 @@ void F77_sswap( blasint *N, float *X, blasint *incX, return; } -int F77_isamax(blasint *N, const float *X, blasint *incX) +int F77_isamax(blasint *N, OPENBLAS_CONST float *X, blasint *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_isamax(*N, X, *incX)+1); diff --git a/ctest/c_sblat1.f b/ctest/c_sblat1.f index de2b0380b..773787d6f 100644 --- a/ctest/c_sblat1.f +++ b/ctest/c_sblat1.f @@ -211,11 +211,11 @@ IF (ICASE.EQ.7) THEN * .. SNRM2TEST .. STEMP(1) = DTRUE1(NP1) - CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.8) THEN * .. SASUMTEST .. STEMP(1) = DTRUE3(NP1) - CALL STEST1(SASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC) + CALL STEST1(SASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.9) THEN * .. SSCALTEST .. CALL SSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX) diff --git a/ctest/c_sblat2.f b/ctest/c_sblat2.f index 8bd23c3e9..6386abe04 100644 --- a/ctest/c_sblat2.f +++ b/ctest/c_sblat2.f @@ -345,13 +345,13 @@ CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 1 ) + $ 1 ) END IF GO TO 200 * Test SGER, 12. @@ -797,9 +797,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', 4( I3, ',' ), F4.1, $ ', A,', I3, ',',/ 10x, 'X,', I2, ',', F4.1, ', Y,', @@ -1004,7 +1004,7 @@ $ REWIND NTRA CALL CSSBMV( IORDER, UPLO, N, K, ALPHA, $ AA, LDA, XX, INCX, BETA, YY, - $ INCY ) + $ INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, @@ -1156,9 +1156,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', AP', $ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) @@ -1191,7 +1191,7 @@ * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -1216,7 +1216,7 @@ EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, CSTBMV, CSTBSV, CSTPMV, - $ CSTPSV, CSTRMV, CSTRSV + $ CSTPSV, CSTRMV, CSTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. @@ -1544,9 +1544,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ',A12, '(', 3( A14,',' ),/ 10x, I3, ', AP, ', $ 'X,', I2, ') .' ) @@ -1579,7 +1579,7 @@ * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -1819,9 +1819,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ',A12, '(', 2( I3, ',' ), F4.1, ', X,', I2, @@ -1851,7 +1851,7 @@ * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -1973,7 +1973,7 @@ IF( REWI ) $ REWIND NTRA CALL CSSYR( IORDER, UPLO, N, ALPHA, XX, INCX, - $ AA, LDA ) + $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, CUPLO, N, @@ -2113,9 +2113,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,', @@ -2147,7 +2147,7 @@ * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA, - $ IORDER + $ IORDER LOGICAL FATAL, REWI, TRACE CHARACTER*12 SNAME * .. Array Arguments .. @@ -2445,9 +2445,9 @@ $ ' (', I6, ' CALL', 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) - 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', - $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, - $ ' - SUSPECT *******' ) +C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', +C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, +C $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,', @@ -2833,7 +2833,7 @@ 50 CONTINUE END IF * - 60 CONTINUE +C 60 CONTINUE LSERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/ctest/c_sblat3.f b/ctest/c_sblat3.f index 31babd9a1..4cfc1c706 100644 --- a/ctest/c_sblat3.f +++ b/ctest/c_sblat3.f @@ -694,9 +694,9 @@ 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', - $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', - $ 'C,', I3, ').' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', +C $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', +C $ 'C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1011,9 +1011,9 @@ 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', - $ ' .' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', +C $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1359,8 +1359,8 @@ 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ', B,', I3, ') .' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ', B,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1686,8 +1686,8 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) +C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -2041,9 +2041,9 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', - $ ' .' ) +C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', +C $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -2403,7 +2403,7 @@ 50 CONTINUE END IF * - 60 CONTINUE +C 60 CONTINUE LSERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/ctest/c_xerbla.c b/ctest/c_xerbla.c index dd23a4990..9c5357653 100644 --- a/ctest/c_xerbla.c +++ b/ctest/c_xerbla.c @@ -131,7 +131,7 @@ void F77_xerbla(char *srname, void *vinfo) int BLASFUNC(xerbla)(char *name, blasint *info, blasint length) { F77_xerbla(name, info); - + return 0; }; diff --git a/ctest/c_zblas1.c b/ctest/c_zblas1.c index 160ef4ba8..14c90d049 100644 --- a/ctest/c_zblas1.c +++ b/ctest/c_zblas1.c @@ -9,7 +9,7 @@ #include "common.h" #include "cblas_test.h" -void F77_zaxpy(const int *N, const void *alpha, void *X, +void F77_zaxpy(const int *N, OPENBLAS_CONST void *alpha, void *X, const int *incX, void *Y, const int *incY) { cblas_zaxpy(*N, alpha, X, *incX, Y, *incY); @@ -23,8 +23,8 @@ void F77_zcopy(const int *N, void *X, const int *incX, return; } -void F77_zdotc(const int *N, const void *X, const int *incX, - const void *Y, const int *incY,void *dotc) +void F77_zdotc(const int *N, OPENBLAS_CONST void *X, const int *incX, + OPENBLAS_CONST void *Y, const int *incY,void *dotc) { cblas_zdotc_sub(*N, X, *incX, Y, *incY, dotc); return; @@ -58,13 +58,13 @@ void F77_zswap( const int *N, void *X, const int *incX, return; } -int F77_izamax(const int *N, const void *X, const int *incX) +int F77_izamax(const int *N, OPENBLAS_CONST void *X, const int *incX) { if (*N < 1 || *incX < 1) return(0); return(cblas_izamax(*N, X, *incX)+1); } -double F77_dznrm2(const int *N, const void *X, const int *incX) +double F77_dznrm2(const int *N, OPENBLAS_CONST void *X, const int *incX) { return cblas_dznrm2(*N, X, *incX); } diff --git a/ctest/c_zblas2.c b/ctest/c_zblas2.c index ab1bd79bd..8854dcc6d 100644 --- a/ctest/c_zblas2.c +++ b/ctest/c_zblas2.c @@ -9,9 +9,9 @@ #include "cblas_test.h" void F77_zgemv(int *order, char *transp, int *m, int *n, - const void *alpha, - CBLAS_TEST_ZOMPLEX *a, int *lda, const void *x, int *incx, - const void *beta, void *y, int *incy) { + OPENBLAS_CONST void *alpha, + CBLAS_TEST_ZOMPLEX *a, int *lda, OPENBLAS_CONST void *x, int *incx, + OPENBLAS_CONST void *beta, void *y, int *incy) { CBLAS_TEST_ZOMPLEX *A; int i,j,LDA; diff --git a/ctest/c_zblat2.f b/ctest/c_zblat2.f index 439260230..cc5c1bad1 100644 --- a/ctest/c_zblat2.f +++ b/ctest/c_zblat2.f @@ -349,13 +349,13 @@ CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z, - $ 1 ) + $ 1 ) END IF GO TO 200 * Test ZGERC, 12, ZGERU, 13. diff --git a/ctest/c_zblat3.f b/ctest/c_zblat3.f index 21e743d17..5df834b2e 100644 --- a/ctest/c_zblat3.f +++ b/ctest/c_zblat3.f @@ -330,13 +330,13 @@ CALL ZCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL ZCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C, - $ 1 ) + $ 1 ) END IF GO TO 190 * Test ZHERK, 06, ZSYRK, 07. @@ -358,13 +358,13 @@ CALL ZCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 0 ) + $ 0 ) END IF IF (RORDER) THEN CALL ZCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W, - $ 1 ) + $ 1 ) END IF GO TO 190 * @@ -708,9 +708,9 @@ 9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', - $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, - $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) +C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',', +C $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, +C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) 9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1034,9 +1034,9 @@ 9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, - $ ',', F4.1, '), C,', I3, ') .' ) +C 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, +C $ ',', F4.1, '), C,', I3, ') .' ) 9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1386,9 +1386,9 @@ 9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT(' ******* ', A12,' FAILED ON CALL NUMBER:' ) - 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', - $ ' .' ) +C 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', +C $ ' .' ) 9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -1769,12 +1769,12 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ', - $ ' .' ) - 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1, - $ '), C,', I3, ') .' ) +C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ', +C $ ' .' ) +C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1, +C $ '), C,', I3, ') .' ) 9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -2222,12 +2222,12 @@ $ 'ANGED INCORRECTLY *******' ) 9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) - 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1, - $ ', C,', I3, ') .' ) - 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), - $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, - $ ',', F4.1, '), C,', I3, ') .' ) +C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1, +C $ ', C,', I3, ') .' ) +C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ), +C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, +C $ ',', F4.1, '), C,', I3, ') .' ) 9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * @@ -2706,7 +2706,7 @@ 50 CONTINUE END IF * - 60 CONTINUE +C 60 CONTINUE LZERES = .TRUE. GO TO 80 70 CONTINUE From 38763ec4f3939769cd73c71a470de4f689f348a1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 3 Dec 2017 22:40:54 +0100 Subject: [PATCH 089/122] Disable multithreading for trmv as a (hopefully temporary) workaround for #1332 --- interface/trmv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/interface/trmv.c b/interface/trmv.c index 2e52527a3..7d17dc67b 100644 --- a/interface/trmv.c +++ b/interface/trmv.c @@ -220,6 +220,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #ifdef SMP nthreads = num_cpu_avail(2); +/*FIXME trmv_thread was found to be broken, see issue 1332 */ + nthreads = 1; + if (nthreads == 1) { #endif From b414283f48f50fe195601fa83650d0531eebe9b3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 3 Dec 2017 22:41:54 +0100 Subject: [PATCH 090/122] Disable gemv unrolling as a (hopefully temporary) workaround for #1332 --- driver/level2/trmv_U.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/driver/level2/trmv_U.c b/driver/level2/trmv_U.c index a0aa7ef0e..7f8895e7f 100644 --- a/driver/level2/trmv_U.c +++ b/driver/level2/trmv_U.c @@ -54,12 +54,16 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu COPY_K(m, b, incb, buffer, 1); } - for (is = 0; is < m; is += DTB_ENTRIES){ +/*FIXME the GEMV unrolling performed here was found to be broken, see issue 1332 */ +/* Multiplying DTB size by 100 is just a quick-and-dirty hack to disable it for now[B */ - min_i = MIN(m - is, DTB_ENTRIES); + for (is = 0; is < m; is += DTB_ENTRIES * 100){ + + min_i = MIN(m - is, DTB_ENTRIES * 100); #ifndef TRANSA if (is > 0){ +fprintf(stderr,"WARNING unrolling of the trmv_U loop may give wrong results\n"); GEMV_N(is, min_i, 0, dp1, a + is * lda, lda, B + is, 1, From 28ae3ca76fffe35206afaff947283c1dfff7f5e8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 5 Dec 2017 12:54:15 +0100 Subject: [PATCH 091/122] Limit MAX_CPU to 1024 for now Some Linux distributions (notably SuSE) have raised CPU_SETSIZE to 4096, apparently disregarding API limitations. From #1348, the highest value to survive array initialization (on a desktop system) is 3232, and 1024 - which is the more usual CPU_SETSIZE limit, was demonstrated to work fine on an actual bignuma system. --- driver/others/init.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index 5fb032fd5..c1bbdac3f 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -90,9 +90,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(BIGNUMA) // max number of nodes as defined in numa.h -// max cpus as defined in sched.h +// max cpus as defined in most sched.h +// cannot use CPU_SETSIZE directly as some +// Linux distributors set it to 4096 #define MAX_NODES 128 -#define MAX_CPUS CPU_SETSIZE +#define MAX_CPUS 1024 #else #define MAX_NODES 16 #define MAX_CPUS 256 From 9381ac2748ff972faeb745ae26ea1465da15ac03 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 5 Dec 2017 13:02:48 +0100 Subject: [PATCH 092/122] Explicitly link against libm on Android with cmake as well Patch from #1384 --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c52b2501..1bdcd52ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,6 +151,11 @@ endif() # add objects to the openblas lib add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) +# Android needs to explicitly link against libm +if(ANDROID) + target_link_libraries(${OpenBLAS_LIBNAME} m) +endif() + # Handle MSVC exports if(MSVC AND BUILD_SHARED_LIBS) if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4) From 281a2b952ffec9a132f4d28a1b6aa86f54bd6cf7 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 5 Dec 2017 19:54:10 +0100 Subject: [PATCH 093/122] warning cleanup (#1380) * dead increments in driver/level2 * dead increments in kernel/generic * part dead increments in kernel/x86_64 --- driver/level2/gbmv_thread.c | 2 +- driver/level2/sbmv_thread.c | 2 +- driver/level2/tbmv_thread.c | 2 +- driver/level2/tpmv_thread.c | 6 +----- kernel/generic/gemm_ncopy_16.c | 2 +- kernel/generic/gemm_tcopy_16.c | 2 +- kernel/generic/trmm_lncopy_16.c | 16 ++++++++-------- kernel/generic/trmm_ltcopy_16.c | 18 +++++++++--------- kernel/generic/trmm_uncopy_16.c | 16 ++++++++-------- kernel/generic/trmm_utcopy_16.c | 24 ++++++++++++------------ kernel/x86_64/cgemv_n_4.c | 4 ++-- kernel/x86_64/cgemv_t_4.c | 4 ++-- kernel/x86_64/sgemv_n_4.c | 4 ++-- kernel/x86_64/sgemv_t_4.c | 4 ++-- kernel/x86_64/zgemv_n_4.c | 4 ++-- kernel/x86_64/zgemv_t_4.c | 4 ++-- 16 files changed, 55 insertions(+), 59 deletions(-) diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index 9d374676e..c0cb947dc 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -96,7 +96,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F COPY_K(args -> m, x, incx, buffer, 1); x = buffer; - buffer += ((COMPSIZE * args -> m + 1023) & ~1023); + // buffer += ((COMPSIZE * args -> m + 1023) & ~1023); } #endif diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index ce841ee0e..e59451c5a 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -91,7 +91,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F COPY_K(n, x, incx, buffer, 1); x = buffer; - buffer += ((COMPSIZE * n + 1023) & ~1023); + // buffer += ((COMPSIZE * n + 1023) & ~1023); } SCAL_K(n, 0, 0, ZERO, diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index aaf4958e2..35e5ff1cd 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -107,7 +107,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F COPY_K(args -> n, x, incx, buffer, 1); x = buffer; - buffer += ((args -> n * COMPSIZE + 1023) & ~1023); + // buffer += ((args -> n * COMPSIZE + 1023) & ~1023); } if (range_n) y += *range_n * COMPSIZE; diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c index 79438ba29..b7eb10571 100644 --- a/driver/level2/tpmv_thread.c +++ b/driver/level2/tpmv_thread.c @@ -112,7 +112,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #endif x = buffer; - buffer += ((COMPSIZE * args -> m + 1023) & ~1023); + // buffer += ((COMPSIZE * args -> m + 1023) & ~1023); } #ifndef TRANS @@ -234,11 +234,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F return 0; } -#ifndef COMPLEX -int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthreads){ -#else int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthreads){ -#endif blas_arg_t args; blas_queue_t queue[MAX_CPU_NUMBER]; diff --git a/kernel/generic/gemm_ncopy_16.c b/kernel/generic/gemm_ncopy_16.c index 9bd40f121..5f91d0dbe 100644 --- a/kernel/generic/gemm_ncopy_16.c +++ b/kernel/generic/gemm_ncopy_16.c @@ -429,7 +429,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ ctemp01 = *(aoffset1 + 0); *(boffset + 0) = ctemp01; - boffset += 1; + // boffset += 1; } } diff --git a/kernel/generic/gemm_tcopy_16.c b/kernel/generic/gemm_tcopy_16.c index 6528d9489..56268ebf2 100644 --- a/kernel/generic/gemm_tcopy_16.c +++ b/kernel/generic/gemm_tcopy_16.c @@ -379,7 +379,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ if (m & 1){ ctemp01 = *(aoffset1 + 0); *(boffset + 0) = ctemp01; - boffset += 1; + // boffset += 1; } } diff --git a/kernel/generic/trmm_lncopy_16.c b/kernel/generic/trmm_lncopy_16.c index 0795a8386..4c0a76cbd 100644 --- a/kernel/generic/trmm_lncopy_16.c +++ b/kernel/generic/trmm_lncopy_16.c @@ -556,7 +556,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X < posY) { - a01 += i * lda; + /* a01 += i * lda; a02 += i * lda; a03 += i * lda; a04 += i * lda; @@ -571,7 +571,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON a13 += i * lda; a14 += i * lda; a15 += i * lda; - a16 += i * lda; + a16 += i * lda; */ b += 16 * i; } else { #ifdef UNIT @@ -1147,14 +1147,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X < posY) { - a01 += i * lda; + /* a01 += i * lda; a02 += i * lda; a03 += i * lda; a04 += i * lda; a05 += i * lda; a06 += i * lda; a07 += i * lda; - a08 += i * lda; + a08 += i * lda; */ b += 8 * i; } else { #ifdef UNIT @@ -1380,10 +1380,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X < posY) { - a01 += i * lda; + /* a01 += i * lda; a02 += i * lda; a03 += i * lda; - a04 += i * lda; + a04 += i * lda; */ b += 4 * i; } else { #ifdef UNIT @@ -1488,8 +1488,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b += 2; } else if (X < posY) { - a01 += lda; - a02 += lda; + /* a01 += lda; + a02 += lda; */ b += 2; } else { #ifdef UNIT diff --git a/kernel/generic/trmm_ltcopy_16.c b/kernel/generic/trmm_ltcopy_16.c index b8469d00a..c8fde524e 100644 --- a/kernel/generic/trmm_ltcopy_16.c +++ b/kernel/generic/trmm_ltcopy_16.c @@ -518,7 +518,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 15); if (i > 0) { if (X > posY) { - a01 += i; + /* a01 += i; a02 += i; a03 += i; a04 += i; @@ -533,7 +533,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON a13 += i; a14 += i; a15 += i; - a16 += i; + a16 += i; */ b += 16 * i; } else if (X < posY) { @@ -1128,14 +1128,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 7); if (i > 0) { if (X > posY) { - a01 += i; + /* a01 += i; a02 += i; a03 += i; a04 += i; a05 += i; a06 += i; a07 += i; - a08 += i; + a08 += i; */ b += 8 * i; } else if (X < posY) { @@ -1368,10 +1368,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 3); if (i > 0) { if (X > posY) { - a01 += i; + /* a01 += i; a02 += i; a03 += i; - a04 += i; + a04 += i; */ b += 4 * i; } else if (X < posY) { @@ -1490,8 +1490,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X < posY) { b[ 0] = *(a01 + 0); b[ 1] = *(a01 + 1); - a01 += lda; - a02 += lda; + /* a01 += lda; + a02 += lda; */ b += 2; } } else { @@ -1540,7 +1540,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i --; } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/generic/trmm_uncopy_16.c b/kernel/generic/trmm_uncopy_16.c index 19b2fdd68..5fb769bbc 100644 --- a/kernel/generic/trmm_uncopy_16.c +++ b/kernel/generic/trmm_uncopy_16.c @@ -556,7 +556,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X > posY) { - a01 += i * lda; + /* a01 += i * lda; a02 += i * lda; a03 += i * lda; a04 += i * lda; @@ -571,7 +571,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON a13 += i * lda; a14 += i * lda; a15 += i * lda; - a16 += i * lda; + a16 += i * lda; */ b += 16 * i; } else { #ifdef UNIT @@ -1147,14 +1147,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X > posY) { - a01 += i * lda; + /* a01 += i * lda; a02 += i * lda; a03 += i * lda; a04 += i * lda; a05 += i * lda; a06 += i * lda; a07 += i * lda; - a08 += i * lda; + a08 += i * lda; */ b += 8 * i; } else { #ifdef UNIT @@ -1379,10 +1379,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else if (X > posY) { - a01 += i * lda; + /* a01 += i * lda; a02 += i * lda; a03 += i * lda; - a04 += i * lda; + a04 += i * lda; */ b += 4 * i; } else { #ifdef UNIT @@ -1488,8 +1488,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b += 2; } else if (X > posY) { - a01 += lda; - a02 += lda; + /* a01 += lda; + a02 += lda; */ b += 2; } else { #ifdef UNIT diff --git a/kernel/generic/trmm_utcopy_16.c b/kernel/generic/trmm_utcopy_16.c index b83989f55..5d0f7dd2e 100644 --- a/kernel/generic/trmm_utcopy_16.c +++ b/kernel/generic/trmm_utcopy_16.c @@ -518,7 +518,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 15); if (i > 0) { if (X < posY) { - a01 += i; + /* a01 += i; a02 += i; a03 += i; a04 += i; @@ -533,7 +533,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON a13 += i; a14 += i; a15 += i; - a16 += i; + a16 += i; */ b += 16 * i; } else if (X > posY) { @@ -1130,14 +1130,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 7); if (i > 0) { if (X < posY) { - a01 += i; + /* a01 += i; a02 += i; a03 += i; a04 += i; a05 += i; a06 += i; a07 += i; - a08 += i; + a08 += i; */ b += 8 * i; } else if (X > posY) { @@ -1156,13 +1156,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON b += 8; } - a02 += i * lda; + /* a02 += i * lda; a03 += i * lda; a04 += i * lda; a05 += i * lda; a06 += i * lda; a07 += i * lda; - a08 += i * lda; + a08 += i * lda; */ } else { #ifdef UNIT b[ 0] = ONE; @@ -1371,10 +1371,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 3); if (i > 0) { if (X < posY) { - a01 += i; + /* a01 += i; a02 += i; a03 += i; - a04 += i; + a04 += i; */ b += 4 * i; } else if (X > posY) { @@ -1387,9 +1387,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON a01 += lda; b += 4; } - a02 += lda; + /* a02 += lda; a03 += lda; - a04 += lda; + a04 += lda; */ } else { #ifdef UNIT @@ -1492,7 +1492,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X > posY) { b[ 0] = *(a01 + 0); b[ 1] = *(a01 + 1); - a01 += lda; + // a01 += lda; b += 2; } else { #ifdef UNIT @@ -1543,7 +1543,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i --; } while (i > 0); } - posY += 1; + // posY += 1; } return 0; diff --git a/kernel/x86_64/cgemv_n_4.c b/kernel/x86_64/cgemv_n_4.c index 14cc9fe09..770c955b2 100644 --- a/kernel/x86_64/cgemv_n_4.c +++ b/kernel/x86_64/cgemv_n_4.c @@ -298,8 +298,8 @@ printf("%s %d %d %.16f %.16f %d %d %d\n","zgemv_n",m,n,alpha_r,alpha_i,lda,inc_x if ( n2 & 1 ) { cgemv_kernel_4x1(NB,a_ptr,x_ptr,ybuffer); - x_ptr += 2; - a_ptr += lda; + /* x_ptr += 2; + a_ptr += lda; */ } } diff --git a/kernel/x86_64/cgemv_t_4.c b/kernel/x86_64/cgemv_t_4.c index 3dc19dc4e..d75e58fdd 100644 --- a/kernel/x86_64/cgemv_t_4.c +++ b/kernel/x86_64/cgemv_t_4.c @@ -300,8 +300,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, if ( n2 & 1 ) { cgemv_kernel_4x1(NB,a_ptr,xbuffer,y_ptr,alpha); - a_ptr += lda; - y_ptr += 2; + /* a_ptr += lda; + y_ptr += 2; */ } diff --git a/kernel/x86_64/sgemv_n_4.c b/kernel/x86_64/sgemv_n_4.c index 60074d3d9..fd028964b 100644 --- a/kernel/x86_64/sgemv_n_4.c +++ b/kernel/x86_64/sgemv_n_4.c @@ -392,8 +392,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO if ( n2 & 1 ) { sgemv_kernel_4x1(NB,a_ptr,x_ptr,ybuffer,&alpha); - a_ptr += lda; - x_ptr += 1; + /* a_ptr += lda; + x_ptr += 1a; */ } diff --git a/kernel/x86_64/sgemv_t_4.c b/kernel/x86_64/sgemv_t_4.c index 6f9c7caa0..f04d461f7 100644 --- a/kernel/x86_64/sgemv_t_4.c +++ b/kernel/x86_64/sgemv_t_4.c @@ -406,9 +406,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO { sgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer); - a_ptr += lda; + // a_ptr += lda; *y_ptr += ybuffer[0] * alpha; - y_ptr += inc_y; + // y_ptr += inc_y; } a += NB; diff --git a/kernel/x86_64/zgemv_n_4.c b/kernel/x86_64/zgemv_n_4.c index 2d7fd5798..f6f88155c 100644 --- a/kernel/x86_64/zgemv_n_4.c +++ b/kernel/x86_64/zgemv_n_4.c @@ -300,8 +300,8 @@ printf("%s %d %d %.16f %.16f %d %d %d\n","zgemv_n",m,n,alpha_r,alpha_i,lda,inc_x if ( n2 & 1 ) { zgemv_kernel_4x1(NB,a_ptr,x_ptr,ybuffer); - x_ptr += 2; - a_ptr += lda; + /* x_ptr += 2; + a_ptr += lda; */ } } diff --git a/kernel/x86_64/zgemv_t_4.c b/kernel/x86_64/zgemv_t_4.c index c4a38202b..3e4b7d5df 100644 --- a/kernel/x86_64/zgemv_t_4.c +++ b/kernel/x86_64/zgemv_t_4.c @@ -302,8 +302,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, if ( n2 & 1 ) { zgemv_kernel_4x1(NB,a_ptr,xbuffer,y_ptr,alpha); - a_ptr += lda; - y_ptr += 2; + /* a_ptr += lda; + y_ptr += 2; */ } From 177b78c8b426185c298b3b31b4a35ce7a4658cc2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 9 Dec 2017 22:29:03 +0100 Subject: [PATCH 094/122] Issue1388 (#1389) * Calculation of chunk range limits was ignoring num_cpu bug introduced by me in #1262 - should fix #1388 * Calculation of range limits was ignoring num_cpu bug introduced by me in #1262 * Calculation of chunk range limits was ignoring num_cpu bug introduced by me in #1262 * Calculation of chunk range limits was ignoring num_cpu bug introduced by me in #1262 * Calculation of chunk range limits was ignoring num_cpu bug introduced by me in #1262 * Calculation of chunk range limits was ignoring num_cpu bug introduced by me in #1262 --- driver/level2/gbmv_thread.c | 4 ++-- driver/level2/sbmv_thread.c | 6 +++--- driver/level2/spmv_thread.c | 4 ++-- driver/level2/symv_thread.c | 4 ++-- driver/level2/tbmv_thread.c | 6 +++--- driver/level2/tpmv_thread.c | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index c0cb947dc..4fce9744f 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -230,10 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT #ifndef TRANSA range_m[num_cpu] = num_cpu * ((m + 15) & ~15); - if (range_m[num_cpu] > m) range_m[num_cpu] = m; + if (range_m[num_cpu] > m * num_cpu) range_m[num_cpu] = m * num_cpu; #else range_m[num_cpu] = num_cpu * ((n + 15) & ~15); - if (range_m[num_cpu] > n) range_m[num_cpu] = n; + if (range_m[num_cpu] > n * num_cpu) range_m[num_cpu] = n * num_cpu; #endif queue[num_cpu].mode = mode; diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index e59451c5a..50efa350a 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -246,7 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; @@ -286,7 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; @@ -318,7 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * ((n + 15) & ~15); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c index 0b4087430..e52b08d0e 100644 --- a/driver/level2/spmv_thread.c +++ b/driver/level2/spmv_thread.c @@ -246,7 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = spmv_kernel; @@ -286,7 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = spmv_kernel; diff --git a/driver/level2/symv_thread.c b/driver/level2/symv_thread.c index 8d4cd249c..ab783de2b 100644 --- a/driver/level2/symv_thread.c +++ b/driver/level2/symv_thread.c @@ -177,7 +177,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode; queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel; @@ -226,7 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = symv_kernel; diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index 35e5ff1cd..67109b53f 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -288,7 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -328,7 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -358,7 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); - if (range_n[num_cpu] > n) range_n[num_cpu] = n; + if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c index b7eb10571..a077591a5 100644 --- a/driver/level2/tpmv_thread.c +++ b/driver/level2/tpmv_thread.c @@ -303,7 +303,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = tpmv_kernel; @@ -343,7 +343,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = tpmv_kernel; From 0623636c982d6b98606b56b553ccdc60c9bbda66 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 10 Dec 2017 19:24:31 +0100 Subject: [PATCH 095/122] Use Sandybridge daxpy kernel on Haswell and Zen for now The testcase from #1332 exposes a problem in daxpy_microk_haswell-2.c that is not seen with any of the other Intel x86_64 microkernels. --- kernel/x86_64/daxpy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/x86_64/daxpy.c b/kernel/x86_64/daxpy.c index 4bde62824..36410d360 100644 --- a/kernel/x86_64/daxpy.c +++ b/kernel/x86_64/daxpy.c @@ -38,7 +38,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #elif defined(PILEDRIVER) #include "daxpy_microk_piledriver-2.c" #elif defined(HASWELL) || defined(ZEN) +/* +this appears to be broken, see issue 1332 #include "daxpy_microk_haswell-2.c" +*/ +#include "daxpy_microk_sandy-2.c" #elif defined(SANDYBRIDGE) #include "daxpy_microk_sandy-2.c" #endif From 43c0622e7b2713c77106426a968547fe7c991a10 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Dec 2017 18:40:39 +0100 Subject: [PATCH 096/122] Retire Piledriver/Steamroller/Excavator daxpy microkernels as well related to issue #1332 --- kernel/x86_64/daxpy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/x86_64/daxpy.c b/kernel/x86_64/daxpy.c index 36410d360..20075b815 100644 --- a/kernel/x86_64/daxpy.c +++ b/kernel/x86_64/daxpy.c @@ -33,15 +33,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "daxpy_microk_nehalem-2.c" #elif defined(BULLDOZER) #include "daxpy_microk_bulldozer-2.c" +/* +these appear to be broken, see issue 1332 #elif defined(STEAMROLLER) || defined(EXCAVATOR) #include "daxpy_microk_steamroller-2.c" #elif defined(PILEDRIVER) #include "daxpy_microk_piledriver-2.c" #elif defined(HASWELL) || defined(ZEN) -/* -this appears to be broken, see issue 1332 #include "daxpy_microk_haswell-2.c" */ +#elif defined(HASWELL) || defined(ZEN) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #include "daxpy_microk_sandy-2.c" #elif defined(SANDYBRIDGE) #include "daxpy_microk_sandy-2.c" From 599de9e598d3e7eeb847ea96cea29ffcae056696 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Dec 2017 19:43:09 +0100 Subject: [PATCH 097/122] Restore LAPACKE files for Xgeqpf, Xggsvd and Xggsvp These were inadvertently dropped from the list in my PR #1095 --- cmake/lapacke.cmake | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index 7c7c0d8a9..0fc88b882 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -70,6 +70,8 @@ set(CSRC lapacke_cgeqlf_work.c lapacke_cgeqp3.c lapacke_cgeqp3_work.c + lapacke_cgeqpf.c + lapacke_cgeqpf_work.c lapacke_cgeqr.c lapacke_cgeqr_work.c lapacke_cgeqr2.c @@ -140,8 +142,12 @@ set(CSRC lapacke_cggqrf_work.c lapacke_cggrqf.c lapacke_cggrqf_work.c + lapacke_cggsvd.c + lapacke_cggsvd_work.c lapacke_cggsvd3.c lapacke_cggsvd3_work.c + lapacke_cggsvp.c + lapacke_cggsvp_work.c lapacke_cggsvp3.c lapacke_cggsvp3_work.c lapacke_cgtcon.c @@ -683,6 +689,8 @@ set(DSRC lapacke_dgeqlf_work.c lapacke_dgeqp3.c lapacke_dgeqp3_work.c + lapacke_dgeqpf.c + lapacke_dgeqpf_work.c lapacke_dgeqr.c lapacke_dgeqr_work.c lapacke_dgeqr2.c @@ -753,8 +761,12 @@ set(DSRC lapacke_dggqrf_work.c lapacke_dggrqf.c lapacke_dggrqf_work.c + lapacke_dggsvd.c + lapacke_dggsvd_work.c lapacke_dggsvd3.c lapacke_dggsvd3_work.c + lapacke_dggsvp.c + lapacke_dggsvp_work.c lapacke_dggsvp3.c lapacke_dggsvp3_work.c lapacke_dgtcon.c @@ -1249,6 +1261,8 @@ set(SSRC lapacke_sgeqlf_work.c lapacke_sgeqp3.c lapacke_sgeqp3_work.c + lapacke_sgeqpf.c + lapacke_sgeqpf_work.c lapacke_sgeqr.c lapacke_sgeqr_work.c lapacke_sgeqr2.c @@ -1319,8 +1333,12 @@ set(SSRC lapacke_sggqrf_work.c lapacke_sggrqf.c lapacke_sggrqf_work.c + lapacke_sggsvd.c + lapacke_sggsvd_work.c lapacke_sggsvd3.c lapacke_sggsvd3_work.c + lapacke_sggsvp.c + lapacke_sggsvp_work.c lapacke_sggsvp3.c lapacke_sggsvp3_work.c lapacke_sgtcon.c @@ -1809,6 +1827,8 @@ set(ZSRC lapacke_zgeqlf_work.c lapacke_zgeqp3.c lapacke_zgeqp3_work.c + lapacke_zgeqpf.c + lapacke_zgeqpf_work.c lapacke_zgeqr.c lapacke_zgeqr_work.c lapacke_zgeqr2.c @@ -1879,8 +1899,12 @@ set(ZSRC lapacke_zggqrf_work.c lapacke_zggrqf.c lapacke_zggrqf_work.c + lapacke_zggsvd.c + lapacke_zggsvd_work.c lapacke_zggsvd3.c lapacke_zggsvd3_work.c + lapacke_zggsvp.c + lapacke_zggsvp_work.c lapacke_zggsvp3.c lapacke_zggsvp3_work.c lapacke_zgtcon.c From 374260027df00af208445b8239fb643e63f74745 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 21 Dec 2017 20:42:30 +0100 Subject: [PATCH 098/122] Add conditionals around ar calls for optional modules The macOS ar aborts when it gets called with no input, see #1398 --- lapack-netlib/LAPACKE/src/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 600984308..44884d4a5 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -2457,9 +2457,15 @@ all: ../../$(LAPACKELIB) ../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) +ifdef BUILD_DEPRECATED $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) +endif +ifdef (USEXBLAS) $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) +endif +ifdef LAPACKE_WITH_TMG $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) +endif $(RANLIB) $@ clean: cleanobj From bfc2a885946a0860897bad22b0fb06733f35350c Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 22 Dec 2017 00:55:40 +0100 Subject: [PATCH 099/122] remove unused buffer --- driver/level2/gbmv_k.c | 4 ++-- driver/level2/sbmv_k.c | 4 ++-- driver/level2/spmv_k.c | 4 ++-- driver/level2/zgbmv_k.c | 4 ++-- driver/level2/zhbmv_k.c | 4 ++-- driver/level2/zhpmv_k.c | 4 ++-- driver/level2/zsbmv_k.c | 4 ++-- driver/level2/zspmv_k.c | 4 ++-- driver/level3/level3.c | 2 +- driver/others/memory.c | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/driver/level2/gbmv_k.c b/driver/level2/gbmv_k.c index 4b29d70d1..498f5fef7 100644 --- a/driver/level2/gbmv_k.c +++ b/driver/level2/gbmv_k.c @@ -62,13 +62,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha, if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) + 4095) & ~4095); - gemvbuffer = bufferX; + // gemvbuffer = bufferX; COPY_K(M, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095); + // gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095); COPY_K(N, x, incx, X, 1); } diff --git a/driver/level2/sbmv_k.c b/driver/level2/sbmv_k.c index ef7fa378c..58f57df8b 100644 --- a/driver/level2/sbmv_k.c +++ b/driver/level2/sbmv_k.c @@ -55,13 +55,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha, if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) + 4095) & ~4095); - sbmvbuffer = bufferX; + // sbmvbuffer = bufferX; COPY_K(n, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095); + // sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095); COPY_K(n, x, incx, X, 1); } diff --git a/driver/level2/spmv_k.c b/driver/level2/spmv_k.c index 8ce0abdf7..e48b8b3f1 100644 --- a/driver/level2/spmv_k.c +++ b/driver/level2/spmv_k.c @@ -53,13 +53,13 @@ int CNAME(BLASLONG m, FLOAT alpha, FLOAT *a, if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) + 4095) & ~4095); - gemvbuffer = bufferX; + // gemvbuffer = bufferX; COPY_K(m, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095); + // gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095); COPY_K(m, x, incx, X, 1); } diff --git a/driver/level2/zgbmv_k.c b/driver/level2/zgbmv_k.c index d89932e33..b143e2545 100644 --- a/driver/level2/zgbmv_k.c +++ b/driver/level2/zgbmv_k.c @@ -83,13 +83,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) * 2 + 4095) & ~4095); - gemvbuffer = bufferX; + // gemvbuffer = bufferX; COPY_K(M, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095); + // gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095); COPY_K(N, x, incx, X, 1); } diff --git a/driver/level2/zhbmv_k.c b/driver/level2/zhbmv_k.c index 33f70d2c5..a164ed1fb 100644 --- a/driver/level2/zhbmv_k.c +++ b/driver/level2/zhbmv_k.c @@ -61,13 +61,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); - sbmvbuffer = bufferX; + // sbmvbuffer = bufferX; COPY_K(n, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); + // sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); COPY_K(n, x, incx, X, 1); } diff --git a/driver/level2/zhpmv_k.c b/driver/level2/zhpmv_k.c index 9e7ed7b0e..d92740c18 100644 --- a/driver/level2/zhpmv_k.c +++ b/driver/level2/zhpmv_k.c @@ -56,13 +56,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); - gemvbuffer = bufferX; + // gemvbuffer = bufferX; COPY_K(m, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095); + // gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095); COPY_K(m, x, incx, X, 1); } diff --git a/driver/level2/zsbmv_k.c b/driver/level2/zsbmv_k.c index 3ae74ce80..f4b7c5579 100644 --- a/driver/level2/zsbmv_k.c +++ b/driver/level2/zsbmv_k.c @@ -60,13 +60,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); - sbmvbuffer = bufferX; + // sbmvbuffer = bufferX; COPY_K(n, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); + // sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); COPY_K(n, x, incx, X, 1); } diff --git a/driver/level2/zspmv_k.c b/driver/level2/zspmv_k.c index 432205e83..3989714e9 100644 --- a/driver/level2/zspmv_k.c +++ b/driver/level2/zspmv_k.c @@ -55,13 +55,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); - gemvbuffer = bufferX; + // gemvbuffer = bufferX; COPY_K(m, y, incy, Y, 1); } if (incx != 1) { X = bufferX; - gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095); + // gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095); COPY_K(m, x, incx, X, 1); } diff --git a/driver/level3/level3.c b/driver/level3/level3.c index 0ee189af4..fbed14339 100644 --- a/driver/level3/level3.c +++ b/driver/level3/level3.c @@ -293,7 +293,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, min_l = k - ls; if (min_l >= GEMM_Q * 2) { - gemm_p = GEMM_P; + // gemm_p = GEMM_P; min_l = GEMM_Q; } else { if (min_l > GEMM_Q) { diff --git a/driver/others/memory.c b/driver/others/memory.c index a4d26b0e1..e4caa093a 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -348,7 +348,7 @@ int blas_get_cpu_number(void){ max_num = get_num_procs(); #endif - blas_goto_num = 0; + // blas_goto_num = 0; #ifndef USE_OPENMP blas_goto_num=openblas_num_threads_env(); if (blas_goto_num < 0) blas_goto_num = 0; @@ -360,7 +360,7 @@ int blas_get_cpu_number(void){ #endif - blas_omp_num = 0; + // blas_omp_num = 0; blas_omp_num=openblas_omp_num_threads_env(); if (blas_omp_num < 0) blas_omp_num = 0; From 47deec2c1acc03b6d5190a584f70946040d129c8 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 22 Dec 2017 00:56:35 +0100 Subject: [PATCH 100/122] fix couple of dead assignment warnings --- interface/trmv.c | 4 ++-- kernel/generic/ztrmm_uncopy_2.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/interface/trmv.c b/interface/trmv.c index 7d17dc67b..7c40ae976 100644 --- a/interface/trmv.c +++ b/interface/trmv.c @@ -218,9 +218,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, buffer = (FLOAT *)blas_memory_alloc(1); #ifdef SMP - nthreads = num_cpu_avail(2); +/* nthreads = num_cpu_avail(2); -/*FIXME trmv_thread was found to be broken, see issue 1332 */ +FIXME trmv_thread was found to be broken, see issue 1332 */ nthreads = 1; if (nthreads == 1) { diff --git a/kernel/generic/ztrmm_uncopy_2.c b/kernel/generic/ztrmm_uncopy_2.c index 5cb0ffc54..7eb433df9 100644 --- a/kernel/generic/ztrmm_uncopy_2.c +++ b/kernel/generic/ztrmm_uncopy_2.c @@ -113,8 +113,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON #else data01 = *(ao1 + 0); data02 = *(ao1 + 1); - data03 = *(ao1 + 2); - data04 = *(ao1 + 3); + /* data03 = *(ao1 + 2); + data04 = *(ao1 + 3); */ data05 = *(ao2 + 0); data06 = *(ao2 + 1); data07 = *(ao2 + 2); From 03e5ff068793a3b31083b7f30d8925dfbfe939e4 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 26 Dec 2017 09:24:24 +0100 Subject: [PATCH 101/122] initialize potentially unitialized variables (clang5) --- kernel/generic/ztrsm_lncopy_1.c | 2 +- kernel/generic/ztrsm_lncopy_2.c | 4 ++-- kernel/generic/ztrsm_lncopy_4.c | 4 ++-- kernel/generic/ztrsm_ltcopy_1.c | 2 +- kernel/generic/ztrsm_ltcopy_2.c | 4 ++-- kernel/generic/ztrsm_ltcopy_4.c | 4 ++-- kernel/generic/ztrsm_uncopy_1.c | 2 +- kernel/generic/ztrsm_uncopy_2.c | 4 ++-- kernel/generic/ztrsm_uncopy_4.c | 4 ++-- kernel/generic/ztrsm_utcopy_1.c | 2 +- kernel/generic/ztrsm_utcopy_2.c | 4 ++-- kernel/generic/ztrsm_utcopy_4.c | 4 ++-- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/kernel/generic/ztrsm_lncopy_1.c b/kernel/generic/ztrsm_lncopy_1.c index 8dab45144..c1d62ead2 100644 --- a/kernel/generic/ztrsm_lncopy_1.c +++ b/kernel/generic/ztrsm_lncopy_1.c @@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02; + FLOAT data01 = 0.0, data02 = 0.0; FLOAT *a1; lda *= 2; diff --git a/kernel/generic/ztrsm_lncopy_2.c b/kernel/generic/ztrsm_lncopy_2.c index 1e76af707..a98bbb54e 100644 --- a/kernel/generic/ztrsm_lncopy_2.c +++ b/kernel/generic/ztrsm_lncopy_2.c @@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; - FLOAT data05, data06, data07, data08; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; + FLOAT data05, data06, data07 = 0.0, data08 = 0.0; FLOAT *a1, *a2; lda *= 2; diff --git a/kernel/generic/ztrsm_lncopy_4.c b/kernel/generic/ztrsm_lncopy_4.c index 2a1302e53..f44127282 100644 --- a/kernel/generic/ztrsm_lncopy_4.c +++ b/kernel/generic/ztrsm_lncopy_4.c @@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; FLOAT data05, data06, data07, data08; - FLOAT data09, data10, data11, data12; + FLOAT data09, data10, data11 = 0.0, data12 = 0.0; FLOAT data13, data14, data15, data16; FLOAT data17, data18, data19, data20; FLOAT data21, data22, data23, data24; diff --git a/kernel/generic/ztrsm_ltcopy_1.c b/kernel/generic/ztrsm_ltcopy_1.c index af4ac127d..f24cec863 100644 --- a/kernel/generic/ztrsm_ltcopy_1.c +++ b/kernel/generic/ztrsm_ltcopy_1.c @@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02; + FLOAT data01 = 0.0, data02 = 0.0; FLOAT *a1; lda *= 2; diff --git a/kernel/generic/ztrsm_ltcopy_2.c b/kernel/generic/ztrsm_ltcopy_2.c index 21bd0fa1f..93a32bf04 100644 --- a/kernel/generic/ztrsm_ltcopy_2.c +++ b/kernel/generic/ztrsm_ltcopy_2.c @@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; - FLOAT data05, data06, data07, data08; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; + FLOAT data05, data06, data07 = 0.0, data08 = 0.0; FLOAT *a1, *a2; lda *= 2; diff --git a/kernel/generic/ztrsm_ltcopy_4.c b/kernel/generic/ztrsm_ltcopy_4.c index be28ba646..5a8f138d6 100644 --- a/kernel/generic/ztrsm_ltcopy_4.c +++ b/kernel/generic/ztrsm_ltcopy_4.c @@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; FLOAT data05, data06, data07, data08; - FLOAT data09, data10, data11, data12; + FLOAT data09, data10, data11 = 0.0, data12 = 0.0; FLOAT data13, data14, data15, data16; FLOAT data17, data18, data19, data20; FLOAT data21, data22, data23, data24; diff --git a/kernel/generic/ztrsm_uncopy_1.c b/kernel/generic/ztrsm_uncopy_1.c index dc9157bdc..be1aac5e2 100644 --- a/kernel/generic/ztrsm_uncopy_1.c +++ b/kernel/generic/ztrsm_uncopy_1.c @@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02; + FLOAT data01 = 0.0, data02 = 0.0; FLOAT *a1; lda *= 2; diff --git a/kernel/generic/ztrsm_uncopy_2.c b/kernel/generic/ztrsm_uncopy_2.c index fecab88e7..91051b148 100644 --- a/kernel/generic/ztrsm_uncopy_2.c +++ b/kernel/generic/ztrsm_uncopy_2.c @@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; - FLOAT data05, data06, data07, data08; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; + FLOAT data05, data06, data07 = 0.0, data08 = 0.0; FLOAT *a1, *a2; lda *= 2; diff --git a/kernel/generic/ztrsm_uncopy_4.c b/kernel/generic/ztrsm_uncopy_4.c index fe33e41c7..cb6ed0101 100644 --- a/kernel/generic/ztrsm_uncopy_4.c +++ b/kernel/generic/ztrsm_uncopy_4.c @@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; FLOAT data05, data06, data07, data08; - FLOAT data09, data10, data11, data12; + FLOAT data09, data10, data11 = 0.0, data12 = 0.0; FLOAT data13, data14, data15, data16; FLOAT data17, data18, data19, data20; FLOAT data21, data22, data23, data24; diff --git a/kernel/generic/ztrsm_utcopy_1.c b/kernel/generic/ztrsm_utcopy_1.c index 08f85e891..0e33a7d18 100644 --- a/kernel/generic/ztrsm_utcopy_1.c +++ b/kernel/generic/ztrsm_utcopy_1.c @@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02; + FLOAT data01 = 0.0, data02 = 0.0; FLOAT *a1; lda *= 2; diff --git a/kernel/generic/ztrsm_utcopy_2.c b/kernel/generic/ztrsm_utcopy_2.c index 387bb2532..c34d741ee 100644 --- a/kernel/generic/ztrsm_utcopy_2.c +++ b/kernel/generic/ztrsm_utcopy_2.c @@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; - FLOAT data05, data06, data07, data08; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; + FLOAT data05, data06, data07 = 0.0, data08 = 0.0; FLOAT *a1, *a2; lda *= 2; diff --git a/kernel/generic/ztrsm_utcopy_4.c b/kernel/generic/ztrsm_utcopy_4.c index b419f6b90..9617fc192 100644 --- a/kernel/generic/ztrsm_utcopy_4.c +++ b/kernel/generic/ztrsm_utcopy_4.c @@ -43,9 +43,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02, data03, data04; + FLOAT data01 = 0.0, data02 = 0.0, data03, data04; FLOAT data05, data06, data07, data08; - FLOAT data09, data10, data11, data12; + FLOAT data09, data10, data11 = 0.0, data12 = 0.0; FLOAT data13, data14, data15, data16; FLOAT data17, data18, data19, data20; FLOAT data21, data22, data23, data24; From 723f396a204a152c746e145274a5e13383f4d422 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 29 Dec 2017 23:56:41 +0100 Subject: [PATCH 102/122] Tag %1 and %2 as both input and output The inline assembly modifies its input operands, so mark them as output to avoid surprises with optimization. Fixes #1292 --- kernel/x86_64/cgemv_n_microk_haswell-4.c | 24 ++++++++++++------------ kernel/x86_64/cgemv_t_microk_haswell-4.c | 18 +++++++++--------- kernel/x86_64/dgemv_n_microk_haswell-4.c | 12 ++++++------ kernel/x86_64/dgemv_t_microk_haswell-4.c | 6 +++--- kernel/x86_64/sgemv_n_microk_haswell-4.c | 14 +++++++------- kernel/x86_64/sgemv_t_microk_haswell-4.c | 6 +++--- kernel/x86_64/zgemv_n_microk_haswell-4.c | 24 ++++++++++++------------ kernel/x86_64/zgemv_t_microk_haswell-4.c | 18 +++++++++--------- 8 files changed, 61 insertions(+), 61 deletions(-) diff --git a/kernel/x86_64/cgemv_n_microk_haswell-4.c b/kernel/x86_64/cgemv_n_microk_haswell-4.c index 360345018..2a8c0ea63 100644 --- a/kernel/x86_64/cgemv_n_microk_haswell-4.c +++ b/kernel/x86_64/cgemv_n_microk_haswell-4.c @@ -159,9 +159,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -283,9 +283,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -390,9 +390,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap), // 4 @@ -520,9 +520,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (src), // 2 "r" (dest), // 3 "r" (&alpha_r), // 4 diff --git a/kernel/x86_64/cgemv_t_microk_haswell-4.c b/kernel/x86_64/cgemv_t_microk_haswell-4.c index 1c2075104..1674f0ffa 100644 --- a/kernel/x86_64/cgemv_t_microk_haswell-4.c +++ b/kernel/x86_64/cgemv_t_microk_haswell-4.c @@ -230,9 +230,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -391,9 +391,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -519,9 +519,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT * "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap), // 4 diff --git a/kernel/x86_64/dgemv_n_microk_haswell-4.c b/kernel/x86_64/dgemv_n_microk_haswell-4.c index 7b36ffeb7..584a6c6b5 100644 --- a/kernel/x86_64/dgemv_n_microk_haswell-4.c +++ b/kernel/x86_64/dgemv_n_microk_haswell-4.c @@ -93,9 +93,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -172,9 +172,9 @@ static void dgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/dgemv_t_microk_haswell-4.c b/kernel/x86_64/dgemv_t_microk_haswell-4.c index 07fca8526..958fd3e0a 100644 --- a/kernel/x86_64/dgemv_t_microk_haswell-4.c +++ b/kernel/x86_64/dgemv_t_microk_haswell-4.c @@ -107,9 +107,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_n_microk_haswell-4.c b/kernel/x86_64/sgemv_n_microk_haswell-4.c index b4b88edce..2c90f8aa9 100644 --- a/kernel/x86_64/sgemv_n_microk_haswell-4.c +++ b/kernel/x86_64/sgemv_n_microk_haswell-4.c @@ -153,10 +153,10 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 - "r" (x), // 2 + "+r" (i), // 0 + "+r" (n) // 1 + : + "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 "r" (ap[1]), // 5 @@ -276,9 +276,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_t_microk_haswell-4.c b/kernel/x86_64/sgemv_t_microk_haswell-4.c index eca85867f..8c370b4c0 100644 --- a/kernel/x86_64/sgemv_t_microk_haswell-4.c +++ b/kernel/x86_64/sgemv_t_microk_haswell-4.c @@ -128,9 +128,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/zgemv_n_microk_haswell-4.c b/kernel/x86_64/zgemv_n_microk_haswell-4.c index 559ed5b80..4d3a032b0 100644 --- a/kernel/x86_64/zgemv_n_microk_haswell-4.c +++ b/kernel/x86_64/zgemv_n_microk_haswell-4.c @@ -115,9 +115,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -203,9 +203,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -277,9 +277,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap) // 4 @@ -379,9 +379,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (src), // 2 "r" (dest), // 3 "r" (&alpha_r), // 4 diff --git a/kernel/x86_64/zgemv_t_microk_haswell-4.c b/kernel/x86_64/zgemv_t_microk_haswell-4.c index e391012d8..585783c60 100644 --- a/kernel/x86_64/zgemv_t_microk_haswell-4.c +++ b/kernel/x86_64/zgemv_t_microk_haswell-4.c @@ -181,9 +181,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -308,9 +308,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -407,9 +407,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT * "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n), // 1 + "+r" (i), // 0 + "+r" (n) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap), // 4 From b0652184ae2dd934b288d12ab1faaafb67b88804 Mon Sep 17 00:00:00 2001 From: xoviat Date: Fri, 29 Dec 2017 19:58:35 -0600 Subject: [PATCH 103/122] Appveyor: enable building fortran with ninja --- appveyor.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index ceb6d7f59..141d3a130 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -5,6 +5,8 @@ version: 0.2.19.{build} platform: - x64 +os: Visual Studio 2017 + configuration: Release clone_folder: c:\projects\OpenBLAS @@ -23,9 +25,6 @@ skip_commits: # Add [av skip] to commit messages message: /\[av skip\]/ -cache: - - '%APPVEYOR_BUILD_FOLDER%\build' - environment: global: CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 @@ -40,18 +39,22 @@ environment: install: - if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force - - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev ninja cmake + - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake + + - if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja + - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang - - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 + + - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 - if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" before_build: - ps: if (-Not (Test-Path .\build)) { mkdir build } - cd build - - if [%COMPILER%]==[cl] cmake -G "Visual Studio 12 Win64" .. + - if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" .. - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl .. - - if [%WITH_FORTRAN%]==[yes] cmake -G "NMake Makefiles" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. + - if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. - if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON .. build_script: From b973990df23af2e0f1cbe450cba1f1c0d17e68f8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 31 Dec 2017 18:03:36 +0100 Subject: [PATCH 104/122] Tag %1 and %2 as both input and output operands fix from #1292 extended to the other gemv microkernels --- kernel/x86_64/cgemv_n_microk_bulldozer-4.c | 24 ++++++++++----------- kernel/x86_64/cgemv_t_microk_bulldozer-4.c | 12 +++++------ kernel/x86_64/dgemv_n_microk_nehalem-4.c | 4 ++-- kernel/x86_64/dgemv_n_microk_piledriver-4.c | 8 +++---- kernel/x86_64/sgemv_n_microk_bulldozer-4.c | 8 +++---- kernel/x86_64/sgemv_n_microk_nehalem-4.c | 8 +++---- kernel/x86_64/sgemv_n_microk_sandy-4.c | 8 +++---- kernel/x86_64/sgemv_t_microk_bulldozer-4.c | 4 ++-- kernel/x86_64/sgemv_t_microk_nehalem-4.c | 4 ++-- kernel/x86_64/sgemv_t_microk_sandy-4.c | 4 ++-- kernel/x86_64/zgemv_n_microk_bulldozer-4.c | 20 ++++++++--------- kernel/x86_64/zgemv_n_microk_sandy-4.c | 16 +++++++------- kernel/x86_64/zgemv_t_microk_bulldozer-4.c | 12 +++++------ 13 files changed, 66 insertions(+), 66 deletions(-) diff --git a/kernel/x86_64/cgemv_n_microk_bulldozer-4.c b/kernel/x86_64/cgemv_n_microk_bulldozer-4.c index a74b41269..ad21d899d 100644 --- a/kernel/x86_64/cgemv_n_microk_bulldozer-4.c +++ b/kernel/x86_64/cgemv_n_microk_bulldozer-4.c @@ -158,9 +158,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -282,9 +282,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -389,9 +389,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (x), // 2 "r" (y), // 3 "r" (ap), // 4 @@ -519,9 +519,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a "vzeroupper \n\t" : - : - "r" (i), // 0 - "r" (n1), // 1 + "+r" (i), // 0 + "+r" (n1) // 1 + : "r" (src), // 2 "r" (dest), // 3 "r" (&alpha_r), // 4 diff --git a/kernel/x86_64/cgemv_t_microk_bulldozer-4.c b/kernel/x86_64/cgemv_t_microk_bulldozer-4.c index 941b9cfc7..f01a24a6b 100644 --- a/kernel/x86_64/cgemv_t_microk_bulldozer-4.c +++ b/kernel/x86_64/cgemv_t_microk_bulldozer-4.c @@ -231,9 +231,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -392,9 +392,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -521,9 +521,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT * "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap), // 4 diff --git a/kernel/x86_64/dgemv_n_microk_nehalem-4.c b/kernel/x86_64/dgemv_n_microk_nehalem-4.c index d8c29831a..09be7c2bb 100644 --- a/kernel/x86_64/dgemv_n_microk_nehalem-4.c +++ b/kernel/x86_64/dgemv_n_microk_nehalem-4.c @@ -149,9 +149,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "movups %%xmm5 , -16(%3,%0,8) \n\t" // 2 * y : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/dgemv_n_microk_piledriver-4.c b/kernel/x86_64/dgemv_n_microk_piledriver-4.c index e1587b57c..530780bab 100644 --- a/kernel/x86_64/dgemv_n_microk_piledriver-4.c +++ b/kernel/x86_64/dgemv_n_microk_piledriver-4.c @@ -124,9 +124,9 @@ static void dgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -224,9 +224,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_n_microk_bulldozer-4.c b/kernel/x86_64/sgemv_n_microk_bulldozer-4.c index 2b83b1045..31001c7f3 100644 --- a/kernel/x86_64/sgemv_n_microk_bulldozer-4.c +++ b/kernel/x86_64/sgemv_n_microk_bulldozer-4.c @@ -183,9 +183,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "4: \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -246,9 +246,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "jnz 1b \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_n_microk_nehalem-4.c b/kernel/x86_64/sgemv_n_microk_nehalem-4.c index 167c4be05..36dfb14ee 100644 --- a/kernel/x86_64/sgemv_n_microk_nehalem-4.c +++ b/kernel/x86_64/sgemv_n_microk_nehalem-4.c @@ -106,9 +106,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "jnz 1b \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -181,9 +181,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "jnz 1b \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_n_microk_sandy-4.c b/kernel/x86_64/sgemv_n_microk_sandy-4.c index 7377b545c..f617ccd5a 100644 --- a/kernel/x86_64/sgemv_n_microk_sandy-4.c +++ b/kernel/x86_64/sgemv_n_microk_sandy-4.c @@ -196,9 +196,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -345,9 +345,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_t_microk_bulldozer-4.c b/kernel/x86_64/sgemv_t_microk_bulldozer-4.c index 6e822fba3..1b5b8b2ff 100644 --- a/kernel/x86_64/sgemv_t_microk_bulldozer-4.c +++ b/kernel/x86_64/sgemv_t_microk_bulldozer-4.c @@ -126,9 +126,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vmovss %%xmm7, 12(%3) \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_t_microk_nehalem-4.c b/kernel/x86_64/sgemv_t_microk_nehalem-4.c index 4f07d9640..b3c07126c 100644 --- a/kernel/x86_64/sgemv_t_microk_nehalem-4.c +++ b/kernel/x86_64/sgemv_t_microk_nehalem-4.c @@ -78,9 +78,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "movss %%xmm7, 12(%3) \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/sgemv_t_microk_sandy-4.c b/kernel/x86_64/sgemv_t_microk_sandy-4.c index 76868ab14..ca49fe7e6 100644 --- a/kernel/x86_64/sgemv_t_microk_sandy-4.c +++ b/kernel/x86_64/sgemv_t_microk_sandy-4.c @@ -152,9 +152,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 diff --git a/kernel/x86_64/zgemv_n_microk_bulldozer-4.c b/kernel/x86_64/zgemv_n_microk_bulldozer-4.c index f367ad607..59473d071 100644 --- a/kernel/x86_64/zgemv_n_microk_bulldozer-4.c +++ b/kernel/x86_64/zgemv_n_microk_bulldozer-4.c @@ -125,9 +125,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "2: \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -231,9 +231,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "2: \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -321,9 +321,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -393,9 +393,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap) // 4 @@ -493,9 +493,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (src), // 2 "r" (dest), // 3 "r" (&alpha_r), // 4 diff --git a/kernel/x86_64/zgemv_n_microk_sandy-4.c b/kernel/x86_64/zgemv_n_microk_sandy-4.c index 82fc543de..245f45d05 100644 --- a/kernel/x86_64/zgemv_n_microk_sandy-4.c +++ b/kernel/x86_64/zgemv_n_microk_sandy-4.c @@ -127,9 +127,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -220,9 +220,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -295,9 +295,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap) // 4 @@ -396,9 +396,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (src), // 2 "r" (dest), // 3 "r" (&alpha_r), // 4 diff --git a/kernel/x86_64/zgemv_t_microk_bulldozer-4.c b/kernel/x86_64/zgemv_t_microk_bulldozer-4.c index 792c7e952..d86d221eb 100644 --- a/kernel/x86_64/zgemv_t_microk_bulldozer-4.c +++ b/kernel/x86_64/zgemv_t_microk_bulldozer-4.c @@ -198,9 +198,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -333,9 +333,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap[0]), // 4 @@ -437,9 +437,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT * "vzeroupper \n\t" : + "+r" (i), // 0 + "+r" (n) // 1 : - "r" (i), // 0 - "r" (n), // 1 "r" (x), // 2 "r" (y), // 3 "r" (ap), // 4 From 4d0b005e5b951ef76b8630846c5db8a1f325e2f0 Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 1 Jan 2018 20:54:39 +0100 Subject: [PATCH 105/122] Eliminate remaining unused results in kernels (clang5 analyzer) --- kernel/generic/trmm_lncopy_8.c | 20 ++++++++++---------- kernel/generic/trmm_utcopy_8.c | 30 +++++++++++++++--------------- kernel/generic/trsm_uncopy_8.c | 8 ++++---- kernel/generic/trsm_utcopy_8.c | 4 ++-- kernel/x86_64/dgemv_n_4.c | 4 ++-- kernel/x86_64/dgemv_t_4.c | 4 ++-- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/kernel/generic/trmm_lncopy_8.c b/kernel/generic/trmm_lncopy_8.c index 69429411e..07186d302 100644 --- a/kernel/generic/trmm_lncopy_8.c +++ b/kernel/generic/trmm_lncopy_8.c @@ -635,7 +635,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data05 = *(ao1 + 4); data06 = *(ao1 + 5); data07 = *(ao1 + 6); - data08 = *(ao1 + 7); + // data08 = *(ao1 + 7); if (i >= 2) { #ifndef UNIT @@ -646,7 +646,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data13 = *(ao2 + 4); data14 = *(ao2 + 5); data15 = *(ao2 + 6); - data16 = *(ao2 + 7); + // data16 = *(ao2 + 7); } if (i >= 3) { @@ -657,7 +657,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data21 = *(ao3 + 4); data22 = *(ao3 + 5); data23 = *(ao3 + 6); - data24 = *(ao3 + 7); + // data24 = *(ao3 + 7); } if (i >= 4) { @@ -667,7 +667,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data29 = *(ao4 + 4); data30 = *(ao4 + 5); data31 = *(ao4 + 6); - data32 = *(ao4 + 7); + // data32 = *(ao4 + 7); } if (i >= 5) { @@ -676,7 +676,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON #endif data38 = *(ao5 + 5); data39 = *(ao5 + 6); - data40 = *(ao5 + 7); + // data40 = *(ao5 + 7); } if (i >= 6) { @@ -684,14 +684,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data46 = *(ao6 + 5); #endif data47 = *(ao6 + 6); - data48 = *(ao6 + 7); + // data48 = *(ao6 + 7); } if (i >= 7) { #ifndef UNIT data55 = *(ao7 + 6); #endif - data56 = *(ao7 + 7); + // data56 = *(ao7 + 7); } #ifdef UNIT @@ -1032,14 +1032,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON #endif data02 = *(ao1 + 1); data03 = *(ao1 + 2); - data04 = *(ao1 + 3); + // data04 = *(ao1 + 3); if (i >= 2) { #ifndef UNIT data10 = *(ao2 + 1); #endif data11 = *(ao2 + 2); - data12 = *(ao2 + 3); + // data12 = *(ao2 + 3); } @@ -1047,7 +1047,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON #ifndef UNIT data19 = *(ao3 + 2); #endif - data20 = *(ao3 + 3); + // data20 = *(ao3 + 3); } #ifdef UNIT diff --git a/kernel/generic/trmm_utcopy_8.c b/kernel/generic/trmm_utcopy_8.c index 63106ac72..c85a0a508 100644 --- a/kernel/generic/trmm_utcopy_8.c +++ b/kernel/generic/trmm_utcopy_8.c @@ -685,7 +685,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data33 = *(ao5 + 0); data41 = *(ao6 + 0); data49 = *(ao7 + 0); - data57 = *(ao8 + 0); + // data57 = *(ao8 + 0); if (i >= 2) { #ifndef UNIT @@ -696,7 +696,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data34 = *(ao5 + 1); data42 = *(ao6 + 1); data50 = *(ao7 + 1); - data58 = *(ao8 + 1); + // data58 = *(ao8 + 1); } if (i >= 3) { @@ -707,7 +707,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data35 = *(ao5 + 2); data43 = *(ao6 + 2); data51 = *(ao7 + 2); - data59 = *(ao8 + 2); + // data59 = *(ao8 + 2); } if (i >= 4) { @@ -717,7 +717,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data36 = *(ao5 + 3); data44 = *(ao6 + 3); data52 = *(ao7 + 3); - data60 = *(ao8 + 3); + // data60 = *(ao8 + 3); } if (i >= 5) { @@ -726,7 +726,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON #endif data45 = *(ao6 + 4); data53 = *(ao7 + 4); - data61 = *(ao8 + 4); + // data61 = *(ao8 + 4); } if (i >= 6) { @@ -734,14 +734,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON data46 = *(ao6 + 5); #endif data54 = *(ao7 + 5); - data62 = *(ao8 + 5); + // data62 = *(ao8 + 5); } if (i >= 7) { #ifndef UNIT data55 = *(ao7 + 6); #endif - data63 = *(ao8 + 6); + // data63 = *(ao8 + 6); } #ifdef UNIT @@ -1074,27 +1074,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } } else { -#ifndef UNIT +/* #ifndef UNIT data01 = *(ao1 + 0); -#endif +#endif */ data09 = *(ao2 + 0); data17 = *(ao3 + 0); - data25 = *(ao4 + 0); + // data25 = *(ao4 + 0); if (i >= 2) { -#ifndef UNIT +/* #ifndef UNIT data10 = *(ao2 + 1); -#endif +#endif */ data18 = *(ao3 + 1); - data26 = *(ao4 + 1); + // data26 = *(ao4 + 1); } - if (i >= 3) { +/* if (i >= 3) { #ifndef UNIT data19 = *(ao3 + 2); #endif data27 = *(ao4 + 2); - } + } */ #ifndef UNIT b[ 0] = ONE; diff --git a/kernel/generic/trsm_uncopy_8.c b/kernel/generic/trsm_uncopy_8.c index ec71f3f8d..bcff48436 100644 --- a/kernel/generic/trsm_uncopy_8.c +++ b/kernel/generic/trsm_uncopy_8.c @@ -628,13 +628,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT if (ii < jj) { data01 = *(a1 + 0); - data02 = *(a1 + 1); + // data02 = *(a1 + 1); data09 = *(a2 + 0); - data10 = *(a2 + 1); + // data10 = *(a2 + 1); data17 = *(a3 + 0); - data18 = *(a3 + 1); + // data18 = *(a3 + 1); data25 = *(a4 + 0); - data26 = *(a4 + 1); + // data26 = *(a4 + 1); *(b + 0) = data01; *(b + 1) = data09; diff --git a/kernel/generic/trsm_utcopy_8.c b/kernel/generic/trsm_utcopy_8.c index 47feb5974..9780f3bc8 100644 --- a/kernel/generic/trsm_utcopy_8.c +++ b/kernel/generic/trsm_utcopy_8.c @@ -649,14 +649,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT if (m & 2) { if (ii == jj) { -#ifndef UNIT +/* #ifndef UNIT data01 = *(a1 + 0); #endif data09 = *(a2 + 0); #ifndef UNIT data10 = *(a2 + 1); -#endif +#endif */ } if (ii > jj) { diff --git a/kernel/x86_64/dgemv_n_4.c b/kernel/x86_64/dgemv_n_4.c index a8437a016..1b9ca7a60 100644 --- a/kernel/x86_64/dgemv_n_4.c +++ b/kernel/x86_64/dgemv_n_4.c @@ -281,8 +281,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO if ( n2 & 1 ) { dgemv_kernel_4x1(NB,a_ptr,x_ptr,ybuffer,&alpha); - a_ptr += lda; - x_ptr += 1; + /* a_ptr += lda; + x_ptr += 1; */ } diff --git a/kernel/x86_64/dgemv_t_4.c b/kernel/x86_64/dgemv_t_4.c index 3891e16cc..6b99d6fdd 100644 --- a/kernel/x86_64/dgemv_t_4.c +++ b/kernel/x86_64/dgemv_t_4.c @@ -393,9 +393,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO { dgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer); - a_ptr += lda; + // a_ptr += lda; *y_ptr += ybuffer[0] * alpha; - y_ptr += inc_y; + // y_ptr += inc_y; } a += NB; From 11a627c54e21fb87a9fae5a0c5c2344c6ae23146 Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 1 Jan 2018 20:56:26 +0100 Subject: [PATCH 106/122] remove surplus parentheses to silence clang5 --- driver/level3/level3.c | 6 +++--- driver/level3/level3_syr2k.c | 4 ++-- driver/level3/level3_syrk.c | 4 ++-- driver/level3/level3_syrk_threaded.c | 4 ++-- driver/level3/level3_thread.c | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/driver/level3/level3.c b/driver/level3/level3.c index fbed14339..1ab7a740e 100644 --- a/driver/level3/level3.c +++ b/driver/level3/level3.c @@ -251,11 +251,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, if ((k == 0) || (alpha == NULL)) return 0; #if !defined(XDOUBLE) || !defined(QUAD_PRECISION) - if ((alpha[0] == ZERO) + if ( alpha[0] == ZERO #ifdef COMPLEX - && (alpha[1] == ZERO) + && alpha[1] == ZERO #endif - ) return 0; + ) return 0; #else if (((alpha[0].x[0] | alpha[0].x[1] #ifdef COMPLEX diff --git a/driver/level3/level3_syr2k.c b/driver/level3/level3_syr2k.c index 8bdd921c9..3a2f86972 100644 --- a/driver/level3/level3_syr2k.c +++ b/driver/level3/level3_syr2k.c @@ -154,9 +154,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if ((k == 0) || (alpha == NULL)) return 0; - if ((alpha[0] == ZERO) + if (alpha[0] == ZERO #ifdef COMPLEX - && (alpha[1] == ZERO) + && alpha[1] == ZERO #endif ) return 0; diff --git a/driver/level3/level3_syrk.c b/driver/level3/level3_syrk.c index f3202eb88..7f9fef479 100644 --- a/driver/level3/level3_syrk.c +++ b/driver/level3/level3_syrk.c @@ -158,9 +158,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if ((k == 0) || (alpha == NULL)) return 0; - if ((alpha[0] == ZERO) + if (alpha[0] == ZERO #if defined(COMPLEX) && !defined(HERK) - && (alpha[1] == ZERO) + && alpha[1] == ZERO #endif ) return 0; diff --git a/driver/level3/level3_syrk_threaded.c b/driver/level3/level3_syrk_threaded.c index 66732897a..65002ae46 100644 --- a/driver/level3/level3_syrk_threaded.c +++ b/driver/level3/level3_syrk_threaded.c @@ -200,9 +200,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, if ((k == 0) || (alpha == NULL)) return 0; - if ((alpha[0] == ZERO) + if (alpha[0] == ZERO #if defined(COMPLEX) && !defined(HERK) - && (alpha[1] == ZERO) + && alpha[1] == ZERO #endif ) return 0; diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 77ceac6e8..a1ed8bbb1 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -295,9 +295,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, /* Return early if no more computation is needed */ if ((k == 0) || (alpha == NULL)) return 0; - if ((alpha[0] == ZERO) + if (alpha[0] == ZERO #ifdef COMPLEX - && (alpha[1] == ZERO) + && alpha[1] == ZERO #endif ) return 0; From 8aafa0473ce97b01d14ba0e93749ee7d98e57e83 Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 1 Jan 2018 20:57:12 +0100 Subject: [PATCH 107/122] address last warnings as seen by gcc7 --- driver/others/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index c1bbdac3f..0cb8c78a2 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -237,7 +237,7 @@ static inline void get_cpumap(int node, unsigned long * node_info) { if(k!=0){ name[k]='\0'; affinity[count++] = strtoul(name, &dummy, 16); - k=0; + // k=0; } // 0-63bit -> node_info[0], 64-128bit -> node_info[1] .... // revert the sequence @@ -293,7 +293,7 @@ static inline void get_share(int cpu, int level, unsigned long * share) { if(k!=0){ name[k]='\0'; affinity[count++] = strtoul(name, &dummy, 16); - k=0; + // k=0; } // 0-63bit -> node_info[0], 64-128bit -> node_info[1] .... // revert the sequence From d602b993861b8bb04b15d846e58eb83024bb0865 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 2 Jan 2018 14:38:50 +0100 Subject: [PATCH 108/122] LAPACK helpers in C that need care too --- kernel/generic/laswp_ncopy_8.c | 2 +- kernel/generic/neg_tcopy_16.c | 2 +- kernel/generic/neg_tcopy_8.c | 8 ++++---- kernel/generic/zlaswp_ncopy_2.c | 2 +- kernel/generic/zlaswp_ncopy_4.c | 2 +- kernel/generic/zneg_tcopy_2.c | 4 ++-- kernel/generic/zneg_tcopy_4.c | 14 +++++++------- kernel/generic/zneg_tcopy_8.c | 4 ++-- lapack/getrf/getrf_single.c | 4 ++-- lapack/trti2/ztrti2_L.c | 5 +++-- lapack/trti2/ztrti2_U.c | 5 +++-- 11 files changed, 27 insertions(+), 25 deletions(-) diff --git a/kernel/generic/laswp_ncopy_8.c b/kernel/generic/laswp_ncopy_8.c index bb7408c61..ae4acc1e7 100644 --- a/kernel/generic/laswp_ncopy_8.c +++ b/kernel/generic/laswp_ncopy_8.c @@ -288,7 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint i++; } while (i <= k2); - a += lda; + // a += lda; } return 0; diff --git a/kernel/generic/neg_tcopy_16.c b/kernel/generic/neg_tcopy_16.c index a93372abe..757000b57 100644 --- a/kernel/generic/neg_tcopy_16.c +++ b/kernel/generic/neg_tcopy_16.c @@ -379,7 +379,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ if (m & 1){ ctemp01 = *(aoffset1 + 0); *(boffset + 0) = -ctemp01; - boffset += 1; + // boffset += 1; } } diff --git a/kernel/generic/neg_tcopy_8.c b/kernel/generic/neg_tcopy_8.c index a45ecc7d4..5a092591e 100644 --- a/kernel/generic/neg_tcopy_8.c +++ b/kernel/generic/neg_tcopy_8.c @@ -719,10 +719,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ if (m & 1){ aoffset1 = aoffset; - aoffset += lda; + // aoffset += lda; boffset1 = boffset; - boffset += 8; + // boffset += 8; i = (n >> 3); if (i > 0){ @@ -762,7 +762,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset2 + 1) = -ctemp02; *(boffset2 + 2) = -ctemp03; *(boffset2 + 3) = -ctemp04; - boffset2 += 4; + // boffset2 += 4; } if (n & 2){ @@ -772,7 +772,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset3 + 0) = -ctemp01; *(boffset3 + 1) = -ctemp02; - boffset3 += 2; + // boffset3 += 2; } if (n & 1){ diff --git a/kernel/generic/zlaswp_ncopy_2.c b/kernel/generic/zlaswp_ncopy_2.c index d02a788b3..407821a2f 100644 --- a/kernel/generic/zlaswp_ncopy_2.c +++ b/kernel/generic/zlaswp_ncopy_2.c @@ -372,7 +372,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *(b1 + 0) = A1; *(b1 + 1) = A2; } - buffer += 2; + // buffer += 2; } } diff --git a/kernel/generic/zlaswp_ncopy_4.c b/kernel/generic/zlaswp_ncopy_4.c index b79166692..192494b07 100644 --- a/kernel/generic/zlaswp_ncopy_4.c +++ b/kernel/generic/zlaswp_ncopy_4.c @@ -702,7 +702,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *(b1 + 0) = A1; *(b1 + 1) = A2; } - buffer += 2; + // buffer += 2; } } diff --git a/kernel/generic/zneg_tcopy_2.c b/kernel/generic/zneg_tcopy_2.c index 074f2f1fb..680bc74f6 100644 --- a/kernel/generic/zneg_tcopy_2.c +++ b/kernel/generic/zneg_tcopy_2.c @@ -140,7 +140,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(b_offset1 + 6) = -ctemp11; *(b_offset1 + 7) = -ctemp12; - b_offset1 += m * 4; + // b_offset1 += m * 4; a_offset1 += 4; a_offset2 += 4; } @@ -204,7 +204,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(b_offset + 2) = -ctemp3; *(b_offset + 3) = -ctemp4; - b_offset += m * 4; + // b_offset += m * 4; a_offset += 4; } diff --git a/kernel/generic/zneg_tcopy_4.c b/kernel/generic/zneg_tcopy_4.c index cfdd23bfd..a643b87b1 100644 --- a/kernel/generic/zneg_tcopy_4.c +++ b/kernel/generic/zneg_tcopy_4.c @@ -233,10 +233,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset3 + 6) = -ctemp07; *(boffset3 + 7) = -ctemp08; - aoffset1 += 2; + /* aoffset1 += 2; aoffset2 += 2; aoffset3 += 2; - aoffset4 += 2; + aoffset4 += 2; */ boffset3 += 8; } @@ -293,8 +293,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ aoffset1 += 8; aoffset2 += 8; - aoffset3 += 8; - aoffset4 += 8; + /* aoffset3 += 8; + aoffset4 += 8; */ boffset1 += m * 8; i --; @@ -338,8 +338,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset3 + 2) = -ctemp03; *(boffset3 + 3) = -ctemp04; - aoffset1 += 2; - aoffset2 += 2; + /* aoffset1 += 2; + aoffset2 += 2; */ boffset3 += 4; } } @@ -387,7 +387,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset2 + 3) = -ctemp04; aoffset1 += 4; - boffset2 += 4; + // boffset2 += 4; } if (n & 1){ diff --git a/kernel/generic/zneg_tcopy_8.c b/kernel/generic/zneg_tcopy_8.c index cb1a62d60..c2846b4f1 100644 --- a/kernel/generic/zneg_tcopy_8.c +++ b/kernel/generic/zneg_tcopy_8.c @@ -324,7 +324,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ if (n & 1){ aoffset1 = aoffset; aoffset2 = aoffset + lda; - aoffset += 2; + // aoffset += 2; i = (m >> 1); if (i > 0){ @@ -353,7 +353,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){ *(boffset + 0) = -ctemp01; *(boffset + 1) = -ctemp02; - boffset += 2; + // boffset += 2; } } diff --git a/lapack/getrf/getrf_single.c b/lapack/getrf/getrf_single.c index 581feeb2e..865c51bb5 100644 --- a/lapack/getrf/getrf_single.c +++ b/lapack/getrf/getrf_single.c @@ -51,7 +51,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG jjs, min_jj; blasint *ipiv, iinfo, info; BLASLONG jb, mn, blocking; - FLOAT *a, *offsetA, *offsetB; + FLOAT *a, *offsetA; //, *offsetB; BLASLONG range_N[2]; FLOAT *sbb; @@ -99,7 +99,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, if (jb > blocking) jb = blocking; offsetA = a + j * lda * COMPSIZE; - offsetB = a + (j + jb) * lda * COMPSIZE; + // offsetB = a + (j + jb) * lda * COMPSIZE; range_N[0] = offset + j; range_N[1] = offset + j + jb; diff --git a/lapack/trti2/ztrti2_L.c b/lapack/trti2/ztrti2_L.c index 819bff261..98ea2128d 100644 --- a/lapack/trti2/ztrti2_L.c +++ b/lapack/trti2/ztrti2_L.c @@ -67,8 +67,6 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, for (j = n - 1; j >= 0; j--) { - ajj_r = ONE; - ajj_i = ZERO; #ifndef UNIT ajj_r = *(a + (j + j * lda) * COMPSIZE + 0); @@ -88,6 +86,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, *(a + (j + j * lda) * COMPSIZE + 0) = ajj_r; *(a + (j + j * lda) * COMPSIZE + 1) = ajj_i; +#else + ajj_r = ONE; + ajj_i = ZERO; #endif ZTRMV (n - j - 1, diff --git a/lapack/trti2/ztrti2_U.c b/lapack/trti2/ztrti2_U.c index 972329acd..3dac56c3f 100644 --- a/lapack/trti2/ztrti2_U.c +++ b/lapack/trti2/ztrti2_U.c @@ -67,8 +67,6 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, for (j = 0; j < n; j++) { - ajj_r = ONE; - ajj_i = ZERO; #ifndef UNIT ajj_r = *(a + (j + j * lda) * COMPSIZE + 0); @@ -89,6 +87,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, *(a + (j + j * lda) * COMPSIZE + 0) = ajj_r; *(a + (j + j * lda) * COMPSIZE + 1) = ajj_i; +#else + ajj_r = ONE; + ajj_i = ZERO; #endif ZTRMV (j, From 7c7e2d9dc622f9a215cfd3ba87bec80c7879cdc7 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Fri, 5 Jan 2018 17:08:55 -0500 Subject: [PATCH 109/122] Make: escape paths to pkg-config file Add double quotes around the path to the pkg-config file so that a path containing whitespace does not break the build. --- Makefile.install | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile.install b/Makefile.install index 4c79c0781..81d097215 100644 --- a/Makefile.install +++ b/Makefile.install @@ -100,12 +100,12 @@ endif #Generating openblas.pc - @echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR) - @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc - @echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc - @echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc - @echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc - @cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc + @echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" + @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" + @echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" + @echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" + @echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" + @cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" #Generating OpenBLASConfig.cmake From b3f24c96640db9d01c073c5d61600a88e52b5dd5 Mon Sep 17 00:00:00 2001 From: xoviat Date: Thu, 11 Jan 2018 11:34:53 -0600 Subject: [PATCH 110/122] CMake: Use the correct library name on windows FindBLAS and FindLAPACK don't correctly detect the OpenBLAS library name on windows. I am unsure why this was originally set this way, but it has caused me some trouble. --- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1bdcd52ee..b5789119a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,11 +12,7 @@ set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${Open # Adhere to GNU filesystem layout conventions include(GNUInstallDirs) -if(MSVC) -set(OpenBLAS_LIBNAME libopenblas) -else() set(OpenBLAS_LIBNAME openblas) -endif() ####### if(MSVC) From 13e137fbc947c69edf782ea47d50e0bdc8234bef Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 12 Jan 2018 22:33:41 +0100 Subject: [PATCH 111/122] Initialize uninitialized variables (cppcheck) --- kernel/mips/cgemv_n_msa.c | 2 +- kernel/mips/dgemv_n_msa.c | 4 ++-- kernel/mips/sgemv_n_msa.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/mips/cgemv_n_msa.c b/kernel/mips/cgemv_n_msa.c index a9db04aaf..12fa7ca02 100644 --- a/kernel/mips/cgemv_n_msa.c +++ b/kernel/mips/cgemv_n_msa.c @@ -511,7 +511,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, BLASLONG i, j, k, k_pref, pref_offset; FLOAT *y_org = y; FLOAT *pa0, *pa1, *pa2, *pa3; - FLOAT temp_r, temp_i, res0, res1, temp0_r; + FLOAT temp_r = 0.0, temp_i = 0.0, res0, res1, temp0_r; FLOAT temp0_i, temp1_r, temp1_i, temp2_r, temp2_i, temp3_r, temp3_i; v4f32 alphar, alphai; v4f32 x0, x1, y0, y1, y2, y3, x0r, x0i, y0r, y1r, y0i, y1i; diff --git a/kernel/mips/dgemv_n_msa.c b/kernel/mips/dgemv_n_msa.c index 09bb063ff..82c3a96cf 100644 --- a/kernel/mips/dgemv_n_msa.c +++ b/kernel/mips/dgemv_n_msa.c @@ -484,10 +484,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *A, FLOAT *pa0, *pa1, *pa2, *pa3, *pa4, *pa5, *pa6, *pa7; FLOAT temp, temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; v2f64 v_alpha; - v2f64 x0, x1, x2, x3, y0, y1, y2, y3; + v2f64 x0, x1, x2, x3, y0 = 0.0, y1 = 0.0, y2 = 0.0, y3 = 0.0; v2f64 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15; v2f64 t16, t17, t18, t19, t20, t21, t22, t23, t24, t25, t26, t27, t28, t29; - v2f64 t30, t31, tp0, tp1, tp2, tp3, tp4, tp5, tp6, tp7; + v2f64 t30, t31, tp0 = 0.0, tp1 = 0.0, tp2 = 0.0, tp3 = 0.0, tp4 = 0.0, tp5 = 0.0, tp6 = 0.0, tp7 = 0.0; v_alpha = COPY_DOUBLE_TO_VECTOR(alpha); diff --git a/kernel/mips/sgemv_n_msa.c b/kernel/mips/sgemv_n_msa.c index ae6e6558f..e1ecb5473 100644 --- a/kernel/mips/sgemv_n_msa.c +++ b/kernel/mips/sgemv_n_msa.c @@ -423,9 +423,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *A, FLOAT *y_org = y; FLOAT *pa0, *pa1, *pa2, *pa3, *pa4, *pa5, *pa6, *pa7; FLOAT temp, temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - v4f32 v_alpha, x0, x1, y0, y1; + v4f32 v_alpha, x0, x1, y0 = 0.0, y1 = 0.0; v4f32 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15; - v4f32 tp0, tp1, tp2, tp3, tp4, tp5, tp6, tp7; + v4f32 tp0 = 0.0, tp1 = 0.0, tp2 = 0.0, tp3 = 0.0, tp4 = 0.0, tp5 = 0.0, tp6 = 0.0, tp7 = 0.0; v_alpha = COPY_FLOAT_TO_VECTOR(alpha); From 3eed97f6b9902beff1f6079b54f6f29eb7631f39 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 12 Jan 2018 22:35:00 +0100 Subject: [PATCH 112/122] Initialize values to silence cppcheck --- kernel/generic/trmmkernel_16x2.c | 2 ++ kernel/generic/trmmkernel_2x2.c | 2 ++ kernel/generic/trmmkernel_8x2.c | 2 ++ kernel/generic/ztrmmkernel_2x2.c | 2 ++ kernel/generic/ztrmmkernel_4x4.c | 2 ++ 5 files changed, 10 insertions(+) diff --git a/kernel/generic/trmmkernel_16x2.c b/kernel/generic/trmmkernel_16x2.c index 078a91dd5..8e96edf85 100644 --- a/kernel/generic/trmmkernel_16x2.c +++ b/kernel/generic/trmmkernel_16x2.c @@ -52,6 +52,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL #if !defined(LEFT) off = -offset; +#else + off = 0; #endif diff --git a/kernel/generic/trmmkernel_2x2.c b/kernel/generic/trmmkernel_2x2.c index 40fbeeabb..05fe1876c 100644 --- a/kernel/generic/trmmkernel_2x2.c +++ b/kernel/generic/trmmkernel_2x2.c @@ -11,6 +11,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL BLASLONG off, temp; #if defined(TRMMKERNEL) && !defined(LEFT) off = -offset; +#else + off = 0; #endif for (j=0; j Date: Fri, 12 Jan 2018 22:35:48 +0100 Subject: [PATCH 113/122] add missing bracket for old glibc (cppcheck) --- driver/others/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/others/init.c b/driver/others/init.c index 0cb8c78a2..012ef6647 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -903,6 +903,7 @@ void gotoblas_affinity_init(void) { } #else common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp); + } #endif #endif From 9fa986337d4bc80caab30c262ce9a82bd745ae6c Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 19 Jan 2018 23:11:12 +0100 Subject: [PATCH 114/122] add missing brackets to silence indentation warnings gcc721 --- kernel/generic/laswp_ncopy_2.c | 6 ++++-- kernel/generic/zlaswp_ncopy_2.c | 18 +++++++++--------- kernel/generic/zlaswp_ncopy_4.c | 6 ++++-- lapack/laswp/generic/laswp_k_4.c | 6 ++++-- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/kernel/generic/laswp_ncopy_2.c b/kernel/generic/laswp_ncopy_2.c index a29562df9..32dbf871b 100644 --- a/kernel/generic/laswp_ncopy_2.c +++ b/kernel/generic/laswp_ncopy_2.c @@ -116,7 +116,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *b2 = A2; *b4 = A4; } - } else + } else { if (b1 == a2) { if (b2 == a2) { *(buffer + 0) = A2; @@ -139,7 +139,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *(buffer + 3) = A4; *b1 = A1; *b3 = A3; - } else + } else { if (b2 == b1) { *(buffer + 0) = B1; *(buffer + 1) = B3; @@ -157,6 +157,8 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *b3 = A3; *b4 = A4; } + } + } } buffer += 4; diff --git a/kernel/generic/zlaswp_ncopy_2.c b/kernel/generic/zlaswp_ncopy_2.c index 407821a2f..7dbcf870a 100644 --- a/kernel/generic/zlaswp_ncopy_2.c +++ b/kernel/generic/zlaswp_ncopy_2.c @@ -204,20 +204,20 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *(b4 + 0) = A7; *(b4 + 1) = A8; } - } + } - buffer += 8; + buffer += 8; - b1 = a + ip1; - b2 = a + ip2; + b1 = a + ip1; + b2 = a + ip2; - b3 = b1 + lda; - b4 = b2 + lda; + b3 = b1 + lda; + b4 = b2 + lda; - a1 += 4; - a3 += 4; + a1 += 4; + a3 += 4; - i --; + i --; } while (i > 0); } diff --git a/kernel/generic/zlaswp_ncopy_4.c b/kernel/generic/zlaswp_ncopy_4.c index 192494b07..0de393838 100644 --- a/kernel/generic/zlaswp_ncopy_4.c +++ b/kernel/generic/zlaswp_ncopy_4.c @@ -462,7 +462,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *(b4 + 0) = A7; *(b4 + 1) = A8; } - } else + } else { if (b1 == a2) { if (b2 == a2) { *(buffer + 0) = A3; @@ -503,7 +503,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *(b1 + 1) = A2; *(b3 + 0) = A5; *(b3 + 1) = A6; - } else + } else { if (b2 == b1) { *(buffer + 0) = B1; *(buffer + 1) = B2; @@ -536,6 +536,8 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *(b4 + 0) = A7; *(b4 + 1) = A8; } + } + } } buffer += 8; diff --git a/lapack/laswp/generic/laswp_k_4.c b/lapack/laswp/generic/laswp_k_4.c index 5cb65170a..191a229a9 100644 --- a/lapack/laswp/generic/laswp_k_4.c +++ b/lapack/laswp/generic/laswp_k_4.c @@ -174,7 +174,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG *a8 = B8; *b8 = A8; } - } else + } else { if (b1 == a2) { if (b2 != a1) { if (b2 == a2) { @@ -225,7 +225,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG *b5 = A5; *a7 = B7; *b7 = A7; - } else + } else { if (b2 == b1) { *a1 = B1; *a2 = A1; @@ -257,6 +257,8 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG *b7 = A7; *b8 = A8; } + } + } } b1 = a + ip1; From 4938faa822b02aa1fed580e29e85e449cb4da4bf Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 19 Jan 2018 23:15:58 +0100 Subject: [PATCH 115/122] core.IdenticalExpr clang501 checker --- kernel/x86_64/dtrmm_kernel_4x8_haswell.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c index 70be88f07..289af772e 100644 --- a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c +++ b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c @@ -777,9 +777,9 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL res3_2 = 0; res3_3 = 0; - temp = backwards ? bk-off : - left ? off + 4 : // number of values in A - off + 4; // number of values in B + temp = backwards ? bk-off : off + 4; + /* left ? off + 4 : // number of values in A + off + 4; // number of values in B */ for (k=0; k Date: Fri, 19 Jan 2018 23:17:43 +0100 Subject: [PATCH 116/122] core.IdenticalExpr clang501 checker --- kernel/generic/trmm_ltcopy_2.c | 26 +++++-------------- kernel/generic/trmm_ltcopy_4.c | 37 +++++++-------------------- kernel/generic/trmm_utcopy_16.c | 45 ++++++++++++++------------------- kernel/generic/trmm_utcopy_2.c | 29 ++++++++------------- kernel/generic/trmm_utcopy_4.c | 25 ++++++++---------- kernel/generic/ztrmm_ltcopy_2.c | 44 +++++--------------------------- kernel/generic/ztrmm_utcopy_1.c | 31 ++++++++--------------- kernel/generic/ztrmm_utcopy_2.c | 33 ++++++++---------------- kernel/generic/ztrmm_utcopy_8.c | 21 +++++++-------- 9 files changed, 93 insertions(+), 198 deletions(-) diff --git a/kernel/generic/trmm_ltcopy_2.c b/kernel/generic/trmm_ltcopy_2.c index 13a3bc53c..60cdeed1c 100644 --- a/kernel/generic/trmm_ltcopy_2.c +++ b/kernel/generic/trmm_ltcopy_2.c @@ -120,30 +120,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON ao2 += 1; */ b += 2; } else +#ifdef UNIT if (X < posY) { - data01 = *(ao1 + 0); - data02 = *(ao1 + 1); - - b[ 0] = data01; - b[ 1] = data02; - // ao1 += lda; - b += 2; - } else { +#endif + b[ 0] = *(ao1 + 0); #ifdef UNIT - data02 = *(ao1 + 1); + } else { b[ 0] = ONE; - b[ 1] = data02; -#else - data01 = *(ao1 + 0); - data02 = *(ao1 + 1); - - b[ 0] = data01; - b[ 1] = data02; -#endif - // ao1 += 2; - b += 2; } +#endif + b[ 1] = *(ao1 + 1); + b += 2; } posY += 2; diff --git a/kernel/generic/trmm_ltcopy_4.c b/kernel/generic/trmm_ltcopy_4.c index 128536aad..e90d89209 100644 --- a/kernel/generic/trmm_ltcopy_4.c +++ b/kernel/generic/trmm_ltcopy_4.c @@ -410,36 +410,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 1); if (i) { - - if (X > posY) { - /* ao1 += 1; - ao2 += 1; */ - - b += 2; - } else - if (X < posY) { - data01 = *(ao1 + 0); - data02 = *(ao1 + 1); - - b[ 0] = data01; - b[ 1] = data02; - // ao1 += lda; - b += 2; - } else { #ifdef UNIT - data02 = *(ao1 + 1); - - b[ 0] = ONE; - b[ 1] = data02; -#else - data01 = *(ao1 + 0); - data02 = *(ao1 + 1); - - b[ 0] = data01; - b[ 1] = data02; + if (X < posY) { #endif - b += 2; - } + b[ 0] = *(ao1 + 0); +#ifdef UNIT + } else { + b[ 0] = ONE; + } +#endif + b[ 1] = *(ao1 + 1); + b += 2; } posY += 2; } diff --git a/kernel/generic/trmm_utcopy_16.c b/kernel/generic/trmm_utcopy_16.c index 5d0f7dd2e..12642e7db 100644 --- a/kernel/generic/trmm_utcopy_16.c +++ b/kernel/generic/trmm_utcopy_16.c @@ -1487,23 +1487,19 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (X < posY) { a01 ++; a02 ++; - b += 2; - } else + } else { +#ifdef UNIT if (X > posY) { +#endif b[ 0] = *(a01 + 0); - b[ 1] = *(a01 + 1); - // a01 += lda; - b += 2; - } else { #ifdef UNIT + } else { b[ 0] = ONE; - b[ 1] = *(a01 + 1); -#else - b[ 0] = *(a01 + 0); - b[ 1] = *(a01 + 1); -#endif - b += 2; } +#endif + b[ 1] = *(a01 + 1); + } + b += 2; } posY += 2; } @@ -1522,25 +1518,22 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON if (i > 0) { do { if (X < posY) { - a01 += 1; - b ++; - } else + a01 ++; + } else { +#ifdef UNIT if (X > posY) { +#endif b[ 0] = *(a01 + 0); - a01 += lda; - b ++; - } else { #ifdef UNIT + } else { b[ 0] = ONE; -#else - b[ 0] = *(a01 + 0); -#endif - a01 += lda; - b ++; } - - X += 1; - i --; +#endif + a01 += lda; + } + b ++; + X ++; + i --; } while (i > 0); } // posY += 1; diff --git a/kernel/generic/trmm_utcopy_2.c b/kernel/generic/trmm_utcopy_2.c index efa68162c..75076c382 100644 --- a/kernel/generic/trmm_utcopy_2.c +++ b/kernel/generic/trmm_utcopy_2.c @@ -161,27 +161,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = m; if (m > 0) { do { - if (X < posY) { - b += 1; - ao1 += 1; - } else - if (X > posY) { - data01 = *(ao1 + 0); - b[ 0] = data01; - b += 1; - ao1 += lda; - } else { #ifdef UNIT - b[ 0] = ONE; -#else - data01 = *(ao1 + 0); - b[ 0] = data01; + if (X > posY) { #endif - b += 1; - ao1 += lda; - } - - X += 1; + b[ 0] = *(ao1 + 0); +#ifdef UNIT + } else { + b[ 0] = ONE; + } +#endif + b ++; + ao1 += lda; + X ++; i --; } while (i > 0); } diff --git a/kernel/generic/trmm_utcopy_4.c b/kernel/generic/trmm_utcopy_4.c index 3e7726b61..e5844094e 100644 --- a/kernel/generic/trmm_utcopy_4.c +++ b/kernel/generic/trmm_utcopy_4.c @@ -443,26 +443,21 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON do { if (X < posY) { - b += 1; ao1 += 1; - } else + } else { +#ifdef UNIT if (X > posY) { - data01 = *(ao1 + 0); - b[ 0] = data01; - ao1 += lda; - b += 1; - } else { +#endif + b[ 0] = *(ao1 + 0); #ifdef UNIT + } else { b[ 0] = ONE; -#else - data01 = *(ao1 + 0); - b[ 0] = data01; -#endif - ao1 += lda; - b += 1; } - - X += 1; +#endif + ao1 += lda; + } + b ++; + X ++; i --; } while (i > 0); } diff --git a/kernel/generic/ztrmm_ltcopy_2.c b/kernel/generic/ztrmm_ltcopy_2.c index deb675f73..7969f4f3d 100644 --- a/kernel/generic/ztrmm_ltcopy_2.c +++ b/kernel/generic/ztrmm_ltcopy_2.c @@ -139,48 +139,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON } if (m & 1) { - - if (X > posY) { - /* ao1 += 2; - ao2 += 2; */ - b += 4; - - } else +#ifdef UNIT if (X < posY) { - data1 = *(ao1 + 0); - data2 = *(ao1 + 1); - data3 = *(ao1 + 2); - data4 = *(ao1 + 3); - - b[ 0] = data1; - b[ 1] = data2; - b[ 2] = data3; - b[ 3] = data4; - - // ao1 += lda; - b += 4; - } else { +#endif + b[ 0] = *(ao1 + 0); + b[ 1] = *(ao1 + 1); #ifdef UNIT - data3 = *(ao1 + 2); - data4 = *(ao1 + 3); - + } else { b[ 0] = ONE; b[ 1] = ZERO; - b[ 2] = data3; - b[ 3] = data4; -#else - data1 = *(ao1 + 0); - data2 = *(ao1 + 1); - data3 = *(ao1 + 2); - data4 = *(ao1 + 3); - - b[ 0] = data1; - b[ 1] = data2; - b[ 2] = data3; - b[ 3] = data4; -#endif - b += 4; } +#endif + b += 4; } posY += 2; diff --git a/kernel/generic/ztrmm_utcopy_1.c b/kernel/generic/ztrmm_utcopy_1.c index 2746c5f5c..94d23ef9b 100644 --- a/kernel/generic/ztrmm_utcopy_1.c +++ b/kernel/generic/ztrmm_utcopy_1.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON BLASLONG i, js; BLASLONG X; - FLOAT data01, data02; +// FLOAT data01, data02; FLOAT *ao1; lda += lda; @@ -65,30 +65,21 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON do { if (X < posY) { ao1 += 2; - b += 2; - } else + } else { +#ifdef UNIT if (X > posY) { - data01 = *(ao1 + 0); - data02 = *(ao1 + 1); - b[ 0] = data01; - b[ 1] = data02; - ao1 += lda; - b += 2; - - } else { +#endif + b[ 0] = *(ao1 + 0); + b[ 1] = *(ao1 + 1); #ifdef UNIT + } else { b[ 0] = ONE; b[ 1] = ZERO; -#else - data01 = *(ao1 + 0); - data02 = *(ao1 + 1); - b[ 0] = data01; - b[ 1] = data02; -#endif - ao1 += lda; - b += 2; } - +#endif + ao1 += lda; + } + b += 2; X ++; i --; } while (i > 0); diff --git a/kernel/generic/ztrmm_utcopy_2.c b/kernel/generic/ztrmm_utcopy_2.c index 6c7288ae9..03ce93d99 100644 --- a/kernel/generic/ztrmm_utcopy_2.c +++ b/kernel/generic/ztrmm_utcopy_2.c @@ -203,33 +203,22 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON do { if (X < posY) { ao1 += 2; - b += 2; - } else + } else { +#ifdef UNIT if (X > posY) { - data1 = *(ao1 + 0); - data2 = *(ao1 + 1); - - b[ 0] = data1; - b[ 1] = data2; - - ao1 += lda; - b += 2; - } else { +#endif + b[ 0] = *(ao1 + 0); + b[ 1] = *(ao1 + 1); #ifdef UNIT + } else { b[ 0] = ONE; b[ 1] = ZERO; -#else - data1 = *(ao1 + 0); - data2 = *(ao1 + 1); - - b[ 0] = data1; - b[ 1] = data2; -#endif - ao1 += lda; - b += 2; } - - X += 1; +#endif + ao1 += lda; + } + b += 2; + X ++; i --; } while (i > 0); } diff --git a/kernel/generic/ztrmm_utcopy_8.c b/kernel/generic/ztrmm_utcopy_8.c index ed4578579..946c136e7 100644 --- a/kernel/generic/ztrmm_utcopy_8.c +++ b/kernel/generic/ztrmm_utcopy_8.c @@ -856,25 +856,22 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON do { if (X < posY) { a01 += 2; - b += 2; - } else + } else { +#ifdef UNIT if (X > posY) { +#endif b[ 0] = *(a01 + 0); b[ 1] = *(a01 + 1); - a01 += lda; - b += 2; - } else { #ifdef UNIT + } else { b[ 0] = ONE; b[ 1] = ZERO; -#else - b[ 0] = *(a01 + 0); - b[ 1] = *(a01 + 1); -#endif - a01 += lda; - b += 2; } - X += 1; +#endif + a01 += lda; + } + b += 2; + X ++; i --; } while (i > 0); } From e5752ff9b322c665a7393d6109c2da7ad6ee2523 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 20 Jan 2018 11:42:31 +0100 Subject: [PATCH 117/122] take out unused variables --- driver/others/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index e4caa093a..1d5b70003 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -177,7 +177,7 @@ int get_num_procs(void) { cpu_set_t *cpusetp; size_t size; int ret; -int i,n; +// int i,n; if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); #if !defined(OS_LINUX) From 4a4f6658de0fb838a19dc493a0f6dc3b40127145 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 23 Jan 2018 21:33:21 +0100 Subject: [PATCH 118/122] When forcing USE_THREAD=0, override USE_OPENMP as well This avoids an error exit a few lines down as USE_THREAD=0 conflicts with USE_OPENMP=1 --- Makefile.system | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.system b/Makefile.system index 972238f36..5caad0b03 100644 --- a/Makefile.system +++ b/Makefile.system @@ -181,6 +181,7 @@ endif ifeq ($(NUM_THREADS), 1) override USE_THREAD = 0 +override USE_OPENMP = 0 endif ifdef USE_THREAD From 038bfbb86c890555aa58e987326044b1bf6f74ff Mon Sep 17 00:00:00 2001 From: xoviat Date: Fri, 26 Jan 2018 14:09:48 -0600 Subject: [PATCH 119/122] CMake: Remove unused wall option when FC=flang --- cmake/fc.cmake | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cmake/fc.cmake b/cmake/fc.cmake index f1c69d923..1446a900d 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -5,13 +5,8 @@ if (${F_COMPILER} STREQUAL "FLANG") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG") - if (BINARY64) - if (INTERFACE64) - set(FCOMMON_OPT "${FCOMMON_OPT} -i8") - endif () - set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + if (BINARY64 AND INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") endif () if (USE_OPENMP) set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp") From 485df776126256667dcd011c8d098803cc962a6e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 26 Jan 2018 23:20:00 +0100 Subject: [PATCH 120/122] Make USE_TRMM depend on TARGET_CORE not TARGET Fixes #1432 (and possibly other DTRMM-related failures on Haswell and related architectures when built with cmake) --- kernel/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index a720f6249..c06d1eae8 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -121,7 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) # Makefile.L3 set(USE_TRMM false) - if (ARM OR ARM64 OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "{CORE}" STREQUAL "zen") + if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen") set(USE_TRMM true) endif () From f653e7a18d42d9aa5303254432d097b972f8dddb Mon Sep 17 00:00:00 2001 From: Abdelrauf Date: Wed, 31 Jan 2018 07:49:38 -0800 Subject: [PATCH 121/122] small fix small fix inside ifdef z13mvc . (z13mvc code is not used in production) --- kernel/zarch/dcopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/zarch/dcopy.c b/kernel/zarch/dcopy.c index 968f63e1b..5dbfd4ad1 100644 --- a/kernel/zarch/dcopy.c +++ b/kernel/zarch/dcopy.c @@ -44,7 +44,7 @@ static void dcopy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "brctg %[n_tmp],1b" : [mem_y] "=m" (*(double (*)[n])y), [n_tmp] "+&r"(n) : [mem_x] "m" (*(const double (*)[n])x), - [ptr_x] "a"(x), [ptr_y] "a"(y) + [ptr_x] "+&a"(x), [ptr_y] "+&a"(y) : "cc" ); return; From 0ac824f6a5fb746c4dfbc554b9f7495e66485f10 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 31 Jan 2018 22:02:00 +0100 Subject: [PATCH 122/122] Also #define SPARC in config.h when autodetecting Fixes #1442 --- cpuid_sparc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cpuid_sparc.c b/cpuid_sparc.c index b1e212b2f..f390f0d7f 100644 --- a/cpuid_sparc.c +++ b/cpuid_sparc.c @@ -49,6 +49,7 @@ void get_subdirname(void){ } void get_cpuconfig(void){ + printf("#define SPARC\n"); printf("#define V9\n"); printf("#define DTB_DEFAULT_ENTRIES 32\n"); }