| @@ -68,3 +68,4 @@ test/zblat2 | |||||
| test/zblat3 | test/zblat3 | ||||
| build | build | ||||
| build.* | build.* | ||||
| *.swp | |||||
| @@ -24,7 +24,12 @@ before_install: | |||||
| - if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi | - if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi | ||||
| - if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi | - if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi | ||||
| script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE | |||||
| script: | |||||
| - set -e | |||||
| - make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE | |||||
| - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||||
| - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||||
| - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||||
| # whitelist | # whitelist | ||||
| branches: | branches: | ||||
| @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4) | |||||
| project(OpenBLAS) | project(OpenBLAS) | ||||
| set(OpenBLAS_MAJOR_VERSION 0) | set(OpenBLAS_MAJOR_VERSION 0) | ||||
| set(OpenBLAS_MINOR_VERSION 2) | set(OpenBLAS_MINOR_VERSION 2) | ||||
| set(OpenBLAS_PATCH_VERSION 14) | |||||
| set(OpenBLAS_PATCH_VERSION 16) | |||||
| set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | ||||
| enable_language(ASM) | enable_language(ASM) | ||||
| @@ -54,10 +54,6 @@ if (NOT DYNAMIC_ARCH) | |||||
| list(APPEND BLASDIRS kernel) | list(APPEND BLASDIRS kernel) | ||||
| endif () | endif () | ||||
| if (DEFINED UTEST_CHECK) | |||||
| set(SANITY_CHECK 1) | |||||
| endif () | |||||
| if (DEFINED SANITY_CHECK) | if (DEFINED SANITY_CHECK) | ||||
| list(APPEND BLASDIRS reference) | list(APPEND BLASDIRS reference) | ||||
| endif () | endif () | ||||
| @@ -110,6 +106,10 @@ if (${NO_STATIC} AND ${NO_SHARED}) | |||||
| message(FATAL_ERROR "Neither static nor shared are enabled.") | message(FATAL_ERROR "Neither static nor shared are enabled.") | ||||
| endif () | endif () | ||||
| #Set default output directory | |||||
| set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib ) | |||||
| set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib ) | |||||
| # get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) | # get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) | ||||
| set(TARGET_OBJS "") | set(TARGET_OBJS "") | ||||
| foreach (SUBDIR ${SUBDIRS}) | foreach (SUBDIR ${SUBDIRS}) | ||||
| @@ -139,6 +139,17 @@ add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET | |||||
| include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") | include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") | ||||
| # Set output for libopenblas | |||||
| set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) | |||||
| foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES}) | |||||
| string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG ) | |||||
| set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib) | |||||
| set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib) | |||||
| set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib) | |||||
| endforeach() | |||||
| enable_testing() | |||||
| add_subdirectory(utest) | |||||
| if(NOT MSVC) | if(NOT MSVC) | ||||
| #only build shared library for MSVC | #only build shared library for MSVC | ||||
| @@ -152,7 +163,6 @@ target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) | |||||
| endif() | endif() | ||||
| #build test and ctest | #build test and ctest | ||||
| enable_testing() | |||||
| add_subdirectory(test) | add_subdirectory(test) | ||||
| if(NOT NO_CBLAS) | if(NOT NO_CBLAS) | ||||
| add_subdirectory(ctest) | add_subdirectory(ctest) | ||||
| @@ -121,6 +121,17 @@ In chronological order: | |||||
| * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | ||||
| ARMv8 support. | ARMv8 support. | ||||
| * Jerome Robert <jeromerobert@gmx.com> | |||||
| * [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478) | |||||
| * [2015-12-23] `stack_check` in `gemv.c` (bug #722) | |||||
| * [2015-12-28] Allow to force the number of parallel make jobs | |||||
| * [2015-12-28] Fix detection of AMD E2-3200 | |||||
| * [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected | |||||
| * [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731) | |||||
| * [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742) | |||||
| * [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760) | |||||
| * [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727) | |||||
| * Dan Kortschak | * Dan Kortschak | ||||
| * [2015-01-07] Added test for drotmg bug #484. | * [2015-01-07] Added test for drotmg bug #484. | ||||
| @@ -130,5 +141,11 @@ In chronological order: | |||||
| * Martin Koehler <https://github.com/grisuthedragon/> | * Martin Koehler <https://github.com/grisuthedragon/> | ||||
| * [2015-09-07] Improved imatcopy | * [2015-09-07] Improved imatcopy | ||||
| * Ashwin Sekhar T K <https://github.com/ashwinyes/> | |||||
| * [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8) | |||||
| * [2015-11-20] lapack-test fixes for Cortex-A57 | |||||
| * [2016-03-14] Additional functional Assembly Kernels for Cortex-A57 | |||||
| * [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57 | |||||
| * [Your name or handle] <[email or website]> | * [Your name or handle] <[email or website]> | ||||
| * [Date] [Brief summary of your changes] | * [Date] [Brief summary of your changes] | ||||
| @@ -1,4 +1,57 @@ | |||||
| OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
| ==================================================================== | |||||
| Version 0.2.16 | |||||
| 15-Mar-2016 | |||||
| common: | |||||
| * Avoid potential getenv segfault. (#716) | |||||
| * Import LAPACK svn bugfix #142-#147,#150-#155 | |||||
| x86/x86_64: | |||||
| * Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller | |||||
| * Fix bug with scipy linalg test. | |||||
| ARM: | |||||
| * Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K) | |||||
| POWER: | |||||
| * Optimize D and Z BLAS3 functions for Power8. | |||||
| ==================================================================== | |||||
| Version 0.2.16.rc1 | |||||
| 23-Feb-2016 | |||||
| common: | |||||
| * Upgrade LAPACK to 3.6.0 version. | |||||
| Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build | |||||
| LAPACK deprecated functions. | |||||
| * Add MAKE_NB_JOBS option in Makefile. | |||||
| Force number of make jobs. This is particularly | |||||
| useful when using distcc. (#735. Thanks, Jerome Robert.) | |||||
| * Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor). | |||||
| * Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert) | |||||
| * Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert) | |||||
| * Let openblas_get_num_threads return the number of active threads. | |||||
| (#760. Thanks, Jerome Robert) | |||||
| * Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen) | |||||
| * Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey) | |||||
| * Update scipy benchmark script. (#745. Thanks, John Kirkham) | |||||
| x86/x86_64: | |||||
| * Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller. | |||||
| * Detect Intel Avoton. | |||||
| * Detect AMD Trinity, Richland, E2-3200. | |||||
| * Fix gemv performance bug on Mac OSX Intel Haswell. | |||||
| * Fix some bugs with CMake and Visual Studio | |||||
| ARM: | |||||
| * Support and optimize Cortex-A57 AArch64. | |||||
| (#686. Thanks, Ashwin Sekhar TK) | |||||
| * Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere) | |||||
| * Update ARMV6 kernels. | |||||
| POWER: | |||||
| * Fix detection of POWER architecture | |||||
| (#684. Thanks, Sebastien Villemot) | |||||
| ==================================================================== | ==================================================================== | ||||
| Version 0.2.15 | Version 0.2.15 | ||||
| 27-Oct-2015 | 27-Oct-2015 | ||||
| @@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1) | |||||
| BLASDIRS += kernel | BLASDIRS += kernel | ||||
| endif | endif | ||||
| ifdef UTEST_CHECK | |||||
| SANITY_CHECK = 1 | |||||
| endif | |||||
| ifdef SANITY_CHECK | ifdef SANITY_CHECK | ||||
| BLASDIRS += reference | BLASDIRS += reference | ||||
| endif | endif | ||||
| @@ -85,22 +81,22 @@ endif | |||||
| shared : | shared : | ||||
| ifndef NO_SHARED | ifndef NO_SHARED | ||||
| ifeq ($(OSNAME), Linux) | |||||
| ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS)) | |||||
| @$(MAKE) -C exports so | @$(MAKE) -C exports so | ||||
| @-ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
| @-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||||
| @ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
| @ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), FreeBSD) | ifeq ($(OSNAME), FreeBSD) | ||||
| @$(MAKE) -C exports so | @$(MAKE) -C exports so | ||||
| @-ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
| @ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), NetBSD) | ifeq ($(OSNAME), NetBSD) | ||||
| @$(MAKE) -C exports so | @$(MAKE) -C exports so | ||||
| @-ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
| @ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), Darwin) | ifeq ($(OSNAME), Darwin) | ||||
| @$(MAKE) -C exports dyn | @$(MAKE) -C exports dyn | ||||
| @-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | |||||
| @ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
| @$(MAKE) -C exports dll | @$(MAKE) -C exports dll | ||||
| @@ -117,10 +113,8 @@ ifndef CROSS | |||||
| touch $(LIBNAME) | touch $(LIBNAME) | ||||
| ifndef NO_FBLAS | ifndef NO_FBLAS | ||||
| $(MAKE) -C test all | $(MAKE) -C test all | ||||
| ifdef UTEST_CHECK | |||||
| $(MAKE) -C utest all | $(MAKE) -C utest all | ||||
| endif | endif | ||||
| endif | |||||
| ifndef NO_CBLAS | ifndef NO_CBLAS | ||||
| $(MAKE) -C ctest all | $(MAKE) -C ctest all | ||||
| endif | endif | ||||
| @@ -249,16 +243,23 @@ ifndef NOFORTRAN | |||||
| -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| ifeq ($(FC), gfortran) | |||||
| ifeq ($(F_COMPILER), GFORTRAN) | |||||
| -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| ifdef SMP | ifdef SMP | ||||
| ifeq ($(OSNAME), WINNT) | |||||
| -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| else | |||||
| -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | |||||
| else | else | ||||
| -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | endif | ||||
| else | else | ||||
| -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | |||||
| ifeq ($(BUILD_LAPACK_DEPRECATED), 1) | |||||
| -@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| endif | endif | ||||
| -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | endif | ||||
| @@ -288,8 +289,18 @@ endif | |||||
| lapack-test : | lapack-test : | ||||
| (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) | (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) | ||||
| make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | ||||
| ifneq ($(CROSS), 1) | |||||
| ( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||||
| ./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||||
| endif | |||||
| lapack-runtest: | |||||
| ( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||||
| ./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | ||||
| blas-test: | blas-test: | ||||
| (cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) | (cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) | ||||
| make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing | make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing | ||||
| @@ -11,8 +11,8 @@ endif | |||||
| ifeq ($(CORE), ARMV7) | ifeq ($(CORE), ARMV7) | ||||
| ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
| CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
| FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
| CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
| FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
| else | else | ||||
| CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
| FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
| @@ -29,5 +29,3 @@ ifeq ($(CORE), ARMV5) | |||||
| CCOMMON_OPT += -marm -march=armv5 | CCOMMON_OPT += -marm -march=armv5 | ||||
| FCOMMON_OPT += -marm -march=armv5 | FCOMMON_OPT += -marm -march=armv5 | ||||
| endif | endif | ||||
| @@ -4,4 +4,8 @@ CCOMMON_OPT += -march=armv8-a | |||||
| FCOMMON_OPT += -march=armv8-a | FCOMMON_OPT += -march=armv8-a | ||||
| endif | endif | ||||
| ifeq ($(CORE), CORTEXA57) | |||||
| CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
| FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
| endif | |||||
| @@ -29,7 +29,7 @@ install : lib.grd | |||||
| #for inc | #for inc | ||||
| @echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
| @$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
| @echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @@ -48,10 +48,10 @@ endif | |||||
| ifndef NO_LAPACKE | ifndef NO_LAPACKE | ||||
| @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | ||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h | |||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h | |||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h | |||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h | |||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h | |||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h | |||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h | |||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h | |||||
| endif | endif | ||||
| #for install static library | #for install static library | ||||
| @@ -64,7 +64,7 @@ endif | |||||
| #for install shared library | #for install shared library | ||||
| ifndef NO_SHARED | ifndef NO_SHARED | ||||
| @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | ||||
| ifeq ($(OSNAME), Linux) | |||||
| ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS)) | |||||
| @install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | @install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | ||||
| @cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ | @cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ | ||||
| ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ | ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ | ||||
| @@ -3,7 +3,7 @@ | |||||
| # | # | ||||
| # This library's version | # This library's version | ||||
| VERSION = 0.2.15 | |||||
| VERSION = 0.2.16 | |||||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
| @@ -79,6 +79,9 @@ VERSION = 0.2.15 | |||||
| # If you don't need LAPACKE (C Interface to LAPACK), please comment it in. | # If you don't need LAPACKE (C Interface to LAPACK), please comment it in. | ||||
| # NO_LAPACKE = 1 | # NO_LAPACKE = 1 | ||||
| # Build LAPACK Deprecated functions since LAPACK 3.6.0 | |||||
| # BUILD_LAPACK_DEPRECATED = 1 | |||||
| # If you want to use legacy threaded Level 3 implementation. | # If you want to use legacy threaded Level 3 implementation. | ||||
| # USE_SIMPLE_THREADED_LEVEL3 = 1 | # USE_SIMPLE_THREADED_LEVEL3 = 1 | ||||
| @@ -108,6 +111,10 @@ NO_AFFINITY = 1 | |||||
| # Don't use parallel make. | # Don't use parallel make. | ||||
| # NO_PARALLEL_MAKE = 1 | # NO_PARALLEL_MAKE = 1 | ||||
| # Force number of make jobs. The default is the number of logical CPU of the host. | |||||
| # This is particularly useful when using distcc | |||||
| # MAKE_NB_JOBS = 2 | |||||
| # If you would like to know minute performance report of GotoBLAS. | # If you would like to know minute performance report of GotoBLAS. | ||||
| # FUNCTION_PROFILE = 1 | # FUNCTION_PROFILE = 1 | ||||
| @@ -138,10 +145,6 @@ NO_AFFINITY = 1 | |||||
| # slow (Not implemented yet). | # slow (Not implemented yet). | ||||
| # SANITY_CHECK = 1 | # SANITY_CHECK = 1 | ||||
| # Run testcases in utest/ . When you enable UTEST_CHECK, it would enable | |||||
| # SANITY_CHECK to compare the result with reference BLAS. | |||||
| # UTEST_CHECK = 1 | |||||
| # The installation directory. | # The installation directory. | ||||
| # PREFIX = /opt/OpenBLAS | # PREFIX = /opt/OpenBLAS | ||||
| @@ -159,10 +162,11 @@ COMMON_PROF = -pg | |||||
| # Build Debug version | # Build Debug version | ||||
| # DEBUG = 1 | # DEBUG = 1 | ||||
| # Improve GEMV and GER for small matrices by stack allocation. | |||||
| # For details, https://github.com/xianyi/OpenBLAS/pull/482 | |||||
| # Set maximum stack allocation. | |||||
| # The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV | |||||
| # performance. For details, https://github.com/xianyi/OpenBLAS/pull/482 | |||||
| # | # | ||||
| MAX_STACK_ALLOC=2048 | |||||
| # MAX_STACK_ALLOC = 0 | |||||
| # Add a prefix or suffix to all exported symbol names in the shared library. | # Add a prefix or suffix to all exported symbol names in the shared library. | ||||
| # Avoid conflicts with other BLAS libraries, especially when using | # Avoid conflicts with other BLAS libraries, especially when using | ||||
| @@ -139,6 +139,10 @@ NO_PARALLEL_MAKE=0 | |||||
| endif | endif | ||||
| GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE) | GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE) | ||||
| ifdef MAKE_NB_JOBS | |||||
| GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS) | |||||
| endif | |||||
| ifeq ($(HOSTCC), loongcc) | ifeq ($(HOSTCC), loongcc) | ||||
| GETARCH_FLAGS += -static | GETARCH_FLAGS += -static | ||||
| endif | endif | ||||
| @@ -292,12 +296,14 @@ endif | |||||
| ifneq ($(OSNAME), WINNT) | ifneq ($(OSNAME), WINNT) | ||||
| ifneq ($(OSNAME), CYGWIN_NT) | ifneq ($(OSNAME), CYGWIN_NT) | ||||
| ifneq ($(OSNAME), Interix) | ifneq ($(OSNAME), Interix) | ||||
| ifneq ($(OSNAME), Android) | |||||
| ifdef SMP | ifdef SMP | ||||
| EXTRALIB += -lpthread | EXTRALIB += -lpthread | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| # ifeq logical or | # ifeq logical or | ||||
| ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) | ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) | ||||
| @@ -324,7 +330,8 @@ ifdef SANITY_CHECK | |||||
| CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU) | CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU) | ||||
| endif | endif | ||||
| ifdef MAX_STACK_ALLOC | |||||
| MAX_STACK_ALLOC ?= 2048 | |||||
| ifneq ($(MAX_STACK_ALLOC), 0) | |||||
| CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC) | CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC) | ||||
| endif | endif | ||||
| @@ -374,7 +381,7 @@ FCOMMON_OPT += -m128bit-long-double | |||||
| endif | endif | ||||
| ifeq ($(C_COMPILER), CLANG) | ifeq ($(C_COMPILER), CLANG) | ||||
| EXPRECISION = 1 | EXPRECISION = 1 | ||||
| CCOMMON_OPT += -DEXPRECISION | |||||
| CCOMMON_OPT += -DEXPRECISION | |||||
| FCOMMON_OPT += -m128bit-long-double | FCOMMON_OPT += -m128bit-long-double | ||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -388,7 +395,7 @@ endif | |||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| #check | |||||
| #check | |||||
| ifeq ($(USE_THREAD), 0) | ifeq ($(USE_THREAD), 0) | ||||
| $(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.) | $(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.) | ||||
| endif | endif | ||||
| @@ -952,17 +959,18 @@ ifeq ($(OSNAME), SunOS) | |||||
| TAR = gtar | TAR = gtar | ||||
| PATCH = gpatch | PATCH = gpatch | ||||
| GREP = ggrep | GREP = ggrep | ||||
| AWK = nawk | |||||
| else | else | ||||
| TAR = tar | TAR = tar | ||||
| PATCH = patch | PATCH = patch | ||||
| GREP = grep | GREP = grep | ||||
| AWK = awk | |||||
| endif | endif | ||||
| ifndef MD5SUM | ifndef MD5SUM | ||||
| MD5SUM = md5sum | MD5SUM = md5sum | ||||
| endif | endif | ||||
| AWK = awk | |||||
| REVISION = -r$(VERSION) | REVISION = -r$(VERSION) | ||||
| MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION))) | MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION))) | ||||
| @@ -971,16 +979,25 @@ ifeq ($(DEBUG), 1) | |||||
| COMMON_OPT += -g | COMMON_OPT += -g | ||||
| endif | endif | ||||
| ifeq ($(DEBUG), 1) | |||||
| FCOMMON_OPT += -g | |||||
| endif | |||||
| ifndef COMMON_OPT | ifndef COMMON_OPT | ||||
| COMMON_OPT = -O2 | COMMON_OPT = -O2 | ||||
| endif | endif | ||||
| ifndef FCOMMON_OPT | |||||
| FCOMMON_OPT = -O2 -frecursive | |||||
| endif | |||||
| override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) | override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) | ||||
| override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF) | override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF) | ||||
| override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) | |||||
| override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) | |||||
| override FFLAGS += $(FCOMMON_OPT) | |||||
| override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF) | |||||
| #MAKEOVERRIDES = | #MAKEOVERRIDES = | ||||
| #For LAPACK Fortran codes. | #For LAPACK Fortran codes. | ||||
| @@ -1170,4 +1187,3 @@ SUNPATH = /opt/sunstudio12.1 | |||||
| else | else | ||||
| SUNPATH = /opt/SUNWspro | SUNPATH = /opt/SUNWspro | ||||
| endif | endif | ||||
| @@ -75,10 +75,11 @@ Please read GotoBLAS_01Readme.txt | |||||
| #### ARM64: | #### ARM64: | ||||
| - **ARMV8**: Experimental | - **ARMV8**: Experimental | ||||
| - **ARM Cortex-A57**: Experimental | |||||
| ### Support OS: | ### Support OS: | ||||
| - **GNU/Linux** | - **GNU/Linux** | ||||
| - **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>. | |||||
| - **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>. | |||||
| - **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X. | - **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X. | ||||
| - **FreeBSD**: Supported by community. We didn't test the library on this OS. | - **FreeBSD**: Supported by community. We didn't test the library on this OS. | ||||
| @@ -74,3 +74,5 @@ ARMV5 | |||||
| 7.ARM 64-bit CPU: | 7.ARM 64-bit CPU: | ||||
| ARMV8 | ARMV8 | ||||
| CORTEXA57 | |||||
| @@ -0,0 +1,199 @@ | |||||
| # Notes on OpenBLAS usage | |||||
| ## Usage | |||||
| #### Program is Terminated. Because you tried to allocate too many memory regions | |||||
| In OpenBLAS, we manage a pool of memory buffers and allocate the number of | |||||
| buffers as the following. | |||||
| ``` | |||||
| #define NUM_BUFFERS (MAX_CPU_NUMBER * 2) | |||||
| ``` | |||||
| This error indicates that the program exceeded the number of buffers. | |||||
| Please build OpenBLAS with larger `NUM_THREADS`. For example, `make | |||||
| NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set | |||||
| `MAX_CPU_NUMBER=NUM_THREADS`. | |||||
| #### How can I use OpenBLAS in multi-threaded applications? | |||||
| If your application is already multi-threaded, it will conflict with OpenBLAS | |||||
| multi-threading. Thus, you must set OpenBLAS to use single thread in any of the | |||||
| following ways: | |||||
| * `export OPENBLAS_NUM_THREADS=1` in the environment variables. | |||||
| * Call `openblas_set_num_threads(1)` in the application at runtime. | |||||
| * Build OpenBLAS single thread version, e.g. `make USE_THREAD=0` | |||||
| If the application is parallelized by OpenMP, please use OpenBLAS built with | |||||
| `USE_OPENMP=1` | |||||
| #### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH | |||||
| The environment variable which controls the kernel selection is | |||||
| `OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export | |||||
| OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()` | |||||
| returns the used target. | |||||
| #### How could I disable OpenBLAS threading affinity on runtime? | |||||
| You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment | |||||
| variable to disable threading affinity at runtime. For example, before | |||||
| running your program, | |||||
| ``` | |||||
| export OPENBLAS_MAIN_FREE=1 | |||||
| ``` | |||||
| Alternatively, you can disable the affinity feature by enabling `NO_AFFINITY=1` | |||||
| in `Makefile.rule`. | |||||
| ## Linking with the library | |||||
| * Link with shared library | |||||
| `gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas` | |||||
| If the library is multithreaded, please add `-lpthread`. If the library | |||||
| contains LAPACK functions, please add `-lgfortran` or other Fortran libs. | |||||
| * Link with static library | |||||
| `gcc -o test test.c /your/path/libopenblas.a` | |||||
| You can download `test.c` from https://gist.github.com/xianyi/5780018 | |||||
| On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by | |||||
| default), custom programs statically linked against `libopenblas.a` should also | |||||
| link with the pthread library e.g.: | |||||
| ``` | |||||
| gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread | |||||
| ``` | |||||
| Failing to add the `-lpthread` flag will cause errors such as: | |||||
| ``` | |||||
| /opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory': | |||||
| memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock' | |||||
| memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock' | |||||
| ... | |||||
| ``` | |||||
| ## Code examples | |||||
| #### Call CBLAS interface | |||||
| This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656 | |||||
| ``` | |||||
| #include <cblas.h> | |||||
| #include <stdio.h> | |||||
| void main() | |||||
| { | |||||
| int i=0; | |||||
| double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0}; | |||||
| double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0}; | |||||
| double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5}; | |||||
| cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3); | |||||
| for(i=0; i<9; i++) | |||||
| printf("%lf ", C[i]); | |||||
| printf("\n"); | |||||
| } | |||||
| ``` | |||||
| `gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran` | |||||
| #### Call BLAS Fortran interface | |||||
| This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018 | |||||
| ``` | |||||
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/* Fortran-interface DGEMM: every argument is passed by reference. */
extern void dgemm_(char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*);

/*
 * Usage: ./time_dgemm <m> <n> <k>
 *
 * Fills A (m*k), B (k*n) and C (m*n) with the repeating pattern 1,2,3,
 * times a single dgemm_ call, and appends the dimensions, elapsed time and
 * rate to timeDGEMM.txt. Returns 1 on bad input or resource failure.
 */
int main(int argc, char* argv[])
{
    int i;

    printf("test!\n");
    if (argc < 4) {
        printf("Input Error\n");
        return 1;
    }

    int m = atoi(argv[1]);
    int n = atoi(argv[2]);
    int k = atoi(argv[3]);
    /* atoi yields 0 for non-numeric text; reject it and negative sizes. */
    if (m <= 0 || n <= 0 || k <= 0) {
        printf("Input Error\n");
        return 1;
    }

    int sizeofa = m * k;
    int sizeofb = k * n;
    int sizeofc = m * n;
    char ta = 'N';
    char tb = 'N';
    double alpha = 1.2;
    double beta = 0.001;

    struct timeval start, finish;
    double duration;

    double* A = (double*)malloc(sizeof(double) * sizeofa);
    double* B = (double*)malloc(sizeof(double) * sizeofb);
    double* C = (double*)malloc(sizeof(double) * sizeofc);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(A);
        free(B);
        free(C);
        return 1;
    }

    for (i = 0; i < sizeofa; i++)
        A[i] = i % 3 + 1;
    for (i = 0; i < sizeofb; i++)
        B[i] = i % 3 + 1;
    for (i = 0; i < sizeofc; i++)
        C[i] = i % 3 + 1;

    printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n", m, n, k, alpha, beta, sizeofc);

    gettimeofday(&start, NULL);
    dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
    gettimeofday(&finish, NULL);

    /* Elapsed wall-clock time in seconds. */
    duration = ((double)(finish.tv_sec - start.tv_sec) * 1000000 + (double)(finish.tv_usec - start.tv_usec)) / 1000000;

    /* 2*m*n*k floating-point operations, scaled by 1e-6 to report MFLOPS. */
    double mflops = 2.0 * m * n * k;
    mflops = mflops / duration * 1.0e-6;

    int status = 0;
    FILE *fp = fopen("timeDGEMM.txt", "a");
    if (fp == NULL) {
        perror("timeDGEMM.txt");
        status = 1;
    } else {
        fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, mflops);
        fclose(fp);
    }

    free(A);
    free(B);
    free(C);
    return status;
}
| ``` | |||||
| ` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a` | |||||
| ` ./time_dgemm <m> <n> <k> ` | |||||
| ## Troubleshooting | |||||
| * Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first. | |||||
| * Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD. | |||||
| * Please use Clang version 3.1 or above to compile the library on the Sandy Bridge microarchitecture. Clang 3.0 generates incorrect AVX binary code. | |||||
| * The number of CPUs/cores should be less than or equal to 256. On Linux x86_64 (amd64), there is experimental support for up to 1024 CPUs/cores and 128 NUMA nodes if you build the library with BIGNUMA=1. | |||||
| * OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting out the line NO_AFFINITY=1 in Makefile.rule. However, this may cause [a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). | |||||
| * On Loongson 3A, `make test` may fail because of a pthread_create error (the error code is EAGAIN). However, the same test case runs correctly when you run it directly from the shell. | |||||
| ## BLAS reference manual | |||||
| If you want to understand every BLAS function and definition, please read | |||||
| [Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm) | |||||
| or [netlib.org](http://netlib.org/blas/) | |||||
| Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions) | |||||
| ## How to reference OpenBLAS. | |||||
| You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications). | |||||
| Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly. | |||||
| @@ -39,4 +39,6 @@ before_build: | |||||
| - cmake -G "Visual Studio 12 Win64" . | - cmake -G "Visual Studio 12 Win64" . | ||||
| test_script: | test_script: | ||||
| - echo Build OK! | |||||
| - echo Running Test | |||||
| - cd c:\projects\OpenBLAS\utest | |||||
| - openblas_utest | |||||
| @@ -166,7 +166,8 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||||
| sgeev.goto dgeev.goto cgeev.goto zgeev.goto \ | sgeev.goto dgeev.goto cgeev.goto zgeev.goto \ | ||||
| sgetri.goto dgetri.goto cgetri.goto zgetri.goto \ | sgetri.goto dgetri.goto cgetri.goto zgetri.goto \ | ||||
| spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \ | spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \ | ||||
| ssymm.goto dsymm.goto csymm.goto zsymm.goto | |||||
| ssymm.goto dsymm.goto csymm.goto zsymm.goto \ | |||||
| smallscaling | |||||
| acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | ||||
| scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | ||||
| @@ -2132,6 +2133,8 @@ cgemm3m.$(SUFFIX) : gemm3m.c | |||||
| zgemm3m.$(SUFFIX) : gemm3m.c | zgemm3m.$(SUFFIX) : gemm3m.c | ||||
| $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | ||||
# Small-matrix threading benchmark. smallscaling.c uses both OpenMP and the
# pthread API (pthread_create/pthread_join), so link against both runtimes.
smallscaling: smallscaling.c ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm -lpthread
| clean :: | clean :: | ||||
| @rm -f *.goto *.mkl *.acml *.atlas *.veclib | @rm -f *.goto *.mkl *.acml *.atlas *.veclib | ||||
| @@ -172,7 +172,7 @@ int main(int argc, char *argv[]){ | |||||
| srandom(getpid()); | srandom(getpid()); | ||||
| #endif | #endif | ||||
| for(j = 0; j < m; j++){ | |||||
| for(j = 0; j < to; j++){ | |||||
| for(i = 0; i < to * COMPSIZE; i++){ | for(i = 0; i < to * COMPSIZE; i++){ | ||||
| a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| @@ -0,0 +1,58 @@ | |||||
| #!/usr/bin/env python | |||||
| import os | |||||
| import sys | |||||
| import time | |||||
| import numpy | |||||
| from numpy import zeros | |||||
| from numpy.random import randn | |||||
| from scipy.linalg import blas | |||||
def run_dsyrk(N, l):
    """Time ``l`` consecutive ``blas.dsyrk`` calls and print the measured rate.

    N -- order of the square float64 input matrix (Fortran-ordered).
    l -- number of dsyrk calls inside the timed region.

    Prints one line with the size, the rate in MFlops (counted as N*N*N
    operations per call) and the elapsed seconds. Returns None.
    """
    A = randn(N, N).astype('float64', order='F')
    C = zeros((N, N), dtype='float64', order='F')

    start = time.time()
    for _ in range(l):
        blas.dsyrk(1.0, A, c=C, overwrite_c=True)
    timediff = time.time() - start

    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        # The clock did not advance (coarse timer, tiny problem); report an
        # unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sizes 128..2048 in steps of 128, one timed call per size.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides: <from> <to> <step> <loops>.
    # Any further arguments are ignored.
    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS environment variable takes precedence over
    # the command-line loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # NMAX is inclusive. Stopping at NMAX + 1 never runs a size larger than
    # the requested maximum, even when NMAX - N is not a multiple of NINC.
    for size in range(N, NMAX + 1, NINC):
        run_dsyrk(size, LOOPS)
| @@ -0,0 +1,58 @@ | |||||
| #!/usr/bin/env python | |||||
| import os | |||||
| import sys | |||||
| import time | |||||
| import numpy | |||||
| from numpy import zeros | |||||
| from numpy.random import randn | |||||
| from scipy.linalg import blas | |||||
def run_ssyrk(N, l):
    """Time ``l`` consecutive ``blas.ssyrk`` calls and print the measured rate.

    N -- order of the square float32 input matrix (Fortran-ordered).
    l -- number of ssyrk calls inside the timed region.

    Prints one line with the size, the rate in MFlops (counted as N*N*N
    operations per call) and the elapsed seconds. Returns None.
    """
    A = randn(N, N).astype('float32', order='F')
    C = zeros((N, N), dtype='float32', order='F')

    start = time.time()
    for _ in range(l):
        blas.ssyrk(1.0, A, c=C, overwrite_c=True)
    timediff = time.time() - start

    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        # The clock did not advance (coarse timer, tiny problem); report an
        # unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sizes 128..2048 in steps of 128, one timed call per size.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Optional positional overrides: <from> <to> <step> <loops>.
    # Any further arguments are ignored.
    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS environment variable takes precedence over
    # the command-line loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # NMAX is inclusive. Stopping at NMAX + 1 never runs a size larger than
    # the requested maximum, even when NMAX - N is not a multiple of NINC.
    for size in range(N, NMAX + 1, NINC):
        run_ssyrk(size, LOOPS)
| @@ -0,0 +1,196 @@ | |||||
| // run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n | |||||
| #include <math.h> | |||||
| #include <stdlib.h> | |||||
| #include <stdio.h> | |||||
| #include <time.h> | |||||
| #include <cblas.h> | |||||
| #include <omp.h> | |||||
| #define MIN_SIZE 5 | |||||
| #define MAX_SIZE 60 | |||||
| #define NB_SIZE 10 | |||||
| // number of loops for a 1x1 matrix. Lower it if the test is | |||||
| // too slow on your computer. | |||||
| #define NLOOP 2e7 | |||||
/* Everything one benchmark run needs: the problem size, the loop budget and
 * the three callbacks that drive it. */
typedef struct {
    /* Dimension used by the benchmarks to build size x size matrices. */
    int matrix_size;
    /* Loop budget; each benchmark divides it by matrix_size. */
    int n_loop;
    /* Benchmark driver; invoked with a pointer to this structure. */
    void (* bench_func)();
    /* BLAS routine called repeatedly by the driver. */
    void (* blas_func)();
    /* Allocates and fills an operand buffer for the given element count. */
    void * (* create_matrix)(int size);
} BenchParam;
/* Allocates count elements of elem_size bytes each; aborts the benchmark
 * with a message when the allocation fails, so callers never write through
 * a NULL pointer. */
static void * bench_alloc(size_t count, size_t elem_size) {
    void * buffer = malloc(count * elem_size);
    if (buffer == NULL && count > 0) {
        fprintf(stderr, "ERROR; cannot allocate %lu elements\n", (unsigned long)count);
        exit(EXIT_FAILURE);
    }
    return buffer;
}

/* Single-precision real buffer of `size` floats, filled with 1e3 * i / size.
 * The caller releases it with free(). */
void * s_create_matrix(int size) {
    /* Sized by the element type actually stored (float, not double). */
    float * r = bench_alloc(size, sizeof *r);
    int i;
    for(i = 0; i < size; i++)
        r[i] = 1e3 * i / size;
    return r;
}

/* Single-precision complex buffer: 2 * size floats, filled with
 * 1e3 * i / size. The caller releases it with free(). */
void * c_create_matrix(int size) {
    /* Sized by the element type actually stored (float, not double). */
    float * r = bench_alloc((size_t)size * 2, sizeof *r);
    int i;
    for(i = 0; i < 2 * size; i++)
        r[i] = 1e3 * i / size;
    return r;
}

/* Double-precision complex buffer: 2 * size doubles, filled with
 * 1e3 * i / size. The caller releases it with free(). */
void * z_create_matrix(int size) {
    double * r = bench_alloc((size_t)size * 2, sizeof *r);
    int i;
    for(i = 0; i < 2 * size; i++)
        r[i] = 1e3 * i / size;
    return r;
}

/* Double-precision real buffer of `size` doubles, filled with
 * 1e3 * i / size. The caller releases it with free(). */
void * d_create_matrix(int size) {
    double * r = bench_alloc(size, sizeof *r);
    int i;
    for(i = 0; i < size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
| void trmv_bench(BenchParam * param) | |||||
| { | |||||
| int i, n; | |||||
| int size = param->matrix_size; | |||||
| n = param->n_loop / size; | |||||
| int one = 1; | |||||
| void * A = param->create_matrix(size * size); | |||||
| void * y = param->create_matrix(size); | |||||
| for(i = 0; i < n; i++) { | |||||
| param->blas_func("U", "N", "N", &size, A, &size, y, &one); | |||||
| } | |||||
| free(A); | |||||
| free(y); | |||||
| } | |||||
| void gemv_bench(BenchParam * param) | |||||
| { | |||||
| int i, n; | |||||
| int size = param->matrix_size; | |||||
| n = param->n_loop / size; | |||||
| double v = 1.01; | |||||
| int one = 1; | |||||
| void * A = param->create_matrix(size * size); | |||||
| void * y = param->create_matrix(size); | |||||
| for(i = 0; i < n; i++) { | |||||
| param->blas_func("N", &size, &size, &v, A, &size, y, &one, &v, y, &one); | |||||
| } | |||||
| free(A); | |||||
| free(y); | |||||
| } | |||||
| void ger_bench(BenchParam * param) { | |||||
| int i, n; | |||||
| int size = param->matrix_size; | |||||
| n = param->n_loop / size; | |||||
| double v = 1.01; | |||||
| int one = 1; | |||||
| void * A = param->create_matrix(size * size); | |||||
| void * y = param->create_matrix(size); | |||||
| for(i = 0; i < n; i++) { | |||||
| param->blas_func(&size, &size, &v, y, &one, y, &one, A, &size); | |||||
| } | |||||
| free(A); | |||||
| free(y); | |||||
| } | |||||
| #ifndef _WIN32 | |||||
| void * pthread_func_wrapper(void * param) { | |||||
| ((BenchParam *)param)->bench_func(param); | |||||
| pthread_exit(NULL); | |||||
| } | |||||
| #endif | |||||
| #define NB_TESTS 5 | |||||
| void * TESTS[4 * NB_TESTS] = { | |||||
| trmv_bench, ztrmv_, z_create_matrix, "ztrmv", | |||||
| gemv_bench, dgemv_, d_create_matrix, "dgemv", | |||||
| gemv_bench, zgemv_, z_create_matrix, "zgemv", | |||||
| ger_bench, dger_, d_create_matrix, "dger", | |||||
| ger_bench, zgerc_, z_create_matrix, "zgerc", | |||||
| }; | |||||
| inline static double delta_time(struct timespec tick) { | |||||
| struct timespec tock; | |||||
| clock_gettime(CLOCK_MONOTONIC, &tock); | |||||
| return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9; | |||||
| } | |||||
| double pthread_bench(BenchParam * param, int nb_threads) | |||||
| { | |||||
| #ifdef _WIN32 | |||||
| return 0; | |||||
| #else | |||||
| BenchParam threaded_param = *param; | |||||
| pthread_t threads[nb_threads]; | |||||
| int t, rc; | |||||
| struct timespec tick; | |||||
| threaded_param.n_loop /= nb_threads; | |||||
| clock_gettime(CLOCK_MONOTONIC, &tick); | |||||
| for(t=0; t<nb_threads; t++){ | |||||
| rc = pthread_create(&threads[t], NULL, pthread_func_wrapper, &threaded_param); | |||||
| if (rc){ | |||||
| printf("ERROR; return code from pthread_create() is %d\n", rc); | |||||
| exit(-1); | |||||
| } | |||||
| } | |||||
| for(t=0; t<nb_threads; t++){ | |||||
| pthread_join(threads[t], NULL); | |||||
| } | |||||
| return delta_time(tick); | |||||
| #endif | |||||
| } | |||||
| double seq_bench(BenchParam * param) { | |||||
| struct timespec tick; | |||||
| clock_gettime(CLOCK_MONOTONIC, &tick); | |||||
| param->bench_func(param); | |||||
| return delta_time(tick); | |||||
| } | |||||
| double omp_bench(BenchParam * param) { | |||||
| BenchParam threaded_param = *param; | |||||
| struct timespec tick; | |||||
| int t; | |||||
| int nb_threads = omp_get_max_threads(); | |||||
| threaded_param.n_loop /= nb_threads; | |||||
| clock_gettime(CLOCK_MONOTONIC, &tick); | |||||
| #pragma omp parallel for | |||||
| for(t = 0; t < nb_threads; t ++){ | |||||
| param->bench_func(&threaded_param); | |||||
| } | |||||
| return delta_time(tick); | |||||
| } | |||||
| int main(int argc, char * argv[]) { | |||||
| double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE); | |||||
| BenchParam param; | |||||
| int test_id; | |||||
| printf ("Running on %d threads\n", omp_get_max_threads()); | |||||
| for(test_id = 0; test_id < NB_TESTS; test_id ++) { | |||||
| double size = MIN_SIZE; | |||||
| param.bench_func = TESTS[test_id * 4]; | |||||
| param.blas_func = TESTS[test_id * 4 + 1]; | |||||
| param.create_matrix = TESTS[test_id * 4 + 2]; | |||||
| printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]); | |||||
| param.n_loop = NLOOP; | |||||
| while(size <= MAX_SIZE) { | |||||
| param.matrix_size = (int)(size + 0.5); | |||||
| double seq_time = seq_bench(¶m); | |||||
| double omp_time = omp_bench(¶m); | |||||
| double pthread_time = pthread_bench(¶m, omp_get_max_threads()); | |||||
| printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, " | |||||
| "pthread %gs, speedup %g\n", | |||||
| param.matrix_size, seq_time, | |||||
| omp_time, seq_time / omp_time, | |||||
| pthread_time, seq_time / pthread_time); | |||||
| size *= inc_factor; | |||||
| } | |||||
| } | |||||
| return(0); | |||||
| } | |||||
| @@ -6,6 +6,7 @@ $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); | |||||
| $hostarch = "x86_64" if ($hostarch eq "amd64"); | $hostarch = "x86_64" if ($hostarch eq "amd64"); | ||||
| $hostarch = "arm" if ($hostarch =~ /^arm.*/); | $hostarch = "arm" if ($hostarch =~ /^arm.*/); | ||||
| $hostarch = "arm64" if ($hostarch eq "aarch64"); | $hostarch = "arm64" if ($hostarch eq "aarch64"); | ||||
| $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); | |||||
| $binary = $ENV{"BINARY"}; | $binary = $ENV{"BINARY"}; | ||||
| @@ -14,12 +14,12 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") | |||||
| if (NOT NO_EXPRECISION) | if (NOT NO_EXPRECISION) | ||||
| if (${F_COMPILER} MATCHES "GFORTRAN") | if (${F_COMPILER} MATCHES "GFORTRAN") | ||||
| # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa | # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB") | |||||
| set(EXPRECISION 1) | set(EXPRECISION 1) | ||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") | set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") | ||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | ||||
| endif () | endif () | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "Clang") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") | |||||
| set(EXPRECISION 1) | set(EXPRECISION 1) | ||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") | set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") | ||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | ||||
| @@ -28,35 +28,35 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "Intel") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") | set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") | ||||
| endif () | endif () | ||||
| if (USE_OPENMP) | if (USE_OPENMP) | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | ||||
| endif () | endif () | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "Clang") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") | |||||
| message(WARNING "Clang doesn't support OpenMP yet.") | message(WARNING "Clang doesn't support OpenMP yet.") | ||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | ||||
| endif () | endif () | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "Intel") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") | set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") | ||||
| endif () | endif () | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "PGI") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | ||||
| endif () | endif () | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | ||||
| set(CEXTRALIB "${CEXTRALIB} -lstdc++") | set(CEXTRALIB "${CEXTRALIB} -lstdc++") | ||||
| endif () | endif () | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE") | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | ||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| @@ -87,7 +87,7 @@ if (${ARCH} STREQUAL "ia64") | |||||
| set(BINARY_DEFINED 1) | set(BINARY_DEFINED 1) | ||||
| if (${F_COMPILER} MATCHES "GFORTRAN") | if (${F_COMPILER} MATCHES "GFORTRAN") | ||||
| if (${CMAKE_C_COMPILER} STREQUAL "GNU") | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| # EXPRECISION = 1 | # EXPRECISION = 1 | ||||
| # CCOMMON_OPT += -DEXPRECISION | # CCOMMON_OPT += -DEXPRECISION | ||||
| endif () | endif () | ||||
| @@ -48,18 +48,18 @@ set(SLASRC | |||||
| sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f | sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f | ||||
| sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f | sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f | ||||
| sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f | sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f | ||||
| sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f | |||||
| sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f | |||||
| sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f | |||||
| DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f | |||||
| sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f | |||||
| sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f | |||||
| sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f | sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f | ||||
| sgetc2.f sgetri.f | sgetc2.f sgetri.f | ||||
| sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f | sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f | ||||
| sggglm.f sgghrd.f sgglse.f sggqrf.f | sggglm.f sgghrd.f sgglse.f sggqrf.f | ||||
| sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f | |||||
| sggrqf.f DEPRECATED/sggsvd.f DEPRECATED/sggsvp.f sgtcon.f sgtrfs.f sgtsv.f | |||||
| sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f | sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f | ||||
| shsein.f shseqr.f slabrd.f slacon.f slacn2.f | shsein.f shseqr.f slabrd.f slacon.f slacn2.f | ||||
| slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f | slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f | ||||
| slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f | |||||
| DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f | |||||
| slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f | slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f | ||||
| slansy.f slantb.f slantp.f slantr.f slanv2.f | slansy.f slantb.f slantp.f slantr.f slanv2.f | ||||
| slapll.f slapmt.f | slapll.f slapmt.f | ||||
| @@ -69,7 +69,7 @@ set(SLASRC | |||||
| slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f | slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f | ||||
| slarrv.f slartv.f | slarrv.f slartv.f | ||||
| slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f | slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f | ||||
| slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f | |||||
| slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f | |||||
| sopgtr.f sopmtr.f sorg2l.f sorg2r.f | sopgtr.f sopmtr.f sorg2l.f sorg2r.f | ||||
| sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f | sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f | ||||
| sorgrq.f sorgtr.f sorm2l.f sorm2r.f | sorgrq.f sorgtr.f sorm2l.f sorm2r.f | ||||
| @@ -97,7 +97,7 @@ set(SLASRC | |||||
| stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f | stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f | ||||
| stptrs.f | stptrs.f | ||||
| strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f | strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f | ||||
| strtrs.f stzrqf.f stzrzf.f sstemr.f | |||||
| strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f | |||||
| slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f | slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f | ||||
| stfttr.f stpttf.f stpttr.f strttf.f strttp.f | stfttr.f stpttf.f stpttr.f strttf.f strttp.f | ||||
| sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f | sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f | ||||
| @@ -114,14 +114,14 @@ set(CLASRC | |||||
| cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f | cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f | ||||
| cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f | cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f | ||||
| cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f | cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f | ||||
| cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f | |||||
| cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f | |||||
| cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f | |||||
| DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f | |||||
| cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f | |||||
| DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f | |||||
| cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f | cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f | ||||
| cgesvx.f cgetc2.f cgetri.f | cgesvx.f cgetc2.f cgetri.f | ||||
| cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f | cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f | ||||
| cgghrd.f cgglse.f cggqrf.f cggrqf.f | cgghrd.f cgglse.f cggqrf.f cggrqf.f | ||||
| cggsvd.f cggsvp.f | |||||
| DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f | |||||
| cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f | cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f | ||||
| chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f | chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f | ||||
| checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f | checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f | ||||
| @@ -138,7 +138,7 @@ set(CLASRC | |||||
| claed0.f claed7.f claed8.f | claed0.f claed7.f claed8.f | ||||
| claein.f claesy.f claev2.f clags2.f clagtm.f | claein.f claesy.f claev2.f clags2.f clagtm.f | ||||
| clahef.f clahef_rook.f clahqr.f | clahef.f clahef_rook.f clahqr.f | ||||
| clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f | |||||
| DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f | |||||
| clanhb.f clanhe.f | clanhb.f clanhe.f | ||||
| clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f | clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f | ||||
| clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f | clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f | ||||
| @@ -149,7 +149,7 @@ set(CLASRC | |||||
| clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f | clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f | ||||
| clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f | clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f | ||||
| clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f | clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f | ||||
| clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f | |||||
| DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f | |||||
| cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f | cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f | ||||
| cposv.f cposvx.f cpstrf.f cpstf2.f | cposv.f cposvx.f cpstrf.f cpstf2.f | ||||
| cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f | cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f | ||||
| @@ -166,7 +166,7 @@ set(CLASRC | |||||
| ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f | ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f | ||||
| ctprfs.f ctptri.f | ctprfs.f ctptri.f | ||||
| ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f | ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f | ||||
| ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f | |||||
| ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f | |||||
| cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f | cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f | ||||
| cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f | cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f | ||||
| cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f | cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f | ||||
| @@ -186,18 +186,18 @@ set(DLASRC | |||||
| dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f | dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f | ||||
| dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f | dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f | ||||
| dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f | dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f | ||||
| dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f | |||||
| dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f | |||||
| dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f | |||||
| DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f | |||||
| dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f | |||||
| dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f | |||||
| dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f | dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f | ||||
| dgetc2.f dgetri.f | dgetc2.f dgetri.f | ||||
| dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f | dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f | ||||
| dggglm.f dgghrd.f dgglse.f dggqrf.f | dggglm.f dgghrd.f dgglse.f dggqrf.f | ||||
| dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f | |||||
| dggrqf.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f | |||||
| dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f | dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f | ||||
| dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f | dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f | ||||
| dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f | dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f | ||||
| dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f | |||||
| DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f | |||||
| dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f | dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f | ||||
| dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f | dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f | ||||
| dlapll.f dlapmt.f | dlapll.f dlapmt.f | ||||
| @@ -207,7 +207,7 @@ set(DLASRC | |||||
| dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f | dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f | ||||
| dlargv.f dlarrv.f dlartv.f | dlargv.f dlarrv.f dlartv.f | ||||
| dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f | dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f | ||||
| dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f | |||||
| dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f | |||||
| dopgtr.f dopmtr.f dorg2l.f dorg2r.f | dopgtr.f dopmtr.f dorg2l.f dorg2r.f | ||||
| dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f | dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f | ||||
| dorgrq.f dorgtr.f dorm2l.f dorm2r.f | dorgrq.f dorgtr.f dorm2l.f dorm2r.f | ||||
| @@ -235,7 +235,7 @@ set(DLASRC | |||||
| dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f | dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f | ||||
| dtptrs.f | dtptrs.f | ||||
| dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f | dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f | ||||
| dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f | |||||
| dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f | |||||
| dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f | dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f | ||||
| dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f | dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f | ||||
| dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f | dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f | ||||
| @@ -251,14 +251,14 @@ set(ZLASRC | |||||
| zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f | zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f | ||||
| zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f | zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f | ||||
| zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f | zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f | ||||
| zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f | |||||
| zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f | |||||
| zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f | |||||
| DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f | |||||
| zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f | |||||
| DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f | |||||
| zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f | zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f | ||||
| zgetri.f | zgetri.f | ||||
| zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f | zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f | ||||
| zgghrd.f zgglse.f zggqrf.f zggrqf.f | zgghrd.f zgglse.f zggqrf.f zggrqf.f | ||||
| zggsvd.f zggsvp.f | |||||
| DEPRECATED/zggsvd.f DEPRECATED/zggsvp.f | |||||
| zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f | zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f | ||||
| zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f | zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f | ||||
| zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f | zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f | ||||
| @@ -275,7 +275,7 @@ set(ZLASRC | |||||
| zlaed0.f zlaed7.f zlaed8.f | zlaed0.f zlaed7.f zlaed8.f | ||||
| zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f | zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f | ||||
| zlahef.f zlahef_rook.f zlahqr.f | zlahef.f zlahef_rook.f zlahqr.f | ||||
| zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f | |||||
| DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f | |||||
| zlangt.f zlanhb.f | zlangt.f zlanhb.f | ||||
| zlanhe.f | zlanhe.f | ||||
| zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f | zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f | ||||
| @@ -288,7 +288,7 @@ set(ZLASRC | |||||
| zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f | zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f | ||||
| zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f | zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f | ||||
| zlassq.f zlasyf.f zlasyf_rook.f | zlassq.f zlasyf.f zlasyf_rook.f | ||||
| zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f | |||||
| zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f | |||||
| zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f | zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f | ||||
| zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f | zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f | ||||
| zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f | zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f | ||||
| @@ -306,7 +306,7 @@ set(ZLASRC | |||||
| ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f | ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f | ||||
| ztprfs.f ztptri.f | ztprfs.f ztptri.f | ||||
| ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f | ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f | ||||
| ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f | |||||
| ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f | |||||
| zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f | zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f | ||||
| zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f | zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f | ||||
| zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f | zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f | ||||
| @@ -2038,6 +2038,59 @@ set(MATGEN | |||||
| lapacke_zlagsy_work.c | lapacke_zlagsy_work.c | ||||
| ) | ) | ||||
| set(Utils_SRC | |||||
| lapacke_cgb_nancheck.c lapacke_dpf_nancheck.c lapacke_ssy_trans.c | |||||
| lapacke_cgb_trans.c lapacke_dpf_trans.c lapacke_stb_nancheck.c | |||||
| lapacke_cge_nancheck.c lapacke_dpo_nancheck.c lapacke_stb_trans.c | |||||
| lapacke_cge_trans.c lapacke_dpo_trans.c lapacke_stf_nancheck.c | |||||
| lapacke_cgg_nancheck.c lapacke_dpp_nancheck.c lapacke_stf_trans.c | |||||
| lapacke_cgg_trans.c lapacke_dpp_trans.c lapacke_stp_nancheck.c | |||||
| lapacke_cgt_nancheck.c lapacke_dpt_nancheck.c lapacke_stp_trans.c | |||||
| lapacke_chb_nancheck.c lapacke_dsb_nancheck.c lapacke_str_nancheck.c | |||||
| lapacke_chb_trans.c lapacke_dsb_trans.c lapacke_str_trans.c | |||||
| lapacke_che_nancheck.c lapacke_dsp_nancheck.c lapacke_xerbla.c | |||||
| lapacke_che_trans.c lapacke_dsp_trans.c lapacke_zgb_nancheck.c | |||||
| lapacke_chp_nancheck.c lapacke_dst_nancheck.c lapacke_zgb_trans.c | |||||
| lapacke_chp_trans.c lapacke_dsy_nancheck.c lapacke_zge_nancheck.c | |||||
| lapacke_chs_nancheck.c lapacke_dsy_trans.c lapacke_zge_trans.c | |||||
| lapacke_chs_trans.c lapacke_dtb_nancheck.c lapacke_zgg_nancheck.c | |||||
| lapacke_c_nancheck.c lapacke_dtb_trans.c lapacke_zgg_trans.c | |||||
| lapacke_cpb_nancheck.c lapacke_dtf_nancheck.c lapacke_zgt_nancheck.c | |||||
| lapacke_cpb_trans.c lapacke_dtf_trans.c lapacke_zhb_nancheck.c | |||||
| lapacke_cpf_nancheck.c lapacke_dtp_nancheck.c lapacke_zhb_trans.c | |||||
| lapacke_cpf_trans.c lapacke_dtp_trans.c lapacke_zhe_nancheck.c | |||||
| lapacke_cpo_nancheck.c lapacke_dtr_nancheck.c lapacke_zhe_trans.c | |||||
| lapacke_cpo_trans.c lapacke_dtr_trans.c lapacke_zhp_nancheck.c | |||||
| lapacke_cpp_nancheck.c lapacke_lsame.c lapacke_zhp_trans.c | |||||
| lapacke_cpp_trans.c lapacke_make_complex_double.c lapacke_zhs_nancheck.c | |||||
| lapacke_cpt_nancheck.c lapacke_make_complex_float.c lapacke_zhs_trans.c | |||||
| lapacke_csp_nancheck.c lapacke_sgb_nancheck.c lapacke_z_nancheck.c | |||||
| lapacke_csp_trans.c lapacke_sgb_trans.c lapacke_zpb_nancheck.c | |||||
| lapacke_cst_nancheck.c lapacke_sge_nancheck.c lapacke_zpb_trans.c | |||||
| lapacke_csy_nancheck.c lapacke_sge_trans.c lapacke_zpf_nancheck.c | |||||
| lapacke_csy_trans.c lapacke_sgg_nancheck.c lapacke_zpf_trans.c | |||||
| lapacke_ctb_nancheck.c lapacke_sgg_trans.c lapacke_zpo_nancheck.c | |||||
| lapacke_ctb_trans.c lapacke_sgt_nancheck.c lapacke_zpo_trans.c | |||||
| lapacke_ctf_nancheck.c lapacke_shs_nancheck.c lapacke_zpp_nancheck.c | |||||
| lapacke_ctf_trans.c lapacke_shs_trans.c lapacke_zpp_trans.c | |||||
| lapacke_ctp_nancheck.c lapacke_s_nancheck.c lapacke_zpt_nancheck.c | |||||
| lapacke_ctp_trans.c lapacke_spb_nancheck.c lapacke_zsp_nancheck.c | |||||
| lapacke_ctr_nancheck.c lapacke_spb_trans.c lapacke_zsp_trans.c | |||||
| lapacke_ctr_trans.c lapacke_spf_nancheck.c lapacke_zst_nancheck.c | |||||
| lapacke_dgb_nancheck.c lapacke_spf_trans.c lapacke_zsy_nancheck.c | |||||
| lapacke_dgb_trans.c lapacke_spo_nancheck.c lapacke_zsy_trans.c | |||||
| lapacke_dge_nancheck.c lapacke_spo_trans.c lapacke_ztb_nancheck.c | |||||
| lapacke_dge_trans.c lapacke_spp_nancheck.c lapacke_ztb_trans.c | |||||
| lapacke_dgg_nancheck.c lapacke_spp_trans.c lapacke_ztf_nancheck.c | |||||
| lapacke_dgg_trans.c lapacke_spt_nancheck.c lapacke_ztf_trans.c | |||||
| lapacke_dgt_nancheck.c lapacke_ssb_nancheck.c lapacke_ztp_nancheck.c | |||||
| lapacke_dhs_nancheck.c lapacke_ssb_trans.c lapacke_ztp_trans.c | |||||
| lapacke_dhs_trans.c lapacke_ssp_nancheck.c lapacke_ztr_nancheck.c | |||||
| lapacke_d_nancheck.c lapacke_ssp_trans.c lapacke_ztr_trans.c | |||||
| lapacke_dpb_nancheck.c lapacke_sst_nancheck.c | |||||
| lapacke_dpb_trans.c lapacke_ssy_nancheck.c | |||||
| ) | |||||
| set(LAPACKE_REL_SRC "") | set(LAPACKE_REL_SRC "") | ||||
| if (BUILD_SINGLE) | if (BUILD_SINGLE) | ||||
| list(APPEND LAPACKE_REL_SRC ${SSRC}) | list(APPEND LAPACKE_REL_SRC ${SSRC}) | ||||
| @@ -2058,10 +2111,14 @@ endif () | |||||
| # add lapack-netlib folder to the sources | # add lapack-netlib folder to the sources | ||||
| set(LAPACKE_SOURCES "") | set(LAPACKE_SOURCES "") | ||||
| foreach (LAE_FILE ${LAPACKE_REL_SRC}) | foreach (LAE_FILE ${LAPACKE_REL_SRC}) | ||||
| list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/lapacke/src/${LAE_FILE}") | |||||
| list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/LAPACKE/src/${LAE_FILE}") | |||||
| endforeach () | |||||
| foreach (Utils_FILE ${Utils_SRC}) | |||||
| list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/LAPACKE/utils/${Utils_FILE}") | |||||
| endforeach () | endforeach () | ||||
| set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/lapacke/include") | |||||
| set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include") | |||||
| execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h" "${lapacke_include_dir}/lapacke_mangling.h") | execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h" "${lapacke_include_dir}/lapacke_mangling.h") | ||||
| include_directories(${lapacke_include_dir}) | include_directories(${lapacke_include_dir}) | ||||
| set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") | set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") | ||||
| @@ -86,13 +86,14 @@ extern "C" { | |||||
| #if !defined(_MSC_VER) | #if !defined(_MSC_VER) | ||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #endif | #endif | ||||
| #include <time.h> | |||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| #include <malloc.h> | #include <malloc.h> | ||||
| #include <sched.h> | #include <sched.h> | ||||
| #endif | #endif | ||||
| #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) | |||||
| #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID) | |||||
| #include <sched.h> | #include <sched.h> | ||||
| #endif | #endif | ||||
| @@ -331,12 +332,13 @@ typedef int blasint; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| /* | |||||
| #ifdef PILEDRIVER | #ifdef PILEDRIVER | ||||
| #ifndef YIELDING | #ifndef YIELDING | ||||
| #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| */ | |||||
| /* | /* | ||||
| #ifdef STEAMROLLER | #ifdef STEAMROLLER | ||||
| @@ -410,7 +412,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
| typedef char env_var_t[MAX_PATH]; | typedef char env_var_t[MAX_PATH]; | ||||
| #define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p)) | |||||
| #define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p)) | |||||
| #else | #else | ||||
| typedef char* env_var_t; | typedef char* env_var_t; | ||||
| #define readenv(p, n) ((p)=getenv(n)) | #define readenv(p, n) ((p)=getenv(n)) | ||||
| @@ -726,6 +728,7 @@ typedef struct { | |||||
| #endif | #endif | ||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #include "common_stackalloc.h" | |||||
| #if 0 | #if 0 | ||||
| #include "symcopy.h" | #include "symcopy.h" | ||||
| #endif | #endif | ||||
| @@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| static void __inline blas_lock(volatile BLASULONG *address){ | static void __inline blas_lock(volatile BLASULONG *address){ | ||||
| long register ret; | |||||
| BLASULONG ret; | |||||
| do { | do { | ||||
| while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
| __asm__ __volatile__( | __asm__ __volatile__( | ||||
| "ldaxr %0, [%1] \n\t" | |||||
| "stlxr w2, %2, [%1] \n\t" | |||||
| "orr %0, %0, x2 \n\t" | |||||
| : "=r"(ret) | |||||
| : "r"(address), "r"(1l) | |||||
| : "memory", "x2" | |||||
| "mov x4, #1 \n\t" | |||||
| "1: \n\t" | |||||
| "ldaxr x2, [%1] \n\t" | |||||
| "cbnz x2, 1b \n\t" | |||||
| "2: \n\t" | |||||
| "stxr w3, x4, [%1] \n\t" | |||||
| "cbnz w3, 1b \n\t" | |||||
| "mov %0, #0 \n\t" | |||||
| : "=r"(ret), "=r"(address) | |||||
| : "1"(address) | |||||
| : "memory", "x2" , "x3", "x4" | |||||
| ); | ); | ||||
| } while (ret); | } while (ret); | ||||
| MB; | |||||
| } | } | ||||
| #define BLAS_LOCK_DEFINED | #define BLAS_LOCK_DEFINED | ||||
| static inline int blas_quickdivide(blasint x, blasint y){ | static inline int blas_quickdivide(blasint x, blasint y){ | ||||
| return x / y; | return x / y; | ||||
| } | } | ||||
| @@ -89,8 +100,10 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| #if defined(ASSEMBLER) && !defined(NEEDPARAM) | #if defined(ASSEMBLER) && !defined(NEEDPARAM) | ||||
| #define PROLOGUE \ | #define PROLOGUE \ | ||||
| .text ;\ | |||||
| .align 4 ;\ | |||||
| .global REALNAME ;\ | .global REALNAME ;\ | ||||
| .func REALNAME ;\ | |||||
| .type REALNAME, %function ;\ | |||||
| REALNAME: | REALNAME: | ||||
| #define EPILOGUE | #define EPILOGUE | ||||
| @@ -107,7 +120,11 @@ REALNAME: | |||||
| #endif | #endif | ||||
| #define HUGE_PAGESIZE ( 4 << 20) | #define HUGE_PAGESIZE ( 4 << 20) | ||||
| #if defined(CORTEXA57) | |||||
| #define BUFFER_SIZE (20 << 20) | |||||
| #else | |||||
| #define BUFFER_SIZE (16 << 20) | #define BUFFER_SIZE (16 << 20) | ||||
| #endif | |||||
| #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | ||||
| @@ -236,7 +236,7 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| #define HAVE_PREFETCH | #define HAVE_PREFETCH | ||||
| #endif | #endif | ||||
| #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) | |||||
| #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) | |||||
| #define DCBT_ARG 0 | #define DCBT_ARG 0 | ||||
| #else | #else | ||||
| #define DCBT_ARG 8 | #define DCBT_ARG 8 | ||||
| @@ -258,6 +258,13 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| #define L1_PREFETCH dcbtst | #define L1_PREFETCH dcbtst | ||||
| #endif | #endif | ||||
| #if defined(POWER8) | |||||
| #define L1_DUALFETCH | |||||
| #define L1_PREFETCHSIZE (16 + 128 * 100) | |||||
| #define L1_PREFETCH dcbtst | |||||
| #endif | |||||
| # | |||||
| #ifndef L1_PREFETCH | #ifndef L1_PREFETCH | ||||
| #define L1_PREFETCH dcbt | #define L1_PREFETCH dcbt | ||||
| #endif | #endif | ||||
| @@ -790,6 +797,8 @@ Lmcount$lazy_ptr: | |||||
| #define BUFFER_SIZE ( 2 << 20) | #define BUFFER_SIZE ( 2 << 20) | ||||
| #elif defined(PPC440FP2) | #elif defined(PPC440FP2) | ||||
| #define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
| #elif defined(POWER8) | |||||
| #define BUFFER_SIZE ( 64 << 20) | |||||
| #else | #else | ||||
| #define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,73 @@ | |||||
| /******************************************************************************* | |||||
| Copyright (c) 2016, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *******************************************************************************/ | |||||
| #define STACK_ALLOC_PROTECT | |||||
| #ifdef STACK_ALLOC_PROTECT | |||||
| // Try to detect stack smashing | |||||
| #include <assert.h> | |||||
| #define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234; | |||||
| #define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234); | |||||
| #else | |||||
| #define STACK_ALLOC_PROTECT_SET | |||||
| #define STACK_ALLOC_PROTECT_CHECK | |||||
| #endif | |||||
| #if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0 | |||||
| /* | |||||
| * Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC. | |||||
| * Stack allocation is much faster than blas_memory_alloc or malloc, particularly | |||||
| * when OpenBLAS is used from a multi-threaded application. | |||||
| * SIZE must be carefully chosen to be: | |||||
| * - as small as possible to maximize the number of stack allocations | |||||
| * - large enough to support all architectures and kernels | |||||
| * Choosing a SIZE that is too small will lead to stack smashing. | |||||
| */ | |||||
| #define STACK_ALLOC(SIZE, TYPE, BUFFER) \ | |||||
| /* make it volatile because some functions (e.g. dgemv_n.S) */ \ | |||||
| /* do not restore all registers */ \ | |||||
| volatile int stack_alloc_size = SIZE; \ | |||||
| if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \ | |||||
| stack_alloc_size = 0; \ | |||||
| STACK_ALLOC_PROTECT_SET \ | |||||
| TYPE stack_buffer[stack_alloc_size] __attribute__((aligned(0x20))); \ | |||||
| BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1); | |||||
| #else | |||||
| // Original OpenBLAS/GotoBLAS code. | |||||
| #define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1) | |||||
| #endif | |||||
| #if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0 | |||||
| #define STACK_FREE(BUFFER) \ | |||||
| STACK_ALLOC_PROTECT_CHECK \ | |||||
| if(!stack_alloc_size) \ | |||||
| blas_memory_free(BUFFER); | |||||
| #else | |||||
| #define STACK_FREE(BUFFER) blas_memory_free(BUFFER) | |||||
| #endif | |||||
| @@ -41,6 +41,10 @@ | |||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #ifdef C_MSVC | |||||
| #include <intrin.h> | |||||
| #endif | |||||
| #define MB | #define MB | ||||
| #define WMB | #define WMB | ||||
| @@ -170,12 +174,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||||
| if (y <= 1) return x; | if (y <= 1) return x; | ||||
| y = blas_quick_divide_table[y]; | |||||
| #if defined(_MSC_VER) && !defined(__clang__) | #if defined(_MSC_VER) && !defined(__clang__) | ||||
| (void*)result; | |||||
| return x*y; | |||||
| result = x/y; | |||||
| return result; | |||||
| #else | #else | ||||
| y = blas_quick_divide_table[y]; | |||||
| __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | ||||
| return result; | return result; | ||||
| @@ -396,7 +396,7 @@ REALNAME: | |||||
| #define PROFCODE | #define PROFCODE | ||||
| #define EPILOGUE .end REALNAME | |||||
| #define EPILOGUE .end | |||||
| #endif | #endif | ||||
| #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI) | #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI) | ||||
| @@ -115,6 +115,9 @@ int detect(void) | |||||
| if (strstr(p, "0xc0f")) { | if (strstr(p, "0xc0f")) { | ||||
| return CPU_CORTEXA15; | return CPU_CORTEXA15; | ||||
| } | } | ||||
| if (strstr(p, "0xd07")) { | |||||
| return CPU_ARMV7; //ARMV8 on 32-bit | |||||
| } | |||||
| } | } | ||||
| @@ -158,6 +161,27 @@ int detect(void) | |||||
| } | } | ||||
| p = (char *) NULL ; | |||||
| infile = fopen("/proc/cpuinfo", "r"); | |||||
| while (fgets(buffer, sizeof(buffer), infile)) | |||||
| { | |||||
| if ((!strncmp("CPU architecture", buffer, 16))) | |||||
| { | |||||
| p = strchr(buffer, ':') + 2; | |||||
| break; | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| if(p != NULL) { | |||||
| if (strstr(p, "8")) { | |||||
| return CPU_ARMV7; //ARMV8 on 32-bit | |||||
| } | |||||
| } | |||||
| #endif | #endif | ||||
| return CPU_UNKNOWN; | return CPU_UNKNOWN; | ||||
| @@ -29,12 +29,19 @@ | |||||
| #define CPU_UNKNOWN 0 | #define CPU_UNKNOWN 0 | ||||
| #define CPU_ARMV8 1 | #define CPU_ARMV8 1 | ||||
| #define CPU_CORTEXA57 2 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKOWN", | |||||
| "ARMV8" | |||||
| "UNKNOWN", | |||||
| "ARMV8" , | |||||
| "CORTEXA57" | |||||
| }; | }; | ||||
| static char *cpuname_lower[] = { | |||||
| "unknown", | |||||
| "armv8" , | |||||
| "cortexa57" | |||||
| }; | |||||
| int get_feature(char *search) | int get_feature(char *search) | ||||
| { | { | ||||
| @@ -53,13 +60,13 @@ int get_feature(char *search) | |||||
| { | { | ||||
| p = strchr(buffer, ':') + 2; | p = strchr(buffer, ':') + 2; | ||||
| break; | break; | ||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| fclose(infile); | |||||
| if( p == NULL ) return; | |||||
| if( p == NULL ) return 0; | |||||
| t = strtok(p," "); | t = strtok(p," "); | ||||
| while( t = strtok(NULL," ")) | while( t = strtok(NULL," ")) | ||||
| @@ -82,11 +89,30 @@ int detect(void) | |||||
| p = (char *) NULL ; | p = (char *) NULL ; | ||||
| infile = fopen("/proc/cpuinfo", "r"); | infile = fopen("/proc/cpuinfo", "r"); | ||||
| while (fgets(buffer, sizeof(buffer), infile)) | |||||
| { | |||||
| if (!strncmp("CPU part", buffer, 8)) | |||||
| { | |||||
| p = strchr(buffer, ':') + 2; | |||||
| break; | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| if(p != NULL) { | |||||
| if (strstr(p, "0xd07")) { | |||||
| return CPU_CORTEXA57; | |||||
| } | |||||
| } | |||||
| p = (char *) NULL ; | |||||
| infile = fopen("/proc/cpuinfo", "r"); | |||||
| while (fgets(buffer, sizeof(buffer), infile)) | while (fgets(buffer, sizeof(buffer), infile)) | ||||
| { | { | ||||
| if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) | |||||
| if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) || | |||||
| (!strncmp("CPU architecture", buffer, 16))) | |||||
| { | { | ||||
| p = strchr(buffer, ':') + 2; | p = strchr(buffer, ':') + 2; | ||||
| break; | break; | ||||
| @@ -100,7 +126,7 @@ int detect(void) | |||||
| if (strstr(p, "AArch64")) | if (strstr(p, "AArch64")) | ||||
| { | { | ||||
| return CPU_ARMV8; | |||||
| return CPU_ARMV8; | |||||
| } | } | ||||
| @@ -118,23 +144,13 @@ char *get_corename(void) | |||||
| void get_architecture(void) | void get_architecture(void) | ||||
| { | { | ||||
| printf("ARM"); | |||||
| printf("ARM64"); | |||||
| } | } | ||||
| void get_subarchitecture(void) | void get_subarchitecture(void) | ||||
| { | { | ||||
| int d = detect(); | int d = detect(); | ||||
| switch (d) | |||||
| { | |||||
| case CPU_ARMV8: | |||||
| printf("ARMV8"); | |||||
| break; | |||||
| default: | |||||
| printf("UNKNOWN"); | |||||
| break; | |||||
| } | |||||
| printf("%s", cpuname[d]); | |||||
| } | } | ||||
| void get_subdirname(void) | void get_subdirname(void) | ||||
| @@ -160,26 +176,34 @@ void get_cpuconfig(void) | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | printf("#define L2_ASSOCIATIVE 4\n"); | ||||
| break; | break; | ||||
| case CPU_CORTEXA57: | |||||
| printf("#define CORTEXA57\n"); | |||||
| printf("#define HAVE_VFP\n"); | |||||
| printf("#define HAVE_VFPV3\n"); | |||||
| printf("#define HAVE_NEON\n"); | |||||
| printf("#define HAVE_VFPV4\n"); | |||||
| printf("#define L1_CODE_SIZE 49152\n"); | |||||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||||
| printf("#define L1_CODE_ASSOCIATIVE 3\n"); | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L1_DATA_ASSOCIATIVE 2\n"); | |||||
| printf("#define L2_SIZE 2097152\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| break; | |||||
| } | } | ||||
| } | } | ||||
| void get_libname(void) | void get_libname(void) | ||||
| { | { | ||||
| int d = detect(); | int d = detect(); | ||||
| switch (d) | |||||
| { | |||||
| case CPU_ARMV8: | |||||
| printf("armv8\n"); | |||||
| break; | |||||
| } | |||||
| printf("%s", cpuname_lower[d]); | |||||
| } | } | ||||
| void get_features(void) | void get_features(void) | ||||
| { | { | ||||
| @@ -55,6 +55,7 @@ | |||||
| #define CPUTYPE_POWER6 5 | #define CPUTYPE_POWER6 5 | ||||
| #define CPUTYPE_CELL 6 | #define CPUTYPE_CELL 6 | ||||
| #define CPUTYPE_PPCG4 7 | #define CPUTYPE_PPCG4 7 | ||||
| #define CPUTYPE_POWER8 8 | |||||
| char *cpuname[] = { | char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| @@ -65,6 +66,7 @@ char *cpuname[] = { | |||||
| "POWER6", | "POWER6", | ||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8" | |||||
| }; | }; | ||||
| char *lowercpuname[] = { | char *lowercpuname[] = { | ||||
| @@ -76,6 +78,7 @@ char *lowercpuname[] = { | |||||
| "power6", | "power6", | ||||
| "cell", | "cell", | ||||
| "ppcg4", | "ppcg4", | ||||
| "power8" | |||||
| }; | }; | ||||
| char *corename[] = { | char *corename[] = { | ||||
| @@ -87,6 +90,7 @@ char *corename[] = { | |||||
| "POWER6", | "POWER6", | ||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8" | |||||
| }; | }; | ||||
| int detect(void){ | int detect(void){ | ||||
| @@ -115,7 +119,7 @@ int detect(void){ | |||||
| if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | ||||
| if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6; | |||||
| if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | |||||
| if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | ||||
| if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | ||||
| @@ -1172,6 +1172,9 @@ int get_cpuname(void){ | |||||
| #endif | #endif | ||||
| else | else | ||||
| return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
| case 13: | |||||
| // Avoton | |||||
| return CPUTYPE_NEHALEM; | |||||
| } | } | ||||
| break; | break; | ||||
| case 5: | case 5: | ||||
| @@ -1229,6 +1232,7 @@ int get_cpuname(void){ | |||||
| case 2: | case 2: | ||||
| return CPUTYPE_OPTERON; | return CPUTYPE_OPTERON; | ||||
| case 1: | case 1: | ||||
| case 3: | |||||
| case 10: | case 10: | ||||
| return CPUTYPE_BARCELONA; | return CPUTYPE_BARCELONA; | ||||
| case 6: | case 6: | ||||
| @@ -1239,13 +1243,19 @@ int get_cpuname(void){ | |||||
| return CPUTYPE_BULLDOZER; | return CPUTYPE_BULLDOZER; | ||||
| else | else | ||||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | return CPUTYPE_BARCELONA; //OS don't support AVX. | ||||
| case 2: | |||||
| case 2: //AMD Piledriver | |||||
| case 3: //AMD Richland | |||||
| if(support_avx()) | if(support_avx()) | ||||
| return CPUTYPE_PILEDRIVER; | return CPUTYPE_PILEDRIVER; | ||||
| else | else | ||||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | return CPUTYPE_BARCELONA; //OS don't support AVX. | ||||
| case 0: | case 0: | ||||
| switch(exmodel){ | switch(exmodel){ | ||||
| case 1: //AMD Trinity | |||||
| if(support_avx()) | |||||
| return CPUTYPE_PILEDRIVER; | |||||
| else | |||||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | |||||
| case 3: | case 3: | ||||
| if(support_avx()) | if(support_avx()) | ||||
| return CPUTYPE_STEAMROLLER; | return CPUTYPE_STEAMROLLER; | ||||
| @@ -1668,6 +1678,9 @@ int get_coretype(void){ | |||||
| #endif | #endif | ||||
| else | else | ||||
| return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
| case 13: | |||||
| // Avoton | |||||
| return CORE_NEHALEM; | |||||
| } | } | ||||
| break; | break; | ||||
| case 5: | case 5: | ||||
| @@ -1718,7 +1731,8 @@ int get_coretype(void){ | |||||
| return CORE_BULLDOZER; | return CORE_BULLDOZER; | ||||
| else | else | ||||
| return CORE_BARCELONA; //OS don't support AVX. | return CORE_BARCELONA; //OS don't support AVX. | ||||
| case 2: | |||||
| case 2: //AMD Piledriver | |||||
| case 3: //AMD Richland | |||||
| if(support_avx()) | if(support_avx()) | ||||
| return CORE_PILEDRIVER; | return CORE_PILEDRIVER; | ||||
| else | else | ||||
| @@ -1726,6 +1740,12 @@ int get_coretype(void){ | |||||
| case 0: | case 0: | ||||
| switch(exmodel){ | switch(exmodel){ | ||||
| case 1: //AMD Trinity | |||||
| if(support_avx()) | |||||
| return CORE_PILEDRIVER; | |||||
| else | |||||
| return CORE_BARCELONA; //OS don't support AVX. | |||||
| case 3: | case 3: | ||||
| if(support_avx()) | if(support_avx()) | ||||
| return CORE_STEAMROLLER; | return CORE_STEAMROLLER; | ||||
| @@ -1365,8 +1365,9 @@ | |||||
| * | * | ||||
| 150 CONTINUE | 150 CONTINUE | ||||
| WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
| CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| IF( TRACE ) | |||||
| $ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| * | * | ||||
| 160 CONTINUE | 160 CONTINUE | ||||
| RETURN | RETURN | ||||
| @@ -1365,8 +1365,9 @@ | |||||
| * | * | ||||
| 150 CONTINUE | 150 CONTINUE | ||||
| WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
| CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| IF( TRACE ) | |||||
| $ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| * | * | ||||
| 160 CONTINUE | 160 CONTINUE | ||||
| RETURN | RETURN | ||||
| @@ -1335,8 +1335,9 @@ | |||||
| * | * | ||||
| 150 CONTINUE | 150 CONTINUE | ||||
| WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
| CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| IF( TRACE ) | |||||
| $ CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| * | * | ||||
| 160 CONTINUE | 160 CONTINUE | ||||
| RETURN | RETURN | ||||
| @@ -1339,8 +1339,9 @@ | |||||
| * | * | ||||
| 150 CONTINUE | 150 CONTINUE | ||||
| WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
| CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| IF( TRACE ) | |||||
| $ CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| * | * | ||||
| 160 CONTINUE | 160 CONTINUE | ||||
| RETURN | RETURN | ||||
| @@ -1350,7 +1350,7 @@ | |||||
| * | * | ||||
| * Call the subroutine. | * Call the subroutine. | ||||
| * | * | ||||
| IF( SNAME( 4: 5 ).EQ.'mv' )THEN | |||||
| IF( SNAME( 10: 11 ).EQ.'mv' )THEN | |||||
| IF( FULL )THEN | IF( FULL )THEN | ||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ WRITE( NTRA, FMT = 9993 )NC, SNAME, | $ WRITE( NTRA, FMT = 9993 )NC, SNAME, | ||||
| @@ -1376,7 +1376,7 @@ | |||||
| CALL CZTPMV( IORDER, UPLO, TRANS, DIAG, | CALL CZTPMV( IORDER, UPLO, TRANS, DIAG, | ||||
| $ N, AA, XX, INCX ) | $ N, AA, XX, INCX ) | ||||
| END IF | END IF | ||||
| ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN | |||||
| ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN | |||||
| IF( FULL )THEN | IF( FULL )THEN | ||||
| IF( TRACE ) | IF( TRACE ) | ||||
| $ WRITE( NTRA, FMT = 9993 )NC, SNAME, | $ WRITE( NTRA, FMT = 9993 )NC, SNAME, | ||||
| @@ -1465,7 +1465,7 @@ | |||||
| END IF | END IF | ||||
| * | * | ||||
| IF( .NOT.NULL )THEN | IF( .NOT.NULL )THEN | ||||
| IF( SNAME( 4: 5 ).EQ.'mv' )THEN | |||||
| IF( SNAME( 10: 11 ).EQ.'mv' )THEN | |||||
| * | * | ||||
| * Check the result. | * Check the result. | ||||
| * | * | ||||
| @@ -1473,7 +1473,7 @@ | |||||
| $ INCX, ZERO, Z, INCX, XT, G, | $ INCX, ZERO, Z, INCX, XT, G, | ||||
| $ XX, EPS, ERR, FATAL, NOUT, | $ XX, EPS, ERR, FATAL, NOUT, | ||||
| $ .TRUE. ) | $ .TRUE. ) | ||||
| ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN | |||||
| ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN | |||||
| * | * | ||||
| * Compute approximation to original vector. | * Compute approximation to original vector. | ||||
| * | * | ||||
| @@ -1611,7 +1611,7 @@ | |||||
| * .. Common blocks .. | * .. Common blocks .. | ||||
| COMMON /INFOC/INFOT, NOUTC, OK | COMMON /INFOC/INFOT, NOUTC, OK | ||||
| * .. Executable Statements .. | * .. Executable Statements .. | ||||
| CONJ = SNAME( 5: 5 ).EQ.'c' | |||||
| CONJ = SNAME( 11: 11 ).EQ.'c' | |||||
| * Define the number of arguments. | * Define the number of arguments. | ||||
| NARGS = 9 | NARGS = 9 | ||||
| * | * | ||||
| @@ -1366,8 +1366,9 @@ | |||||
| * | * | ||||
| 150 CONTINUE | 150 CONTINUE | ||||
| WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
| CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| IF( TRACE ) | |||||
| $ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| * | * | ||||
| 160 CONTINUE | 160 CONTINUE | ||||
| RETURN | RETURN | ||||
| @@ -1366,8 +1366,9 @@ | |||||
| * | * | ||||
| 150 CONTINUE | 150 CONTINUE | ||||
| WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
| CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| IF( TRACE ) | |||||
| $ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
| $ M, N, ALPHA, LDA, LDB) | |||||
| * | * | ||||
| 160 CONTINUE | 160 CONTINUE | ||||
| RETURN | RETURN | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -1,7 +1,7 @@ | |||||
| 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
| -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
| F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
| F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| @@ -55,7 +55,7 @@ | |||||
| static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| BLASLONG incx, incy; | |||||
| BLASLONG incx; | |||||
| BLASLONG m_from, m_to, i; | BLASLONG m_from, m_to, i; | ||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| @@ -68,7 +68,6 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| y = (FLOAT *)args -> c; | y = (FLOAT *)args -> c; | ||||
| incx = args -> ldb; | incx = args -> ldb; | ||||
| incy = args -> ldc; | |||||
| m_from = 0; | m_from = 0; | ||||
| m_to = args -> m; | m_to = args -> m; | ||||
| @@ -43,7 +43,7 @@ | |||||
| static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| BLASLONG lda, incx, incy; | |||||
| BLASLONG incx, incy; | |||||
| BLASLONG i, m_from, m_to; | BLASLONG i, m_from, m_to; | ||||
| FLOAT alpha_r; | FLOAT alpha_r; | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| @@ -56,7 +56,6 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL | |||||
| incx = args -> lda; | incx = args -> lda; | ||||
| incy = args -> ldb; | incy = args -> ldb; | ||||
| lda = args -> ldc; | |||||
| alpha_r = *((FLOAT *)args -> alpha + 0); | alpha_r = *((FLOAT *)args -> alpha + 0); | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| @@ -46,7 +46,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL | |||||
| BLASLONG incx; | BLASLONG incx; | ||||
| BLASLONG i, m_from, m_to; | BLASLONG i, m_from, m_to; | ||||
| FLOAT alpha_r; | FLOAT alpha_r; | ||||
| #if defined(COMPLEX) && !defined(HER) && !defined(HERREV) | |||||
| #if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV) | |||||
| FLOAT alpha_i; | FLOAT alpha_i; | ||||
| #endif | #endif | ||||
| @@ -56,7 +56,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL | |||||
| incx = args -> lda; | incx = args -> lda; | ||||
| alpha_r = *((FLOAT *)args -> alpha + 0); | alpha_r = *((FLOAT *)args -> alpha + 0); | ||||
| #if defined(COMPLEX) && !defined(HER) && !defined(HERREV) | |||||
| #if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV) | |||||
| alpha_i = *((FLOAT *)args -> alpha + 1); | alpha_i = *((FLOAT *)args -> alpha + 1); | ||||
| #endif | #endif | ||||
| @@ -55,7 +55,7 @@ | |||||
| static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| BLASLONG lda, incx, incy; | |||||
| BLASLONG lda, incx; | |||||
| BLASLONG m_from, m_to; | BLASLONG m_from, m_to; | ||||
| a = (FLOAT *)args -> a; | a = (FLOAT *)args -> a; | ||||
| @@ -64,7 +64,6 @@ static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| lda = args -> lda; | lda = args -> lda; | ||||
| incx = args -> ldb; | incx = args -> ldb; | ||||
| incy = args -> ldc; | |||||
| m_from = 0; | m_from = 0; | ||||
| m_to = args -> m; | m_to = args -> m; | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -43,12 +43,10 @@ | |||||
| int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -43,12 +43,10 @@ | |||||
| int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -119,7 +119,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #endif | #endif | ||||
| x = buffer; | x = buffer; | ||||
| buffer += ((COMPSIZE * args -> m + 1023) & ~1023); | |||||
| buffer += ((COMPSIZE * args -> m + 3) & ~3); | |||||
| } | } | ||||
| #ifndef TRANS | #ifndef TRANS | ||||
| @@ -403,7 +403,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu | |||||
| if (num_cpu) { | if (num_cpu) { | ||||
| queue[0].sa = NULL; | queue[0].sa = NULL; | ||||
| queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE; | |||||
| queue[0].sb = buffer + num_cpu * (((m + 3) & ~3) + 16) * COMPSIZE; | |||||
| queue[num_cpu - 1].next = NULL; | queue[num_cpu - 1].next = NULL; | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -51,12 +51,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -48,8 +48,7 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination | # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination | ||||
| GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) | GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) | ||||
| GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) | GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) | ||||
| GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) | |||||
| GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) | |||||
| # Need to set CONJ for trmm and trsm | # Need to set CONJ for trmm and trsm | ||||
| GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) | GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) | ||||
| GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) | GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) | ||||
| @@ -72,6 +71,10 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) | GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) | ||||
| if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | ||||
| #herk | |||||
| GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) | |||||
| GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) | |||||
| #hemm | #hemm | ||||
| GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) | GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) | ||||
| GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) | GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) | ||||
| @@ -96,6 +99,17 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
| endif() | endif() | ||||
| endif () | endif () | ||||
| endforeach () | endforeach () | ||||
| # for gemm3m | |||||
| if(USE_GEMM3M) | |||||
| foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||||
| string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) | |||||
| GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE}" "gemm3m_${GEMM_DEFINE_LC}" false "" "" false ${float_type}) | |||||
| if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
| GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm3m_thread_${GEMM_DEFINE_LC}" false "" "" false ${float_type}) | |||||
| endif () | |||||
| endforeach () | |||||
| endif() | |||||
| endif () | endif () | ||||
| endforeach () | endforeach () | ||||
| @@ -65,7 +65,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||||
| blas_queue_t queue[MAX_CPU_NUMBER]; | blas_queue_t queue[MAX_CPU_NUMBER]; | ||||
| BLASLONG range_M[MAX_CPU_NUMBER + 1], range_N[MAX_CPU_NUMBER + 1]; | BLASLONG range_M[MAX_CPU_NUMBER + 1], range_N[MAX_CPU_NUMBER + 1]; | ||||
| BLASLONG procs, total_procs, num_cpu_m, num_cpu_n; | |||||
| BLASLONG procs, num_cpu_m, num_cpu_n; | |||||
| BLASLONG width, i, j; | BLASLONG width, i, j; | ||||
| BLASLONG divM, divN; | BLASLONG divM, divN; | ||||
| @@ -335,7 +335,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | ||||
| else | else | ||||
| if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
| if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | |||||
| else | |||||
| if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
| @@ -230,7 +230,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| BLASLONG is, min_i, div_n; | BLASLONG is, min_i, div_n; | ||||
| BLASLONG i, current; | BLASLONG i, current; | ||||
| BLASLONG l1stride, l2size; | |||||
| BLASLONG l1stride; | |||||
| #ifdef TIMING | #ifdef TIMING | ||||
| BLASULONG rpcc_counter; | BLASULONG rpcc_counter; | ||||
| @@ -298,8 +298,6 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| #endif | #endif | ||||
| ) return 0; | ) return 0; | ||||
| l2size = GEMM_P * GEMM_Q; | |||||
| #if 0 | #if 0 | ||||
| fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld N_from : %ld N_to : %ld\n", | fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld N_from : %ld N_to : %ld\n", | ||||
| mypos, m_from, m_to, n_from, n_to, N_from, N_to); | mypos, m_from, m_to, n_from, n_to, N_from, N_to); | ||||
| @@ -369,7 +367,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | ||||
| else | else | ||||
| if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
| if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | |||||
| else | |||||
| if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
| START_RPCC(); | START_RPCC(); | ||||
| @@ -706,7 +706,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| n = n_to - n_from; | n = n_to - n_from; | ||||
| } | } | ||||
| if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) { | |||||
| if ((m < nthreads * SWITCH_RATIO) || (n < nthreads * SWITCH_RATIO)) { | |||||
| GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); | GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -33,6 +33,7 @@ set(COMMON_SOURCES | |||||
| xerbla.c | xerbla.c | ||||
| openblas_set_num_threads.c | openblas_set_num_threads.c | ||||
| openblas_error_handle.c | openblas_error_handle.c | ||||
| openblas_env.c | |||||
| openblas_get_num_procs.c | openblas_get_num_procs.c | ||||
| openblas_get_num_threads.c | openblas_get_num_threads.c | ||||
| ) | ) | ||||
| @@ -1,7 +1,7 @@ | |||||
| TOPDIR = ../.. | TOPDIR = ../.. | ||||
| include ../../Makefile.system | include ../../Makefile.system | ||||
| COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_num_threads.$(SUFFIX) openblas_get_num_procs.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX) | |||||
| COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_num_threads.$(SUFFIX) openblas_get_num_procs.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX) openblas_env.$(SUFFIX) | |||||
| #COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX) | #COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX) | ||||
| @@ -118,6 +118,9 @@ openblas_get_parallel.$(SUFFIX) : openblas_get_parallel.c | |||||
| openblas_error_handle.$(SUFFIX) : openblas_error_handle.c | openblas_error_handle.$(SUFFIX) : openblas_error_handle.c | ||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | $(CC) $(CFLAGS) -c $< -o $(@F) | ||||
| openblas_env.$(SUFFIX) : openblas_env.c | |||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||||
| blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h | blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h | ||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | $(CC) $(CFLAGS) -c $< -o $(@F) | ||||
| @@ -70,7 +70,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_SUNOS) | |||||
| #include <dlfcn.h> | #include <dlfcn.h> | ||||
| #include <signal.h> | #include <signal.h> | ||||
| #include <sys/resource.h> | #include <sys/resource.h> | ||||
| @@ -92,6 +92,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| extern unsigned int openblas_thread_timeout(); | |||||
| #ifdef SMP_SERVER | #ifdef SMP_SERVER | ||||
| #undef MONITOR | #undef MONITOR | ||||
| @@ -524,6 +526,7 @@ static int blas_monitor(void *arg){ | |||||
| int blas_thread_init(void){ | int blas_thread_init(void){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| int ret; | int ret; | ||||
| int thread_timeout_env; | |||||
| #ifdef NEED_STACKATTR | #ifdef NEED_STACKATTR | ||||
| pthread_attr_t attr; | pthread_attr_t attr; | ||||
| #endif | #endif | ||||
| @@ -540,22 +543,12 @@ int blas_thread_init(void){ | |||||
| if (!blas_server_avail){ | if (!blas_server_avail){ | ||||
| env_var_t p; | |||||
| if (readenv(p,"THREAD_TIMEOUT")) { | |||||
| thread_timeout = atoi(p); | |||||
| if (thread_timeout < 4) thread_timeout = 4; | |||||
| if (thread_timeout > 30) thread_timeout = 30; | |||||
| thread_timeout = (1 << thread_timeout); | |||||
| }else{ | |||||
| if (readenv(p,"GOTO_THREAD_TIMEOUT")) { | |||||
| thread_timeout = atoi(p); | |||||
| if (thread_timeout < 4) thread_timeout = 4; | |||||
| if (thread_timeout > 30) thread_timeout = 30; | |||||
| thread_timeout = (1 << thread_timeout); | |||||
| } | |||||
| } | |||||
| thread_timeout_env=openblas_thread_timeout(); | |||||
| if (thread_timeout_env>0) { | |||||
| if (thread_timeout_env < 4) thread_timeout_env = 4; | |||||
| if (thread_timeout_env > 30) thread_timeout_env = 30; | |||||
| thread_timeout = (1 << thread_timeout_env); | |||||
| } | |||||
| for(i = 0; i < blas_num_threads - 1; i++){ | for(i = 0; i < blas_num_threads - 1; i++){ | ||||
| @@ -576,10 +569,12 @@ int blas_thread_init(void){ | |||||
| struct rlimit rlim; | struct rlimit rlim; | ||||
| const char *msg = strerror(ret); | const char *msg = strerror(ret); | ||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg); | fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg); | ||||
| #ifdef RLIMIT_NPROC | |||||
| if(0 == getrlimit(RLIMIT_NPROC, &rlim)) { | if(0 == getrlimit(RLIMIT_NPROC, &rlim)) { | ||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC " | fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC " | ||||
| "%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max)); | "%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max)); | ||||
| } | } | ||||
| #endif | |||||
| if(0 != raise(SIGINT)) { | if(0 != raise(SIGINT)) { | ||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n"); | fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n"); | ||||
| exit(EXIT_FAILURE); | exit(EXIT_FAILURE); | ||||
| @@ -261,6 +261,11 @@ static gotoblas_t *get_coretype(void){ | |||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| } | } | ||||
| //Intel Avoton | |||||
| if (model == 13) { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||||
| return &gotoblas_NEHALEM; | |||||
| } | |||||
| return NULL; | return NULL; | ||||
| case 5: | case 5: | ||||
| //Intel Broadwell | //Intel Broadwell | ||||
| @@ -318,7 +323,7 @@ static gotoblas_t *get_coretype(void){ | |||||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | ||||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| }else if(model == 2){ | |||||
| }else if(model == 2 || model == 3){ | |||||
| //AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300 | //AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300 | ||||
| if(support_avx()) | if(support_avx()) | ||||
| return &gotoblas_PILEDRIVER; | return &gotoblas_PILEDRIVER; | ||||
| @@ -327,7 +332,15 @@ static gotoblas_t *get_coretype(void){ | |||||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| }else if(model == 0){ | }else if(model == 0){ | ||||
| if (exmodel == 3) { | |||||
| if (exmodel == 1) { | |||||
| //AMD Trinity | |||||
| if(support_avx()) | |||||
| return &gotoblas_PILEDRIVER; | |||||
| else{ | |||||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||||
| } | |||||
| }else if (exmodel == 3) { | |||||
| //AMD STEAMROLLER | //AMD STEAMROLLER | ||||
| if(support_avx()) | if(support_avx()) | ||||
| return &gotoblas_STEAMROLLER; | return &gotoblas_STEAMROLLER; | ||||
| @@ -378,7 +391,7 @@ static char *corename[] = { | |||||
| "Nehalem", | "Nehalem", | ||||
| "Athlon", | "Athlon", | ||||
| "Opteron", | "Opteron", | ||||
| "Opteron(SSE3)", | |||||
| "Opteron_SSE3", | |||||
| "Barcelona", | "Barcelona", | ||||
| "Nano", | "Nano", | ||||
| "Sandybridge", | "Sandybridge", | ||||
| @@ -104,6 +104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <errno.h> | #include <errno.h> | ||||
| #include <linux/unistd.h> | #include <linux/unistd.h> | ||||
| #include <sys/syscall.h> | #include <sys/syscall.h> | ||||
| #include <sys/time.h> | |||||
| #include <sys/resource.h> | |||||
| #endif | #endif | ||||
| #if defined(OS_FREEBSD) || defined(OS_DARWIN) | #if defined(OS_FREEBSD) || defined(OS_DARWIN) | ||||
| @@ -142,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(_MSC_VER) && !defined(__clang__) | #if defined(_MSC_VER) && !defined(__clang__) | ||||
| #define CONSTRUCTOR __cdecl | #define CONSTRUCTOR __cdecl | ||||
| #define DESTRUCTOR __cdecl | #define DESTRUCTOR __cdecl | ||||
| #elif defined(OS_DARWIN) && defined(C_GCC) | |||||
| #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | #define CONSTRUCTOR __attribute__ ((constructor)) | ||||
| #define DESTRUCTOR __attribute__ ((destructor)) | #define DESTRUCTOR __attribute__ ((destructor)) | ||||
| #else | #else | ||||
| @@ -167,7 +169,7 @@ void goto_set_num_threads(int num_threads) {}; | |||||
| #else | #else | ||||
| #ifdef OS_LINUX | |||||
| #if defined(OS_LINUX) || defined(OS_SUNOS) | |||||
| #ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
| int get_num_procs(void); | int get_num_procs(void); | ||||
| #else | #else | ||||
| @@ -292,8 +294,11 @@ void openblas_fork_handler() | |||||
| #endif | #endif | ||||
| } | } | ||||
| extern int openblas_num_threads_env(); | |||||
| extern int openblas_goto_num_threads_env(); | |||||
| extern int openblas_omp_num_threads_env(); | |||||
| int blas_get_cpu_number(void){ | int blas_get_cpu_number(void){ | ||||
| env_var_t p; | |||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | ||||
| int max_num; | int max_num; | ||||
| #endif | #endif | ||||
| @@ -308,18 +313,18 @@ int blas_get_cpu_number(void){ | |||||
| blas_goto_num = 0; | blas_goto_num = 0; | ||||
| #ifndef USE_OPENMP | #ifndef USE_OPENMP | ||||
| if (readenv(p,"OPENBLAS_NUM_THREADS")) blas_goto_num = atoi(p); | |||||
| blas_goto_num=openblas_num_threads_env(); | |||||
| if (blas_goto_num < 0) blas_goto_num = 0; | if (blas_goto_num < 0) blas_goto_num = 0; | ||||
| if (blas_goto_num == 0) { | if (blas_goto_num == 0) { | ||||
| if (readenv(p,"GOTO_NUM_THREADS")) blas_goto_num = atoi(p); | |||||
| if (blas_goto_num < 0) blas_goto_num = 0; | |||||
| blas_goto_num=openblas_goto_num_threads_env(); | |||||
| if (blas_goto_num < 0) blas_goto_num = 0; | |||||
| } | } | ||||
| #endif | #endif | ||||
| blas_omp_num = 0; | blas_omp_num = 0; | ||||
| if (readenv(p,"OMP_NUM_THREADS")) blas_omp_num = atoi(p); | |||||
| blas_omp_num=openblas_omp_num_threads_env(); | |||||
| if (blas_omp_num < 0) blas_omp_num = 0; | if (blas_omp_num < 0) blas_omp_num = 0; | ||||
| if (blas_goto_num > 0) blas_num_threads = blas_goto_num; | if (blas_goto_num > 0) blas_num_threads = blas_goto_num; | ||||
| @@ -355,7 +360,9 @@ int openblas_get_num_threads(void) { | |||||
| #ifndef SMP | #ifndef SMP | ||||
| return 1; | return 1; | ||||
| #else | #else | ||||
| return blas_get_cpu_number(); | |||||
| // init blas_cpu_number if needed | |||||
| blas_get_cpu_number(); | |||||
| return blas_cpu_number; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -914,7 +921,6 @@ static volatile struct { | |||||
| } memory[NUM_BUFFERS]; | } memory[NUM_BUFFERS]; | ||||
| static int memory_initialized = 0; | static int memory_initialized = 0; | ||||
| static void gotoblas_memory_init(void); | |||||
| /* Memory allocation routine */ | /* Memory allocation routine */ | ||||
| /* procpos ... indicates where it comes from */ | /* procpos ... indicates where it comes from */ | ||||
| @@ -1337,6 +1343,7 @@ static void gotoblas_memory_init(void) { | |||||
| /* Initialization for all function; this function should be called before main */ | /* Initialization for all function; this function should be called before main */ | ||||
| static int gotoblas_initialized = 0; | static int gotoblas_initialized = 0; | ||||
| extern void openblas_read_env(); | |||||
| void CONSTRUCTOR gotoblas_init(void) { | void CONSTRUCTOR gotoblas_init(void) { | ||||
| @@ -1346,6 +1353,8 @@ void CONSTRUCTOR gotoblas_init(void) { | |||||
| openblas_fork_handler(); | openblas_fork_handler(); | ||||
| #endif | #endif | ||||
| openblas_read_env(); | |||||
| #ifdef PROFILE | #ifdef PROFILE | ||||
| moncontrol (0); | moncontrol (0); | ||||
| #endif | #endif | ||||
| @@ -1362,6 +1371,19 @@ void CONSTRUCTOR gotoblas_init(void) { | |||||
| gotoblas_memory_init(); | gotoblas_memory_init(); | ||||
| #endif | #endif | ||||
| //#if defined(OS_LINUX) | |||||
| #if 0 | |||||
| struct rlimit curlimit; | |||||
| if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) | |||||
| { | |||||
| if ( curlimit.rlim_cur != curlimit.rlim_max ) | |||||
| { | |||||
| curlimit.rlim_cur = curlimit.rlim_max; | |||||
| setrlimit(RLIMIT_STACK, &curlimit); | |||||
| } | |||||
| } | |||||
| #endif | |||||
| #ifdef SMP | #ifdef SMP | ||||
| if (blas_cpu_number == 0) blas_get_cpu_number(); | if (blas_cpu_number == 0) blas_get_cpu_number(); | ||||
| #ifdef SMP_SERVER | #ifdef SMP_SERVER | ||||
| @@ -0,0 +1,84 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2011-2016, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include "common.h" | |||||
| static int openblas_env_verbose=0; | |||||
| static unsigned int openblas_env_thread_timeout=0; | |||||
| static int openblas_env_block_factor=0; | |||||
| static int openblas_env_openblas_num_threads=0; | |||||
| static int openblas_env_goto_num_threads=0; | |||||
| static int openblas_env_omp_num_threads=0; | |||||
| int openblas_verbose() { return openblas_env_verbose;} | |||||
| unsigned int openblas_thread_timeout() { return openblas_env_thread_timeout;} | |||||
| int openblas_block_factor() { return openblas_env_block_factor;} | |||||
| int openblas_num_threads_env() { return openblas_env_openblas_num_threads;} | |||||
| int openblas_goto_num_threads_env() { return openblas_env_goto_num_threads;} | |||||
| int openblas_omp_num_threads_env() { return openblas_env_omp_num_threads;} | |||||
| void openblas_read_env() { | |||||
| int ret=0; | |||||
| env_var_t p; | |||||
| if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p); | |||||
| if(ret<0) ret=0; | |||||
| openblas_env_verbose=ret; | |||||
| ret=0; | |||||
| if (readenv(p,"OPENBLAS_BLOCK_FACTOR")) ret = atoi(p); | |||||
| if(ret<0) ret=0; | |||||
| openblas_env_block_factor=ret; | |||||
| ret=0; | |||||
| if (readenv(p,"OPENBLAS_THREAD_TIMEOUT")) ret = atoi(p); | |||||
| if(ret<0) ret=0; | |||||
| openblas_env_thread_timeout=(unsigned int)ret; | |||||
| ret=0; | |||||
| if (readenv(p,"OPENBLAS_NUM_THREADS")) ret = atoi(p); | |||||
| if(ret<0) ret=0; | |||||
| openblas_env_openblas_num_threads=ret; | |||||
| ret=0; | |||||
| if (readenv(p,"GOTO_NUM_THREADS")) ret = atoi(p); | |||||
| if(ret<0) ret=0; | |||||
| openblas_env_goto_num_threads=ret; | |||||
| ret=0; | |||||
| if (readenv(p,"OMP_NUM_THREADS")) ret = atoi(p); | |||||
| if(ret<0) ret=0; | |||||
| openblas_env_omp_num_threads=ret; | |||||
| } | |||||
| @@ -33,13 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| int openblas_verbose() { | |||||
| int ret=0; | |||||
| env_var_t p; | |||||
| if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p); | |||||
| if(ret<0) ret=0; | |||||
| return ret; | |||||
| } | |||||
| extern int openblas_verbose(); | |||||
| void openblas_warning(int verbose, const char * msg) { | void openblas_warning(int verbose, const char * msg) { | ||||
| int current_verbose; | int current_verbose; | ||||
| @@ -40,6 +40,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| extern int openblas_block_factor(); | |||||
| int get_L2_size(void); | int get_L2_size(void); | ||||
| #define DEFAULT_GEMM_P 128 | #define DEFAULT_GEMM_P 128 | ||||
| @@ -249,7 +250,6 @@ int get_L2_size(void){ | |||||
| void blas_set_parameter(void){ | void blas_set_parameter(void){ | ||||
| env_var_t p; | |||||
| int factor; | int factor; | ||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) | ||||
| int size = 16; | int size = 16; | ||||
| @@ -468,9 +468,8 @@ void blas_set_parameter(void){ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| if (readenv(p,"GOTO_BLOCK_FACTOR")) { | |||||
| factor = atoi(p); | |||||
| factor=openblas_block_factor(); | |||||
| if (factor>0) { | |||||
| if (factor < 10) factor = 10; | if (factor < 10) factor = 10; | ||||
| if (factor > 200) factor = 200; | if (factor > 200) factor = 200; | ||||
| @@ -26,10 +26,16 @@ ifndef ONLY_CBLAS | |||||
| ONLY_CBLAS = 0 | ONLY_CBLAS = 0 | ||||
| endif | endif | ||||
| ifndef BUILD_LAPACK_DEPRECATED | |||||
| BUILD_LAPACK_DEPRECATED = 0 | |||||
| endif | |||||
| ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
| ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
| ifndef ONLY_CBLAS | |||||
| EXTRALIB += -lgfortran | EXTRALIB += -lgfortran | ||||
| endif | endif | ||||
| endif | |||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| ifeq ($(C_COMPILER), GCC) | ifeq ($(C_COMPILER), GCC) | ||||
| EXTRALIB += -lgomp | EXTRALIB += -lgomp | ||||
| @@ -39,9 +45,11 @@ endif | |||||
| ifeq ($(OSNAME), CYGWIN_NT) | ifeq ($(OSNAME), CYGWIN_NT) | ||||
| ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
| ifndef ONLY_CBLAS | |||||
| EXTRALIB += -lgfortran | EXTRALIB += -lgfortran | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| all:: | all:: | ||||
| @@ -88,17 +96,17 @@ dll : ../$(LIBDLLNAME) | |||||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) | -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) | ||||
| libopenblas.def : gensymbol | libopenblas.def : gensymbol | ||||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
| libgoto_hpl.def : gensymbol | libgoto_hpl.def : gensymbol | ||||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
| ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX)) | ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX)) | ||||
| $(LIBDYNNAME) : ../$(LIBNAME) osx.def | $(LIBDYNNAME) : ../$(LIBNAME) osx.def | ||||
| else | else | ||||
| ../$(LIBNAME).renamed : ../$(LIBNAME) objconv.def | |||||
| $(OBJCONV) @objconv.def ../$(LIBNAME) ../$(LIBNAME).renamed | |||||
| $(LIBDYNNAME) : ../$(LIBNAME).renamed osx.def | |||||
| ../$(LIBNAME).osx.renamed : ../$(LIBNAME) objconv.def | |||||
| $(OBJCONV) @objconv.def ../$(LIBNAME) ../$(LIBNAME).osx.renamed | |||||
| $(LIBDYNNAME) : ../$(LIBNAME).osx.renamed osx.def | |||||
| endif | endif | ||||
| ifeq ($(NOFORTRAN), $(filter $(NOFORTRAN),1 2)) | ifeq ($(NOFORTRAN), $(filter $(NOFORTRAN),1 2)) | ||||
| #only build without Fortran | #only build without Fortran | ||||
| @@ -110,7 +118,7 @@ endif | |||||
| dllinit.$(SUFFIX) : dllinit.c | dllinit.$(SUFFIX) : dllinit.c | ||||
| $(CC) $(CFLAGS) -c -o $(@F) -s $< | $(CC) $(CFLAGS) -c -o $(@F) -s $< | ||||
| ifeq ($(OSNAME), Linux) | |||||
| ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS)) | |||||
| so : ../$(LIBSONAME) | so : ../$(LIBSONAME) | ||||
| @@ -201,26 +209,26 @@ static : ../$(LIBNAME) | |||||
| rm -f goto.$(SUFFIX) | rm -f goto.$(SUFFIX) | ||||
| osx.def : gensymbol ../Makefile.system ../getarch.c | osx.def : gensymbol ../Makefile.system ../getarch.c | ||||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
| aix.def : gensymbol ../Makefile.system ../getarch.c | aix.def : gensymbol ../Makefile.system ../getarch.c | ||||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
| objcopy.def : gensymbol ../Makefile.system ../getarch.c | objcopy.def : gensymbol ../Makefile.system ../getarch.c | ||||
| perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||||
| perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
| objconv.def : gensymbol ../Makefile.system ../getarch.c | objconv.def : gensymbol ../Makefile.system ../getarch.c | ||||
| perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||||
| perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
| test : linktest.c | test : linktest.c | ||||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | ||||
| rm -f linktest | rm -f linktest | ||||
| linktest.c : gensymbol ../Makefile.system ../getarch.c | linktest.c : gensymbol ../Makefile.system ../getarch.c | ||||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > linktest.c | |||||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > linktest.c | |||||
| clean :: | clean :: | ||||
| @rm -f *.def *.dylib __.SYMDEF* | |||||
| @rm -f *.def *.dylib __.SYMDEF* *.renamed | |||||
| include ../Makefile.tail | include ../Makefile.tail | ||||
| @@ -173,18 +173,18 @@ | |||||
| sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv, | sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv, | ||||
| sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2, | sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2, | ||||
| sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx, | sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx, | ||||
| sgegs, sgegv, sgehd2, sgehrd, sgelq2, sgelqf, | |||||
| sgels, sgelsd, sgelss, sgelsx, sgelsy, sgeql2, sgeqlf, | |||||
| sgeqp3, sgeqpf, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs, | |||||
| sgehd2, sgehrd, sgelq2, sgelqf, | |||||
| sgels, sgelsd, sgelss, sgelsy, sgeql2, sgeqlf, | |||||
| sgeqp3, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs, | |||||
| sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx, | sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx, | ||||
| sgetc2, sgetri, | sgetc2, sgetri, | ||||
| sggbak, sggbal, sgges, sggesx, sggev, sggevx, | sggbak, sggbal, sgges, sggesx, sggev, sggevx, | ||||
| sggglm, sgghrd, sgglse, sggqrf, | sggglm, sgghrd, sgglse, sggqrf, | ||||
| sggrqf, sggsvd, sggsvp, sgtcon, sgtrfs, sgtsv, | |||||
| sggrqf, sgtcon, sgtrfs, sgtsv, | |||||
| sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz, | sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz, | ||||
| shsein, shseqr, slabrd, slacon, slacn2, | shsein, shseqr, slabrd, slacon, slacn2, | ||||
| slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr, | slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr, | ||||
| slahrd, slahr2, slaic1, slaln2, slals0, slalsa, slalsd, | |||||
| slahr2, slaic1, slaln2, slals0, slalsa, slalsd, | |||||
| slangb, slange, slangt, slanhs, slansb, slansp, | slangb, slange, slangt, slanhs, slansb, slansp, | ||||
| slansy, slantb, slantp, slantr, slanv2, | slansy, slantb, slantp, slantr, slanv2, | ||||
| slapll, slapmt, | slapll, slapmt, | ||||
| @@ -194,7 +194,7 @@ | |||||
| slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv, | slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv, | ||||
| slarrv, slartv, | slarrv, slartv, | ||||
| slarz, slarzb, slarzt, slasy2, slasyf, | slarz, slarzb, slarzt, slasy2, slasyf, | ||||
| slatbs, slatdf, slatps, slatrd, slatrs, slatrz, slatzm, | |||||
| slatbs, slatdf, slatps, slatrd, slatrs, slatrz, | |||||
| sopgtr, sopmtr, sorg2l, sorg2r, | sopgtr, sopmtr, sorg2l, sorg2r, | ||||
| sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2, | sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2, | ||||
| sorgrq, sorgtr, sorm2l, sorm2r, | sorgrq, sorgtr, sorm2l, sorm2r, | ||||
| @@ -220,7 +220,7 @@ | |||||
| stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri, | stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri, | ||||
| stptrs, | stptrs, | ||||
| strcon, strevc, strexc, strrfs, strsen, strsna, strsyl, | strcon, strevc, strexc, strrfs, strsen, strsna, strsyl, | ||||
| strtrs, stzrqf, stzrzf, sstemr, | |||||
| strtrs, stzrzf, sstemr, | |||||
| slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp, | slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp, | ||||
| stfttr, stpttf, stpttr, strttf, strttp, | stfttr, stpttf, stpttr, strttf, strttp, | ||||
| sgejsv, sgesvj, sgsvj0, sgsvj1, | sgejsv, sgesvj, sgsvj0, sgsvj1, | ||||
| @@ -245,14 +245,13 @@ | |||||
| cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx, | cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx, | ||||
| cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd, | cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd, | ||||
| cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx, | cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx, | ||||
| cgegs, cgegv, cgehd2, cgehrd, cgelq2, cgelqf, | |||||
| cgels, cgelsd, cgelss, cgelsx, cgelsy, cgeql2, cgeqlf, cgeqp3, | |||||
| cgeqpf, cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs, | |||||
| cgehd2, cgehrd, cgelq2, cgelqf, | |||||
| cgels, cgelsd, cgelss, cgelsy, cgeql2, cgeqlf, cgeqp3, | |||||
| cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs, | |||||
| cgerq2, cgerqf, cgesc2, cgesdd, cgesvd, | cgerq2, cgerqf, cgesc2, cgesdd, cgesvd, | ||||
| cgesvx, cgetc2, cgetri, | cgesvx, cgetc2, cgetri, | ||||
| cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm, | cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm, | ||||
| cgghrd, cgglse, cggqrf, cggrqf, | cgghrd, cgglse, cggqrf, cggrqf, | ||||
| cggsvd, cggsvp, | |||||
| cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev, | cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev, | ||||
| chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd, | chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd, | ||||
| checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst, | checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst, | ||||
| @@ -267,7 +266,7 @@ | |||||
| claed0, claed7, claed8, | claed0, claed7, claed8, | ||||
| claein, claesy, claev2, clags2, clagtm, | claein, claesy, claev2, clags2, clagtm, | ||||
| clahef, clahqr, | clahef, clahqr, | ||||
| clahrd, clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt, | |||||
| clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt, | |||||
| clanhb, clanhe, | clanhb, clanhe, | ||||
| clanhp, clanhs, clanht, clansb, clansp, clansy, clantb, | clanhp, clanhs, clanht, clansb, clansp, clansy, clantb, | ||||
| clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge, | clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge, | ||||
| @@ -278,7 +277,7 @@ | |||||
| clarfx, clargv, clarnv, clarrv, clartg, clartv, | clarfx, clargv, clarnv, clarrv, clartg, clartv, | ||||
| clarz, clarzb, clarzt, clascl, claset, clasr, classq, | clarz, clarzb, clarzt, clascl, claset, clasr, classq, | ||||
| clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz, | clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz, | ||||
| clatzm, cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv, | |||||
| cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv, | |||||
| cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs, | cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs, | ||||
| cposv, cposvx, cpstrf, cpstf2, | cposv, cposvx, cpstrf, cpstf2, | ||||
| cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs, | cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs, | ||||
| @@ -293,7 +292,7 @@ | |||||
| ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon, | ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon, | ||||
| ctprfs, ctptri, | ctprfs, ctptri, | ||||
| ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna, | ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna, | ||||
| ctrsyl, ctrtrs, ctzrqf, ctzrzf, cung2l, cung2r, | |||||
| ctrsyl, ctrtrs, ctzrzf, cung2l, cung2r, | |||||
| cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2, | cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2, | ||||
| cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2, | cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2, | ||||
| cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz, | cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz, | ||||
| @@ -321,18 +320,18 @@ | |||||
| dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv, | dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv, | ||||
| dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2, | dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2, | ||||
| dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx, | dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx, | ||||
| dgegs, dgegv, dgehd2, dgehrd, dgelq2, dgelqf, | |||||
| dgels, dgelsd, dgelss, dgelsx, dgelsy, dgeql2, dgeqlf, | |||||
| dgeqp3, dgeqpf, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs, | |||||
| dgehd2, dgehrd, dgelq2, dgelqf, | |||||
| dgels, dgelsd, dgelss, dgelsy, dgeql2, dgeqlf, | |||||
| dgeqp3, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs, | |||||
| dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx, | dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx, | ||||
| dgetc2, dgetri, | dgetc2, dgetri, | ||||
| dggbak, dggbal, dgges, dggesx, dggev, dggevx, | dggbak, dggbal, dgges, dggesx, dggev, dggevx, | ||||
| dggglm, dgghrd, dgglse, dggqrf, | dggglm, dgghrd, dgglse, dggqrf, | ||||
| dggrqf, dggsvd, dggsvp, dgtcon, dgtrfs, dgtsv, | |||||
| dggrqf, dgtcon, dgtrfs, dgtsv, | |||||
| dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz, | dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz, | ||||
| dhsein, dhseqr, dlabrd, dlacon, dlacn2, | dhsein, dhseqr, dlabrd, dlacon, dlacn2, | ||||
| dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr, | dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr, | ||||
| dlahrd, dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd, | |||||
| dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd, | |||||
| dlangb, dlange, dlangt, dlanhs, dlansb, dlansp, | dlangb, dlange, dlangt, dlanhs, dlansb, dlansp, | ||||
| dlansy, dlantb, dlantp, dlantr, dlanv2, | dlansy, dlantb, dlantp, dlantr, dlanv2, | ||||
| dlapll, dlapmt, | dlapll, dlapmt, | ||||
| @@ -342,7 +341,7 @@ | |||||
| dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx, | dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx, | ||||
| dlargv, dlarrv, dlartv, | dlargv, dlarrv, dlartv, | ||||
| dlarz, dlarzb, dlarzt, dlasy2, dlasyf, | dlarz, dlarzb, dlarzt, dlasy2, dlasyf, | ||||
| dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, dlatzm, | |||||
| dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, | |||||
| dopgtr, dopmtr, dorg2l, dorg2r, | dopgtr, dopmtr, dorg2l, dorg2r, | ||||
| dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2, | dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2, | ||||
| dorgrq, dorgtr, dorm2l, dorm2r, | dorgrq, dorgtr, dorm2l, dorm2r, | ||||
| @@ -368,7 +367,7 @@ | |||||
| dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri, | dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri, | ||||
| dtptrs, | dtptrs, | ||||
| dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl, | dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl, | ||||
| dtrtrs, dtzrqf, dtzrzf, dstemr, | |||||
| dtrtrs, dtzrzf, dstemr, | |||||
| dsgesv, dsposv, dlag2s, slag2d, dlat2s, | dsgesv, dsposv, dlag2s, slag2d, dlat2s, | ||||
| dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp, | dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp, | ||||
| dtfttr, dtpttf, dtpttr, dtrttf, dtrttp, | dtfttr, dtpttf, dtpttr, dtrttf, dtrttp, | ||||
| @@ -387,14 +386,13 @@ | |||||
| zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx, | zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx, | ||||
| zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd, | zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd, | ||||
| zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx, | zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx, | ||||
| zgegs, zgegv, zgehd2, zgehrd, zgelq2, zgelqf, | |||||
| zgels, zgelsd, zgelss, zgelsx, zgelsy, zgeql2, zgeqlf, zgeqp3, | |||||
| zgeqpf, zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf, | |||||
| zgehd2, zgehrd, zgelq2, zgelqf, | |||||
| zgels, zgelsd, zgelss, zgelsy, zgeql2, zgeqlf, zgeqp3, | |||||
| zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf, | |||||
| zgesc2, zgesdd, zgesvd, zgesvx, zgetc2, | zgesc2, zgesdd, zgesvd, zgesvx, zgetc2, | ||||
| zgetri, | zgetri, | ||||
| zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm, | zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm, | ||||
| zgghrd, zgglse, zggqrf, zggrqf, | zgghrd, zgglse, zggqrf, zggrqf, | ||||
| zggsvd, zggsvp, | |||||
| zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev, | zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev, | ||||
| zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd, | zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd, | ||||
| zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst, | zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst, | ||||
| @@ -409,7 +407,7 @@ | |||||
| zlaed0, zlaed7, zlaed8, | zlaed0, zlaed7, zlaed8, | ||||
| zlaein, zlaesy, zlaev2, zlags2, zlagtm, | zlaein, zlaesy, zlaev2, zlags2, zlagtm, | ||||
| zlahef, zlahqr, | zlahef, zlahqr, | ||||
| zlahrd, zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange, | |||||
| zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange, | |||||
| zlangt, zlanhb, | zlangt, zlanhb, | ||||
| zlanhe, | zlanhe, | ||||
| zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb, | zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb, | ||||
| @@ -422,7 +420,7 @@ | |||||
| zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv, | zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv, | ||||
| zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr, | zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr, | ||||
| zlassq, zlasyf, | zlassq, zlasyf, | ||||
| zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, zlatzm, | |||||
| zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, | |||||
| zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv, | zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv, | ||||
| zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs, | zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs, | ||||
| zposv, zposvx, zpotrs, zpstrf, zpstf2, | zposv, zposvx, zpotrs, zpstrf, zpstf2, | ||||
| @@ -438,7 +436,7 @@ | |||||
| ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon, | ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon, | ||||
| ztprfs, ztptri, | ztprfs, ztptri, | ||||
| ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna, | ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna, | ||||
| ztrsyl, ztrtrs, ztzrqf, ztzrzf, zung2l, | |||||
| ztrsyl, ztrtrs, ztzrzf, zung2l, | |||||
| zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2, | zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2, | ||||
| zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2, | zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2, | ||||
| zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz, | zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz, | ||||
| @@ -452,6 +450,139 @@ | |||||
| zunbdb5, zunbdb6, zuncsd, zuncsd2by1, | zunbdb5, zunbdb6, zuncsd, zuncsd2by1, | ||||
| zgeqrt, zgeqrt2, zgeqrt3, zgemqrt, | zgeqrt, zgeqrt2, zgeqrt3, zgemqrt, | ||||
| ztpqrt, ztpqrt2, ztpmqrt, ztprfb, | ztpqrt, ztpqrt2, ztpmqrt, ztprfb, | ||||
| # functions added for lapack-3.6.0 | |||||
| cgejsv, | |||||
| cgesvdx, | |||||
| cgesvj, | |||||
| cgetrf2, | |||||
| cgges3, | |||||
| cggev3, | |||||
| cgghd3, | |||||
| cggsvd3, | |||||
| cggsvp3, | |||||
| cgsvj0, | |||||
| cgsvj1, | |||||
| clagge, | |||||
| claghe, | |||||
| clagsy, | |||||
| clahilb, | |||||
| clakf2, | |||||
| clarge, | |||||
| clarnd, | |||||
| claror, | |||||
| clarot, | |||||
| clatm1, | |||||
| clatm2, | |||||
| clatm3, | |||||
| clatm5, | |||||
| clatm6, | |||||
| clatme, | |||||
| clatmr, | |||||
| clatms, | |||||
| clatmt, | |||||
| cpotrf2, | |||||
| csbmv, | |||||
| cspr2, | |||||
| csyr2, | |||||
| cunm22, | |||||
| dbdsvdx, | |||||
| dgesvdx, | |||||
| dgetrf2, | |||||
| dgges3, | |||||
| dggev3, | |||||
| dgghd3, | |||||
| dggsvd3, | |||||
| dggsvp3, | |||||
| dladiv2, | |||||
| dlagge, | |||||
| dlagsy, | |||||
| dlahilb, | |||||
| dlakf2, | |||||
| dlaran, | |||||
| dlarge, | |||||
| dlarnd, | |||||
| dlaror, | |||||
| dlarot, | |||||
| dlatm1, | |||||
| dlatm2, | |||||
| dlatm3, | |||||
| dlatm5, | |||||
| dlatm6, | |||||
| dlatm7, | |||||
| dlatme, | |||||
| dlatmr, | |||||
| dlatms, | |||||
| dlatmt, | |||||
| dorm22, | |||||
| dpotrf2, | |||||
| dsecnd, | |||||
| sbdsvdx, | |||||
| second, | |||||
| sgesvdx, | |||||
| sgetrf2, | |||||
| sgges3, | |||||
| sggev3, | |||||
| sgghd3, | |||||
| sggsvd3, | |||||
| sggsvp3, | |||||
| sladiv2, | |||||
| slagge, | |||||
| slagsy, | |||||
| slahilb, | |||||
| slakf2, | |||||
| slaran, | |||||
| slarge, | |||||
| slarnd, | |||||
| slaror, | |||||
| slarot, | |||||
| slatm1, | |||||
| slatm2, | |||||
| slatm3, | |||||
| slatm5, | |||||
| slatm6, | |||||
| slatm7, | |||||
| slatme, | |||||
| slatmr, | |||||
| slatms, | |||||
| slatmt, | |||||
| sorm22, | |||||
| spotrf2, | |||||
| zgejsv, | |||||
| zgesvdx, | |||||
| zgesvj, | |||||
| zgetrf2, | |||||
| zgges3, | |||||
| zggev3, | |||||
| zgghd3, | |||||
| zggsvd3, | |||||
| zggsvp3, | |||||
| zgsvj0, | |||||
| zgsvj1, | |||||
| zlagge, | |||||
| zlaghe, | |||||
| zlagsy, | |||||
| zlahilb, | |||||
| zlakf2, | |||||
| zlarge, | |||||
| zlarnd, | |||||
| zlaror, | |||||
| zlarot, | |||||
| zlatm1, | |||||
| zlatm2, | |||||
| zlatm3, | |||||
| zlatm5, | |||||
| zlatm6, | |||||
| zlatme, | |||||
| zlatmr, | |||||
| zlatms, | |||||
| zlatmt, | |||||
| zpotrf2, | |||||
| zsbmv, | |||||
| zspr2, | |||||
| zsyr2, | |||||
| zunm22 | |||||
| ); | ); | ||||
| @lapack_extendedprecision_objs = ( | @lapack_extendedprecision_objs = ( | ||||
| @@ -459,6 +590,13 @@ | |||||
| dlagsy, dsysvxx, sporfsx, slatms, zlatms, zherfsx, csysvxx, | dlagsy, dsysvxx, sporfsx, slatms, zlatms, zherfsx, csysvxx, | ||||
| ); | ); | ||||
| @lapack_deprecated_objs = ( | |||||
| cgegs, cggsvd, ctzrqf, dgeqpf, dlatzm, sgelsx, slahrd, zgegv, zggsvp, | |||||
| cgegv, cggsvp, dgegs, dggsvd, dtzrqf, sgeqpf, slatzm, zgelsx, zlahrd, | |||||
| cgelsx, clahrd, dgegv, dggsvp, sgegs, sggsvd, stzrqf, zgeqpf, zlatzm, | |||||
| cgeqpf, clatzm, dgelsx, dlahrd, sgegv, sggsvp, zgegs, zggsvd, ztzrqf, | |||||
| ); | |||||
| @lapackeobjs = ( | @lapackeobjs = ( | ||||
| # LAPACK C interface routines. | # LAPACK C interface routines. | ||||
| # | # | ||||
| @@ -682,8 +820,6 @@ | |||||
| LAPACKE_cgeqlf_work, | LAPACKE_cgeqlf_work, | ||||
| LAPACKE_cgeqp3, | LAPACKE_cgeqp3, | ||||
| LAPACKE_cgeqp3_work, | LAPACKE_cgeqp3_work, | ||||
| LAPACKE_cgeqpf, | |||||
| LAPACKE_cgeqpf_work, | |||||
| LAPACKE_cgeqr2, | LAPACKE_cgeqr2, | ||||
| LAPACKE_cgeqr2_work, | LAPACKE_cgeqr2_work, | ||||
| LAPACKE_cgeqrf, | LAPACKE_cgeqrf, | ||||
| @@ -738,10 +874,6 @@ | |||||
| LAPACKE_cggqrf_work, | LAPACKE_cggqrf_work, | ||||
| LAPACKE_cggrqf, | LAPACKE_cggrqf, | ||||
| LAPACKE_cggrqf_work, | LAPACKE_cggrqf_work, | ||||
| LAPACKE_cggsvd, | |||||
| LAPACKE_cggsvd_work, | |||||
| LAPACKE_cggsvp, | |||||
| LAPACKE_cggsvp_work, | |||||
| LAPACKE_cgtcon, | LAPACKE_cgtcon, | ||||
| LAPACKE_cgtcon_work, | LAPACKE_cgtcon_work, | ||||
| LAPACKE_cgtrfs, | LAPACKE_cgtrfs, | ||||
| @@ -1186,8 +1318,6 @@ | |||||
| LAPACKE_dgeqlf_work, | LAPACKE_dgeqlf_work, | ||||
| LAPACKE_dgeqp3, | LAPACKE_dgeqp3, | ||||
| LAPACKE_dgeqp3_work, | LAPACKE_dgeqp3_work, | ||||
| LAPACKE_dgeqpf, | |||||
| LAPACKE_dgeqpf_work, | |||||
| LAPACKE_dgeqr2, | LAPACKE_dgeqr2, | ||||
| LAPACKE_dgeqr2_work, | LAPACKE_dgeqr2_work, | ||||
| LAPACKE_dgeqrf, | LAPACKE_dgeqrf, | ||||
| @@ -1244,10 +1374,6 @@ | |||||
| LAPACKE_dggqrf_work, | LAPACKE_dggqrf_work, | ||||
| LAPACKE_dggrqf, | LAPACKE_dggrqf, | ||||
| LAPACKE_dggrqf_work, | LAPACKE_dggrqf_work, | ||||
| LAPACKE_dggsvd, | |||||
| LAPACKE_dggsvd_work, | |||||
| LAPACKE_dggsvp, | |||||
| LAPACKE_dggsvp_work, | |||||
| LAPACKE_dgtcon, | LAPACKE_dgtcon, | ||||
| LAPACKE_dgtcon_work, | LAPACKE_dgtcon_work, | ||||
| LAPACKE_dgtrfs, | LAPACKE_dgtrfs, | ||||
| @@ -1676,8 +1802,6 @@ | |||||
| LAPACKE_sgeqlf_work, | LAPACKE_sgeqlf_work, | ||||
| LAPACKE_sgeqp3, | LAPACKE_sgeqp3, | ||||
| LAPACKE_sgeqp3_work, | LAPACKE_sgeqp3_work, | ||||
| LAPACKE_sgeqpf, | |||||
| LAPACKE_sgeqpf_work, | |||||
| LAPACKE_sgeqr2, | LAPACKE_sgeqr2, | ||||
| LAPACKE_sgeqr2_work, | LAPACKE_sgeqr2_work, | ||||
| LAPACKE_sgeqrf, | LAPACKE_sgeqrf, | ||||
| @@ -1734,10 +1858,6 @@ | |||||
| LAPACKE_sggqrf_work, | LAPACKE_sggqrf_work, | ||||
| LAPACKE_sggrqf, | LAPACKE_sggrqf, | ||||
| LAPACKE_sggrqf_work, | LAPACKE_sggrqf_work, | ||||
| LAPACKE_sggsvd, | |||||
| LAPACKE_sggsvd_work, | |||||
| LAPACKE_sggsvp, | |||||
| LAPACKE_sggsvp_work, | |||||
| LAPACKE_sgtcon, | LAPACKE_sgtcon, | ||||
| LAPACKE_sgtcon_work, | LAPACKE_sgtcon_work, | ||||
| LAPACKE_sgtrfs, | LAPACKE_sgtrfs, | ||||
| @@ -2158,8 +2278,6 @@ | |||||
| LAPACKE_zgeqlf_work, | LAPACKE_zgeqlf_work, | ||||
| LAPACKE_zgeqp3, | LAPACKE_zgeqp3, | ||||
| LAPACKE_zgeqp3_work, | LAPACKE_zgeqp3_work, | ||||
| LAPACKE_zgeqpf, | |||||
| LAPACKE_zgeqpf_work, | |||||
| LAPACKE_zgeqr2, | LAPACKE_zgeqr2, | ||||
| LAPACKE_zgeqr2_work, | LAPACKE_zgeqr2_work, | ||||
| LAPACKE_zgeqrf, | LAPACKE_zgeqrf, | ||||
| @@ -2214,10 +2332,6 @@ | |||||
| LAPACKE_zggqrf_work, | LAPACKE_zggqrf_work, | ||||
| LAPACKE_zggrqf, | LAPACKE_zggrqf, | ||||
| LAPACKE_zggrqf_work, | LAPACKE_zggrqf_work, | ||||
| LAPACKE_zggsvd, | |||||
| LAPACKE_zggsvd_work, | |||||
| LAPACKE_zggsvp, | |||||
| LAPACKE_zggsvp_work, | |||||
| LAPACKE_zgtcon, | LAPACKE_zgtcon, | ||||
| LAPACKE_zgtcon_work, | LAPACKE_zgtcon_work, | ||||
| LAPACKE_zgtrfs, | LAPACKE_zgtrfs, | ||||
| @@ -2707,6 +2821,134 @@ | |||||
| LAPACKE_slagsy_work, | LAPACKE_slagsy_work, | ||||
| LAPACKE_zlagsy, | LAPACKE_zlagsy, | ||||
| LAPACKE_zlagsy_work, | LAPACKE_zlagsy_work, | ||||
| ## new function from lapack-3.6.0 | |||||
| LAPACKE_cgejsv, | |||||
| LAPACKE_cgejsv_work, | |||||
| LAPACKE_cgesvdx, | |||||
| LAPACKE_cgesvdx_work, | |||||
| LAPACKE_cgesvj, | |||||
| LAPACKE_cgesvj_work, | |||||
| LAPACKE_cgetrf2, | |||||
| LAPACKE_cgetrf2_work, | |||||
| LAPACKE_cgges3, | |||||
| LAPACKE_cgges3_work, | |||||
| LAPACKE_cggev3, | |||||
| LAPACKE_cggev3_work, | |||||
| LAPACKE_cgghd3, | |||||
| LAPACKE_cgghd3_work, | |||||
| LAPACKE_cggsvd3, | |||||
| LAPACKE_cggsvd3_work, | |||||
| LAPACKE_cggsvp3, | |||||
| LAPACKE_cggsvp3_work, | |||||
| LAPACKE_chetrf_rook, | |||||
| LAPACKE_chetrf_rook_work, | |||||
| LAPACKE_chetrs_rook, | |||||
| LAPACKE_chetrs_rook_work, | |||||
| LAPACKE_clapmt, | |||||
| LAPACKE_clapmt_work, | |||||
| LAPACKE_clascl, | |||||
| LAPACKE_clascl_work, | |||||
| LAPACKE_cpotrf2, | |||||
| LAPACKE_cpotrf2_work, | |||||
| LAPACKE_csytrf_rook, | |||||
| LAPACKE_csytrf_rook_work, | |||||
| LAPACKE_csytrs_rook, | |||||
| LAPACKE_csytrs_rook_work, | |||||
| LAPACKE_cuncsd2by1, | |||||
| LAPACKE_cuncsd2by1_work, | |||||
| LAPACKE_dbdsvdx, | |||||
| LAPACKE_dbdsvdx_work, | |||||
| LAPACKE_dgesvdx, | |||||
| LAPACKE_dgesvdx_work, | |||||
| LAPACKE_dgetrf2, | |||||
| LAPACKE_dgetrf2_work, | |||||
| LAPACKE_dgges3, | |||||
| LAPACKE_dgges3_work, | |||||
| LAPACKE_dggev3, | |||||
| LAPACKE_dggev3_work, | |||||
| LAPACKE_dgghd3, | |||||
| LAPACKE_dgghd3_work, | |||||
| LAPACKE_dggsvd3, | |||||
| LAPACKE_dggsvd3_work, | |||||
| LAPACKE_dggsvp3, | |||||
| LAPACKE_dggsvp3_work, | |||||
| LAPACKE_dlapmt, | |||||
| LAPACKE_dlapmt_work, | |||||
| LAPACKE_dlascl, | |||||
| LAPACKE_dlascl_work, | |||||
| LAPACKE_dorcsd2by1, | |||||
| LAPACKE_dorcsd2by1_work, | |||||
| LAPACKE_dpotrf2, | |||||
| LAPACKE_dpotrf2_work, | |||||
| LAPACKE_dsytrf_rook, | |||||
| LAPACKE_dsytrf_rook_work, | |||||
| LAPACKE_dsytrs_rook, | |||||
| LAPACKE_dsytrs_rook_work, | |||||
| LAPACKE_sbdsvdx, | |||||
| LAPACKE_sbdsvdx_work, | |||||
| LAPACKE_sgesvdx, | |||||
| LAPACKE_sgesvdx_work, | |||||
| LAPACKE_sgetrf2, | |||||
| LAPACKE_sgetrf2_work, | |||||
| LAPACKE_sgges3, | |||||
| LAPACKE_sgges3_work, | |||||
| LAPACKE_sggev3, | |||||
| LAPACKE_sggev3_work, | |||||
| LAPACKE_sgghd3, | |||||
| LAPACKE_sgghd3_work, | |||||
| LAPACKE_sggsvd3, | |||||
| LAPACKE_sggsvd3_work, | |||||
| LAPACKE_sggsvp3, | |||||
| LAPACKE_sggsvp3_work, | |||||
| LAPACKE_slapmt, | |||||
| LAPACKE_slapmt_work, | |||||
| LAPACKE_slascl, | |||||
| LAPACKE_slascl_work, | |||||
| LAPACKE_sorcsd2by1, | |||||
| LAPACKE_sorcsd2by1_work, | |||||
| LAPACKE_spotrf2, | |||||
| LAPACKE_spotrf2_work, | |||||
| LAPACKE_ssytrf_rook, | |||||
| LAPACKE_ssytrf_rook_work, | |||||
| LAPACKE_ssytrs_rook, | |||||
| LAPACKE_ssytrs_rook_work, | |||||
| LAPACKE_stpqrt, | |||||
| LAPACKE_stpqrt_work, | |||||
| LAPACKE_zgejsv, | |||||
| LAPACKE_zgejsv_work, | |||||
| LAPACKE_zgesvdx, | |||||
| LAPACKE_zgesvdx_work, | |||||
| LAPACKE_zgesvj, | |||||
| LAPACKE_zgesvj_work, | |||||
| LAPACKE_zgetrf2, | |||||
| LAPACKE_zgetrf2_work, | |||||
| LAPACKE_zgges3, | |||||
| LAPACKE_zgges3_work, | |||||
| LAPACKE_zggev3, | |||||
| LAPACKE_zggev3_work, | |||||
| LAPACKE_zgghd3, | |||||
| LAPACKE_zgghd3_work, | |||||
| LAPACKE_zggsvd3, | |||||
| LAPACKE_zggsvd3_work, | |||||
| LAPACKE_zggsvp3, | |||||
| LAPACKE_zggsvp3_work, | |||||
| LAPACKE_zhetrf_rook, | |||||
| LAPACKE_zhetrf_rook_work, | |||||
| LAPACKE_zhetrs_rook, | |||||
| LAPACKE_zhetrs_rook_work, | |||||
| LAPACKE_zlapmt, | |||||
| LAPACKE_zlapmt_work, | |||||
| LAPACKE_zlascl, | |||||
| LAPACKE_zlascl_work, | |||||
| LAPACKE_zpotrf2, | |||||
| LAPACKE_zpotrf2_work, | |||||
| LAPACKE_zsytrf_rook, | |||||
| LAPACKE_zsytrf_rook_work, | |||||
| LAPACKE_zsytrs_rook, | |||||
| LAPACKE_zsytrs_rook_work, | |||||
| LAPACKE_zuncsd2by1, | |||||
| LAPACKE_zuncsd2by1_work | |||||
| ); | ); | ||||
| #These function may need 2 underscores. | #These function may need 2 underscores. | ||||
| @@ -2749,6 +2991,11 @@ if ($ARGV[8] == 1) { | |||||
| @need_2underscore_objs = (@lapack_embeded_underscore_objs); | @need_2underscore_objs = (@lapack_embeded_underscore_objs); | ||||
| }; | }; | ||||
| if ($ARGV[11] == 1){ | |||||
| #BUILD_LAPACK_DEPRECATED=1 | |||||
| @underscore_objs =(@underscore_objs, @lapack_deprecated_objs); | |||||
| } | |||||
| } else { | } else { | ||||
| @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); | @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); | ||||
| } | } | ||||
| @@ -1,5 +1,7 @@ | |||||
| #!/usr/bin/perl | #!/usr/bin/perl | ||||
| $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | |||||
| # | # | ||||
| # 1. Not specified | # 1. Not specified | ||||
| # 1.1 Automatically detect, then check compiler | # 1.1 Automatically detect, then check compiler | ||||
| @@ -272,8 +274,9 @@ if ($link ne "") { | |||||
| } | } | ||||
| if ($flags =~ /^\-Y/) { | if ($flags =~ /^\-Y/) { | ||||
| next if ($hostos eq 'SunOS'); | |||||
| $linker_L .= "-Wl,". $flags . " "; | $linker_L .= "-Wl,". $flags . " "; | ||||
| } | |||||
| } | |||||
| if ($flags =~ /^\-rpath\@/) { | if ($flags =~ /^\-rpath\@/) { | ||||
| $flags =~ s/\@/\,/g; | $flags =~ s/\@/\,/g; | ||||
| @@ -86,7 +86,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <sys/types.h> | #include <sys/types.h> | ||||
| #include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
| #endif | #endif | ||||
| #ifdef linux | |||||
| #if defined(linux) || defined(__sun__) | |||||
| #include <sys/sysinfo.h> | #include <sys/sysinfo.h> | ||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #endif | #endif | ||||
| @@ -552,7 +552,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "POWER5" | #define CORENAME "POWER5" | ||||
| #endif | #endif | ||||
| #if defined(FORCE_POWER6) || defined(FORCE_POWER7) || defined(FORCE_POWER8) | |||||
| #if defined(FORCE_POWER6) || defined(FORCE_POWER7) | |||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "POWER" | #define ARCHITECTURE "POWER" | ||||
| #define SUBARCHITECTURE "POWER6" | #define SUBARCHITECTURE "POWER6" | ||||
| @@ -565,6 +565,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "POWER6" | #define CORENAME "POWER6" | ||||
| #endif | #endif | ||||
| #if defined(FORCE_POWER8) | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "POWER" | |||||
| #define SUBARCHITECTURE "POWER8" | |||||
| #define SUBDIRNAME "power" | |||||
| #define ARCHCONFIG "-DPOWER8 " \ | |||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \ | |||||
| "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " | |||||
| #define LIBNAME "power8" | |||||
| #define CORENAME "POWER8" | |||||
| #endif | |||||
| #ifdef FORCE_PPCG4 | #ifdef FORCE_PPCG4 | ||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "POWER" | #define ARCHITECTURE "POWER" | ||||
| @@ -819,10 +833,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " | "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " | ||||
| #define LIBNAME "armv8" | #define LIBNAME "armv8" | ||||
| #define CORENAME "XGENE1" | |||||
| #else | |||||
| #define CORENAME "ARMV8" | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA57 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "ARMV8" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA57 " \ | |||||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||||
| #define LIBNAME "cortexa57" | |||||
| #define CORENAME "CORTEXA57" | |||||
| #else | |||||
| #endif | |||||
| #ifndef FORCE | #ifndef FORCE | ||||
| @@ -892,7 +920,7 @@ static int get_num_cores(void) { | |||||
| size_t len; | size_t len; | ||||
| #endif | #endif | ||||
| #ifdef linux | |||||
| #if defined(linux) || defined(__sun__) | |||||
| //returns the number of processors which are currently online | //returns the number of processors which are currently online | ||||
| return sysconf(_SC_NPROCESSORS_ONLN); | return sysconf(_SC_NPROCESSORS_ONLN); | ||||
| @@ -984,7 +1012,9 @@ int main(int argc, char *argv[]){ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if NO_PARALLEL_MAKE==1 | |||||
| #ifdef MAKE_NB_JOBS | |||||
| printf("MAKE += -j %d\n", MAKE_NB_JOBS); | |||||
| #elif NO_PARALLEL_MAKE==1 | |||||
| printf("MAKE += -j 1\n"); | printf("MAKE += -j 1\n"); | ||||
| #else | #else | ||||
| #ifndef OS_WINDOWS | #ifndef OS_WINDOWS | ||||
| @@ -79,11 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| FLOAT alpha = *ALPHA; | FLOAT alpha = *ALPHA; | ||||
| FLOAT beta = *BETA; | FLOAT beta = *BETA; | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int buffer_size; | |||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | ||||
| @@ -134,13 +132,10 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| blasint lenx, leny; | blasint lenx, leny; | ||||
| int trans; | |||||
| int trans, buffer_size; | |||||
| blasint info, t; | blasint info, t; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { | ||||
| @@ -215,43 +210,20 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| if (incx < 0) x -= (lenx - 1) * incx; | if (incx < 0) x -= (lenx - 1) * incx; | ||||
| if (incy < 0) y -= (leny - 1) * incy; | if (incy < 0) y -= (leny - 1) * incy; | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| // make it volatile because some gemv implementation (ex: dgemv_n.S) | |||||
| // do not restore all register | |||||
| volatile int stack_alloc_size = 0; | |||||
| //for gemv_n and gemv_t, try to allocate on stack | |||||
| stack_alloc_size = m + n; | |||||
| #ifdef ALIGNED_ACCESS | |||||
| stack_alloc_size += 3; | |||||
| #endif | |||||
| if(stack_alloc_size < 128) | |||||
| //dgemv_n.S require a 128 bytes buffer | |||||
| stack_alloc_size = 128; | |||||
| if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) | |||||
| stack_alloc_size = 0; | |||||
| FLOAT stack_buffer[stack_alloc_size]; | |||||
| buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1); | |||||
| // printf("stack_alloc_size=%d\n", stack_alloc_size); | |||||
| #else | |||||
| //Original OpenBLAS/GotoBLAS codes. | |||||
| buffer = (FLOAT *)blas_memory_alloc(1); | |||||
| buffer_size = m + n + 128 / sizeof(FLOAT); | |||||
| #ifdef WINDOWS_ABI | |||||
| buffer_size += 160 / sizeof(FLOAT) ; | |||||
| #endif | #endif | ||||
| // for alignment | |||||
| buffer_size = (buffer_size + 3) & ~3; | |||||
| STACK_ALLOC(buffer_size, FLOAT, buffer); | |||||
| #ifdef SMP | #ifdef SMP | ||||
| nthreads_max = num_cpu_avail(2); | |||||
| nthreads_avail = nthreads_max; | |||||
| MNK = (double) m * (double) n; | |||||
| if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) ) | |||||
| nthreads_max = 1; | |||||
| if ( nthreads_max > nthreads_avail ) | |||||
| nthreads = nthreads_avail; | |||||
| if ( 1L * m * n < 2304L * GEMM_MULTITHREAD_THRESHOLD ) | |||||
| nthreads = 1; | |||||
| else | else | ||||
| nthreads = nthreads_max; | |||||
| nthreads = num_cpu_avail(2); | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -266,14 +238,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| if(!stack_alloc_size){ | |||||
| blas_memory_free(buffer); | |||||
| } | |||||
| #else | |||||
| blas_memory_free(buffer); | |||||
| #endif | |||||
| STACK_FREE(buffer); | |||||
| FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | ||||
| IDEBUG_END; | IDEBUG_END; | ||||
| @@ -171,19 +171,14 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| if (incy < 0) y -= (n - 1) * incy; | if (incy < 0) y -= (n - 1) * incy; | ||||
| if (incx < 0) x -= (m - 1) * incx; | if (incx < 0) x -= (m - 1) * incx; | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| volatile int stack_alloc_size = m; | |||||
| if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) | |||||
| stack_alloc_size = 0; | |||||
| FLOAT stack_buffer[stack_alloc_size]; | |||||
| buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1); | |||||
| #else | |||||
| buffer = (FLOAT *)blas_memory_alloc(1); | |||||
| #endif | |||||
| STACK_ALLOC(m, FLOAT, buffer); | |||||
| #ifdef SMPTEST | #ifdef SMPTEST | ||||
| nthreads = num_cpu_avail(2); | |||||
| // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 | |||||
| if(1L * m * n > 2048L * GEMM_MULTITHREAD_THRESHOLD) | |||||
| nthreads = num_cpu_avail(2); | |||||
| else | |||||
| nthreads = 1; | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -198,11 +193,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| if(!stack_alloc_size) | |||||
| #endif | |||||
| blas_memory_free(buffer); | |||||
| STACK_FREE(buffer); | |||||
| FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | ||||
| IDEBUG_END; | IDEBUG_END; | ||||
| @@ -95,7 +95,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ | |||||
| s = db / r; | s = db / r; | ||||
| z = ONE; | z = ONE; | ||||
| if (ada > adb) z = s; | if (ada > adb) z = s; | ||||
| if ((ada < adb) && (c != ZERO)) z = ONE / c; | |||||
| if ((ada <= adb) && (c != ZERO)) z = ONE / c; | |||||
| *C = c; | *C = c; | ||||
| *S = s; | *S = s; | ||||
| @@ -77,12 +77,13 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||||
| if (incy < 0) y -= (n - 1) * incy; | if (incy < 0) y -= (n - 1) * incy; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| nthreads = num_cpu_avail(1); | |||||
| //disable multi-thread when incx==0 or incy==0 | //disable multi-thread when incx==0 or incy==0 | ||||
| //In that case, the threads would be dependent. | //In that case, the threads would be dependent. | ||||
| if (incx == 0 || incy == 0) | |||||
| nthreads = 1; | |||||
| if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT)) | |||||
| nthreads = 1; | |||||
| else | |||||
| nthreads = num_cpu_avail(1); | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -91,6 +91,27 @@ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifdef SMP | |||||
| #ifndef COMPLEX | |||||
| #ifdef XDOUBLE | |||||
| #define MODE (BLAS_XDOUBLE | BLAS_REAL) | |||||
| #elif defined(DOUBLE) | |||||
| #define MODE (BLAS_DOUBLE | BLAS_REAL) | |||||
| #else | |||||
| #define MODE (BLAS_SINGLE | BLAS_REAL) | |||||
| #endif | |||||
| #else | |||||
| #ifdef XDOUBLE | |||||
| #define MODE (BLAS_XDOUBLE | BLAS_COMPLEX) | |||||
| #elif defined(DOUBLE) | |||||
| #define MODE (BLAS_DOUBLE | BLAS_COMPLEX) | |||||
| #else | |||||
| #define MODE (BLAS_SINGLE | BLAS_COMPLEX) | |||||
| #endif | |||||
| #endif | |||||
| #endif | |||||
| static int (*symm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { | static int (*symm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { | ||||
| #ifndef GEMM3M | #ifndef GEMM3M | ||||
| #ifndef HEMM | #ifndef HEMM | ||||
| @@ -135,26 +156,6 @@ void NAME(char *SIDE, char *UPLO, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| FLOAT *sa, *sb; | FLOAT *sa, *sb; | ||||
| #ifdef SMP | |||||
| #ifndef COMPLEX | |||||
| #ifdef XDOUBLE | |||||
| int mode = BLAS_XDOUBLE | BLAS_REAL; | |||||
| #elif defined(DOUBLE) | |||||
| int mode = BLAS_DOUBLE | BLAS_REAL; | |||||
| #else | |||||
| int mode = BLAS_SINGLE | BLAS_REAL; | |||||
| #endif | |||||
| #else | |||||
| #ifdef XDOUBLE | |||||
| int mode = BLAS_XDOUBLE | BLAS_COMPLEX; | |||||
| #elif defined(DOUBLE) | |||||
| int mode = BLAS_DOUBLE | BLAS_COMPLEX; | |||||
| #else | |||||
| int mode = BLAS_SINGLE | BLAS_COMPLEX; | |||||
| #endif | |||||
| #endif | |||||
| #endif | |||||
| #if defined(SMP) && !defined(NO_AFFINITY) | #if defined(SMP) && !defined(NO_AFFINITY) | ||||
| int nodes; | int nodes; | ||||
| #endif | #endif | ||||
| @@ -246,26 +247,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| FLOAT *sa, *sb; | FLOAT *sa, *sb; | ||||
| #ifdef SMP | |||||
| #ifndef COMPLEX | |||||
| #ifdef XDOUBLE | |||||
| int mode = BLAS_XDOUBLE | BLAS_REAL; | |||||
| #elif defined(DOUBLE) | |||||
| int mode = BLAS_DOUBLE | BLAS_REAL; | |||||
| #else | |||||
| int mode = BLAS_SINGLE | BLAS_REAL; | |||||
| #endif | |||||
| #else | |||||
| #ifdef XDOUBLE | |||||
| int mode = BLAS_XDOUBLE | BLAS_COMPLEX; | |||||
| #elif defined(DOUBLE) | |||||
| int mode = BLAS_DOUBLE | BLAS_COMPLEX; | |||||
| #else | |||||
| int mode = BLAS_SINGLE | BLAS_COMPLEX; | |||||
| #endif | |||||
| #endif | |||||
| #endif | |||||
| #if defined(SMP) && !defined(NO_AFFINITY) | #if defined(SMP) && !defined(NO_AFFINITY) | ||||
| int nodes; | int nodes; | ||||
| #endif | #endif | ||||
| @@ -407,7 +388,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, | |||||
| args.nthreads /= nodes; | args.nthreads /= nodes; | ||||
| gemm_thread_mn(mode, &args, NULL, NULL, | |||||
| gemm_thread_mn(MODE, &args, NULL, NULL, | |||||
| symm[4 | (side << 1) | uplo ], sa, sb, nodes); | symm[4 | (side << 1) | uplo ], sa, sb, nodes); | ||||
| } else { | } else { | ||||
| @@ -419,7 +400,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, | |||||
| #else | #else | ||||
| GEMM_THREAD(mode, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads); | |||||
| GEMM_THREAD(MODE, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads); | |||||
| #endif | #endif | ||||
| @@ -116,7 +116,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, | |||||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) { | void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) { | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int trans, uplo; | |||||
| int uplo; | |||||
| blasint info; | blasint info; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| @@ -124,7 +124,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, | |||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| trans = -1; | |||||
| uplo = -1; | uplo = -1; | ||||
| info = 0; | info = 0; | ||||
| @@ -118,7 +118,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, | |||||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT *a, blasint lda) { | void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT *a, blasint lda) { | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int trans, uplo; | |||||
| int uplo; | |||||
| blasint info; | blasint info; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| @@ -126,7 +126,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, | |||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| trans = -1; | |||||
| uplo = -1; | uplo = -1; | ||||
| info = 0; | info = 0; | ||||
| @@ -77,11 +77,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| blasint incy = *INCY; | blasint incy = *INCY; | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int buffer_size; | |||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | ||||
| @@ -144,13 +142,10 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| blasint lenx, leny; | blasint lenx, leny; | ||||
| int trans; | |||||
| int trans, buffer_size; | |||||
| blasint info, t; | blasint info, t; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | |||||
| int nthreads_avail; | |||||
| double MNK; | |||||
| #endif | #endif | ||||
| int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, | ||||
| @@ -236,22 +231,26 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| if (incx < 0) x -= (lenx - 1) * incx * 2; | if (incx < 0) x -= (lenx - 1) * incx * 2; | ||||
| if (incy < 0) y -= (leny - 1) * incy * 2; | if (incy < 0) y -= (leny - 1) * incy * 2; | ||||
| buffer = (FLOAT *)blas_memory_alloc(1); | |||||
| buffer_size = 2 * (m + n) + 128 / sizeof(FLOAT); | |||||
| #ifdef WINDOWS_ABI | |||||
| buffer_size += 160 / sizeof(FLOAT) ; | |||||
| #endif | |||||
| // for alignment | |||||
| buffer_size = (buffer_size + 3) & ~3; | |||||
| STACK_ALLOC(buffer_size, FLOAT, buffer); | |||||
| #if defined(ARCH_X86_64) && defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0 | |||||
| // cgemv_t.S return NaN if there are NaN or Inf in the buffer (see bug #746) | |||||
| if(trans && stack_alloc_size) | |||||
| memset(buffer, 0, MIN(BUFFER_SIZE, sizeof(FLOAT) * buffer_size)); | |||||
| #endif | |||||
| #ifdef SMP | #ifdef SMP | ||||
| nthreads_max = num_cpu_avail(2); | |||||
| nthreads_avail = nthreads_max; | |||||
| MNK = (double) m * (double) n; | |||||
| if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) | |||||
| nthreads_max = 1; | |||||
| if ( nthreads_max > nthreads_avail ) | |||||
| nthreads = nthreads_avail; | |||||
| if ( 1L * m * n < 1024L * GEMM_MULTITHREAD_THRESHOLD ) | |||||
| nthreads = 1; | |||||
| else | else | ||||
| nthreads = nthreads_max; | |||||
| nthreads = num_cpu_avail(2); | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -267,7 +266,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| } | } | ||||
| #endif | #endif | ||||
| blas_memory_free(buffer); | |||||
| STACK_FREE(buffer); | |||||
| FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n); | FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n); | ||||
| @@ -210,10 +210,14 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| if (incy < 0) y -= (n - 1) * incy * 2; | if (incy < 0) y -= (n - 1) * incy * 2; | ||||
| if (incx < 0) x -= (m - 1) * incx * 2; | if (incx < 0) x -= (m - 1) * incx * 2; | ||||
| buffer = (FLOAT *)blas_memory_alloc(1); | |||||
| STACK_ALLOC(2 * m, FLOAT, buffer); | |||||
| #ifdef SMPTEST | #ifdef SMPTEST | ||||
| nthreads = num_cpu_avail(2); | |||||
| // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 | |||||
| if(1L * m * n > 36L * sizeof(FLOAT) * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD) | |||||
| nthreads = num_cpu_avail(2); | |||||
| else | |||||
| nthreads = 1; | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -245,7 +249,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| } | } | ||||
| #endif | #endif | ||||
| blas_memory_free(buffer); | |||||
| STACK_FREE(buffer); | |||||
| FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n); | FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n); | ||||
| @@ -117,7 +117,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA | |||||
| FLOAT beta_i = BETA[1]; | FLOAT beta_i = BETA[1]; | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int trans, uplo; | |||||
| int uplo; | |||||
| blasint info; | blasint info; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| @@ -135,7 +135,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA | |||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| trans = -1; | |||||
| uplo = -1; | uplo = -1; | ||||
| info = 0; | info = 0; | ||||
| @@ -116,7 +116,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, | |||||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) { | void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) { | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int trans, uplo; | |||||
| int uplo; | |||||
| blasint info; | blasint info; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| @@ -124,7 +124,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, | |||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| trans = -1; | |||||
| uplo = -1; | uplo = -1; | ||||
| info = 0; | info = 0; | ||||
| @@ -121,7 +121,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA | |||||
| FLOAT alpha_r = ALPHA[0]; | FLOAT alpha_r = ALPHA[0]; | ||||
| FLOAT alpha_i = ALPHA[1]; | FLOAT alpha_i = ALPHA[1]; | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int trans, uplo; | |||||
| int uplo; | |||||
| blasint info; | blasint info; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| @@ -129,7 +129,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA | |||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| trans = -1; | |||||
| uplo = -1; | uplo = -1; | ||||
| info = 0; | info = 0; | ||||