| @@ -178,4 +178,4 @@ In chronological order: | |||||
| * [2019-11-06] optimize AVX512 SGEMM | * [2019-11-06] optimize AVX512 SGEMM | ||||
| * [2019-11-12] AVX512 CGEMM & ZGEMM kernels | * [2019-11-12] AVX512 CGEMM & ZGEMM kernels | ||||
| * [2019-12-23] optimize AVX2 CGEMM and ZGEMM | * [2019-12-23] optimize AVX2 CGEMM and ZGEMM | ||||
| * [2019-12-27] AVX2 CGEMM3M kernel | |||||
| * [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels | |||||
| @@ -247,21 +247,21 @@ prof_lapack : lapack_prebuild | |||||
| lapack_prebuild : | lapack_prebuild : | ||||
| ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | ||||
| -@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| @@ -319,7 +319,7 @@ lapack-test : | |||||
| ifneq ($(CROSS), 1) | ifneq ($(CROSS), 1) | ||||
| ( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \ | ( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \ | ||||
| ./testsecond; ./testdsecnd; ./testieee; ./testversion ) | ./testsecond; ./testdsecnd; ./testieee; ./testversion ) | ||||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING) | |||||
| endif | endif | ||||
| lapack-runtest: | lapack-runtest: | ||||
| @@ -25,6 +25,8 @@ else ifeq ($(ARCH), i386) | |||||
| override ARCH=x86 | override ARCH=x86 | ||||
| else ifeq ($(ARCH), aarch64) | else ifeq ($(ARCH), aarch64) | ||||
| override ARCH=arm64 | override ARCH=arm64 | ||||
| else ifeq ($(ARCH), zarch) | |||||
| override ARCH=zarch | |||||
| endif | endif | ||||
| NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib | NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib | ||||
| @@ -558,6 +560,11 @@ DYNAMIC_CORE += THUNDERX2T99 | |||||
| DYNAMIC_CORE += TSV110 | DYNAMIC_CORE += TSV110 | ||||
| endif | endif | ||||
| ifeq ($(ARCH), zarch) | |||||
| DYNAMIC_CORE = Z13 | |||||
| DYNAMIC_CORE += Z14 | |||||
| endif | |||||
| ifeq ($(ARCH), power) | ifeq ($(ARCH), power) | ||||
| DYNAMIC_CORE = POWER6 | DYNAMIC_CORE = POWER6 | ||||
| DYNAMIC_CORE += POWER8 | DYNAMIC_CORE += POWER8 | ||||
| @@ -115,7 +115,9 @@ set(SLASRC | |||||
| stplqt.f stplqt2.f stpmlqt.f | stplqt.f stplqt2.f stpmlqt.f | ||||
| ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f | ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f | ||||
| ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f | ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f | ||||
| ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f) | |||||
| ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f | |||||
| scombssq.f sgesvdq.f slaorhr_col_getrfnp.f | |||||
| slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f ) | |||||
| set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f | set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f | ||||
| sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f | sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f | ||||
| @@ -210,7 +212,9 @@ set(CLASRC | |||||
| ctplqt.f ctplqt2.f ctpmlqt.f | ctplqt.f ctplqt2.f ctpmlqt.f | ||||
| chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f | chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f | ||||
| cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f | cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f | ||||
| chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f) | |||||
| chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f | |||||
| cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f | |||||
| cungtsqr.f cunhr_col.f ) | |||||
| set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f | set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f | ||||
| cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f | cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f | ||||
| @@ -299,7 +303,9 @@ set(DLASRC | |||||
| dtplqt.f dtplqt2.f dtpmlqt.f | dtplqt.f dtplqt2.f dtpmlqt.f | ||||
| dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f | dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f | ||||
| dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f | dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f | ||||
| dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f) | |||||
| dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f | |||||
| dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f | |||||
| dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f ) | |||||
| set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f | set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f | ||||
| dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f | dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f | ||||
| @@ -398,7 +404,9 @@ set(ZLASRC | |||||
| zgelq.f zlaswlq.f zlamswlq.f zgemlq.f | zgelq.f zlaswlq.f zlamswlq.f zgemlq.f | ||||
| zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f | zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f | ||||
| zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f | zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f | ||||
| zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f) | |||||
| zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f | |||||
| zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f | |||||
| zungtsqr.f zunhr_col.f) | |||||
| set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f | set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f | ||||
| zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f | zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f | ||||
| @@ -715,6 +715,8 @@ set(DSRC | |||||
| lapacke_dgesv_work.c | lapacke_dgesv_work.c | ||||
| lapacke_dgesvd.c | lapacke_dgesvd.c | ||||
| lapacke_dgesvd_work.c | lapacke_dgesvd_work.c | ||||
| lapacke_dgesvdq.c | |||||
| lapacke_dgesvdq_work.c | |||||
| lapacke_dgesvdx.c | lapacke_dgesvdx.c | ||||
| lapacke_dgesvdx_work.c | lapacke_dgesvdx_work.c | ||||
| lapacke_dgesvj.c | lapacke_dgesvj.c | ||||
| @@ -1287,6 +1289,8 @@ set(SSRC | |||||
| lapacke_sgesv_work.c | lapacke_sgesv_work.c | ||||
| lapacke_sgesvd.c | lapacke_sgesvd.c | ||||
| lapacke_sgesvd_work.c | lapacke_sgesvd_work.c | ||||
| lapacke_sgesvdq.c | |||||
| lapacke_sgesvdq_work.c | |||||
| lapacke_sgesvdx.c | lapacke_sgesvdx.c | ||||
| lapacke_sgesvdx_work.c | lapacke_sgesvdx_work.c | ||||
| lapacke_sgesvj.c | lapacke_sgesvj.c | ||||
| @@ -1853,6 +1857,8 @@ set(ZSRC | |||||
| lapacke_zgesv_work.c | lapacke_zgesv_work.c | ||||
| lapacke_zgesvd.c | lapacke_zgesvd.c | ||||
| lapacke_zgesvd_work.c | lapacke_zgesvd_work.c | ||||
| lapacke_zgesvdq.c | |||||
| lapacke_zgesvdq_work.c | |||||
| lapacke_zgesvdx.c | lapacke_zgesvdx.c | ||||
| lapacke_zgesvdx_work.c | lapacke_zgesvdx_work.c | ||||
| lapacke_zgesvj.c | lapacke_zgesvj.c | ||||
| @@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| 7 NUMBER OF VALUES OF N | |||||
| 6 NUMBER OF VALUES OF N | |||||
| 1 2 3 5 7 9 35 VALUES OF N | 1 2 3 5 7 9 35 VALUES OF N | ||||
| 3 NUMBER OF VALUES OF ALPHA | 3 NUMBER OF VALUES OF ALPHA | ||||
| 0.0 1.0 0.7 VALUES OF ALPHA | 0.0 1.0 0.7 VALUES OF ALPHA | ||||
| @@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
| 16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
| 7 NUMBER OF VALUES OF N | |||||
| 6 NUMBER OF VALUES OF N | |||||
| 0 1 2 3 5 9 35 VALUES OF N | 0 1 2 3 5 9 35 VALUES OF N | ||||
| 3 NUMBER OF VALUES OF ALPHA | 3 NUMBER OF VALUES OF ALPHA | ||||
| 0.0 1.0 0.7 VALUES OF ALPHA | 0.0 1.0 0.7 VALUES OF ALPHA | ||||
| @@ -21,9 +21,13 @@ else | |||||
| ifeq ($(ARCH),power) | ifeq ($(ARCH),power) | ||||
| COMMONOBJS += dynamic_power.$(SUFFIX) | COMMONOBJS += dynamic_power.$(SUFFIX) | ||||
| else | else | ||||
| ifeq ($(ARCH),zarch) | |||||
| COMMONOBJS += dynamic_zarch.$(SUFFIX) | |||||
| else | |||||
| COMMONOBJS += dynamic.$(SUFFIX) | COMMONOBJS += dynamic.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| else | else | ||||
| COMMONOBJS += parameter.$(SUFFIX) | COMMONOBJS += parameter.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -85,9 +89,13 @@ else | |||||
| ifeq ($(ARCH),power) | ifeq ($(ARCH),power) | ||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX) | ||||
| else | else | ||||
| ifeq ($(ARCH),zarch) | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX) | |||||
| else | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| else | else | ||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -0,0 +1,131 @@ | |||||
| #include "common.h" | |||||
| extern gotoblas_t gotoblas_Z13; | |||||
| extern gotoblas_t gotoblas_Z14; | |||||
| extern gotoblas_t gotoblas_Z15; | |||||
| //#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||||
| //extern gotoblas_t gotoblas_Z14; | |||||
| //#endif | |||||
| #define NUM_CORETYPES 5 | |||||
| extern void openblas_warning(int verbose, const char* msg); | |||||
| static char* corename[] = { | |||||
| "unknown", | |||||
| "Z13", | |||||
| "Z14", | |||||
| "Z15", | |||||
| "ZARCH_GENERIC", | |||||
| }; | |||||
| char* gotoblas_corename(void) { | |||||
| if (gotoblas == &gotoblas_Z13) return corename[1]; | |||||
| if (gotoblas == &gotoblas_Z14) return corename[2]; | |||||
| if (gotoblas == &gotoblas_Z15) return corename[3]; | |||||
| //#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||||
| // if (gotoblas == &gotoblas_POWER9) return corename[3]; | |||||
| //#endif | |||||
| return corename[0]; // try generic? | |||||
| } | |||||
| // __builtin_cpu_is is not supported by zarch | |||||
| static gotolabs_t* get_coretype(void) { | |||||
| FILE* infile; | |||||
| char buffer[512], * p; | |||||
| p = (char*)NULL; | |||||
| infile = fopen("/proc/sysinfo", "r"); | |||||
| while (fgets(buffer, sizeof(buffer), infile)) { | |||||
| if (!strncmp("Type", buffer, 4)) { | |||||
| p = strchr(buffer, ':') + 2; | |||||
| #if 0 | |||||
| fprintf(stderr, "%s\n", p); | |||||
| #endif | |||||
| break; | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| if (strstr(p, "2964")) return &gotoblas_Z13; | |||||
| if (strstr(p, "2965")) return &gotoblas_Z13; | |||||
| if (strstr(p, "3906")) return &gotoblas_Z14; | |||||
| if (strstr(p, "3907")) return &gotoblas_Z14; | |||||
| if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14 | |||||
| if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14 | |||||
| return NULL; // should be ZARCH_GENERIC | |||||
| } | |||||
| static gotoblas_t* force_coretype(char* coretype) { | |||||
| int i; | |||||
| int found = -1; | |||||
| char message[128]; | |||||
| for (i = 0; i < NUM_CORETYPES; i++) | |||||
| { | |||||
| if (!strncasecmp(coretype, corename[i], 20)) | |||||
| { | |||||
| found = i; | |||||
| break; | |||||
| } | |||||
| } | |||||
| switch (found) | |||||
| { | |||||
| case 1: return (&gotoblas_Z13); | |||||
| case 2: return (&gotoblas_Z14); | |||||
| case 3: return (&gotoblas_Z15); | |||||
| //#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||||
| // case 3: return (&gotoblas_POWER9); | |||||
| //#endif | |||||
| default: return NULL; | |||||
| } | |||||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||||
| openblas_warning(1, message); | |||||
| } | |||||
| void gotoblas_dynamic_init(void) { | |||||
| char coremsg[128]; | |||||
| char coren[22]; | |||||
| char* p; | |||||
| if (gotoblas) return; | |||||
| p = getenv("OPENBLAS_CORETYPE"); | |||||
| if (p) | |||||
| { | |||||
| gotoblas = force_coretype(p); | |||||
| } | |||||
| else | |||||
| { | |||||
| gotoblas = get_coretype(); | |||||
| } | |||||
| if (gotoblas == NULL) | |||||
| { | |||||
| snprintf(coremsg, 128, "Falling back to Z14 core\n"); | |||||
| openblas_warning(1, coremsg); | |||||
| gotoblas = &gotoblas_Z14; | |||||
| } | |||||
| if (gotoblas && gotoblas->init) { | |||||
| strncpy(coren, gotoblas_corename(), 20); | |||||
| sprintf(coremsg, "Core: %s\n", coren); | |||||
| openblas_warning(2, coremsg); | |||||
| gotoblas->init(); | |||||
| } | |||||
| else { | |||||
| openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||||
| exit(1); | |||||
| } | |||||
| } | |||||
| void gotoblas_dynamic_quit(void) { | |||||
| gotoblas = NULL; | |||||
| } | |||||
| @@ -694,7 +694,19 @@ | |||||
| # functions added for lapack-3.8.0 | # functions added for lapack-3.8.0 | ||||
| ilaenv2stage | |||||
| ilaenv2stage, | |||||
| # functions added for lapack-3.9.0 | |||||
| cgesvdq, | |||||
| cungtsqr, | |||||
| dcombssq, | |||||
| dgesvdq, | |||||
| dorgtsqr, | |||||
| scombssq, | |||||
| sgesvdq, | |||||
| sorgtsqr, | |||||
| zgesvdq, | |||||
| zungtsqr | |||||
| ); | ); | ||||
| @lapack_extendedprecision_objs = ( | @lapack_extendedprecision_objs = ( | ||||
| @@ -3347,6 +3359,15 @@ | |||||
| LAPACKE_zsytrf_aa_2stage_work, | LAPACKE_zsytrf_aa_2stage_work, | ||||
| LAPACKE_zsytrs_aa_2stage, | LAPACKE_zsytrs_aa_2stage, | ||||
| LAPACKE_zsytrs_aa_2stage_work, | LAPACKE_zsytrs_aa_2stage_work, | ||||
| # new functions from 3.9.0 | |||||
| LAPACKE_dgesvdq, | |||||
| LAPACKE_dgesvdq_work, | |||||
| LAPACKE_sgesvdq, | |||||
| LAPACKE_sgesvdq_work, | |||||
| LAPACKE_zgesvdq, | |||||
| LAPACKE_zgesvdq_work | |||||
| ); | ); | ||||
| #These function may need 2 underscores. | #These function may need 2 underscores. | ||||
| @@ -3419,7 +3440,13 @@ | |||||
| dsytrf_aa_2stage, dsytrs_aa_2stage, | dsytrf_aa_2stage, dsytrs_aa_2stage, | ||||
| zhesv_aa_2stage, zhetrf_aa_2stage, | zhesv_aa_2stage, zhetrf_aa_2stage, | ||||
| zhetrs_aa_2stage, zsysv_aa_2stage, | zhetrs_aa_2stage, zsysv_aa_2stage, | ||||
| zsytrf_aa_2stage, zsytrs_aa_2stage | |||||
| zsytrf_aa_2stage, zsytrs_aa_2stage, | |||||
| # 3.9.0 | |||||
| claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col, | |||||
| dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col, | |||||
| slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col, | |||||
| zlaunhr_col_getrfnp, zlaunhr_col_getrfnp2, zunhr_col | |||||
| ); | ); | ||||
| @@ -103,26 +103,34 @@ ZDOTKERNEL = zdot.S | |||||
| DSDOTKERNEL = dot.S | DSDOTKERNEL = dot.S | ||||
| DGEMM_BETA = dgemm_beta.S | DGEMM_BETA = dgemm_beta.S | ||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | ||||
| STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | ||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ||||
| ifeq ($(SGEMM_UNROLL_N), 4) | |||||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||||
| ifeq ($(SGEMM_UNROLL_M), 16) | |||||
| SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S | |||||
| else | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
| endif | |||||
| ifeq ($(SGEMM_UNROLL_M), 4) | |||||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S | |||||
| else | else | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | ||||
| endif | endif | ||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | ||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||
| endif | endif | ||||
| ifeq ($(SGEMM_UNROLL_N), 16) | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||||
| else | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
| endif | |||||
| ifeq ($(SGEMM_UNROLL_N), 4) | ifeq ($(SGEMM_UNROLL_N), 4) | ||||
| SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | ||||
| else | else | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | ||||
| endif | endif | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| @@ -109,22 +109,29 @@ ZGEMVTKERNEL = zgemv_t.S | |||||
| SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | ||||
| STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | ||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ||||
| ifeq ($(SGEMM_UNROLL_N), 4) | |||||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||||
| ifeq ($(SGEMM_UNROLL_M), 16) | |||||
| SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S | |||||
| else | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
| endif | |||||
| ifeq ($(SGEMM_UNROLL_M), 4) | |||||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S | |||||
| else | else | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | ||||
| endif | endif | ||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | ||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||
| endif | endif | ||||
| ifeq ($(SGEMM_UNROLL_N), 16) | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||||
| else | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
| endif | |||||
| ifeq ($(SGEMM_UNROLL_N), 4) | ifeq ($(SGEMM_UNROLL_N), 4) | ||||
| SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | ||||
| else | else | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | ||||
| endif | endif | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| @@ -43,7 +43,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define betaV0 v11.d[0] | #define betaV0 v11.d[0] | ||||
| #define I x16 | #define I x16 | ||||
| #define size 128 | |||||
| #define prfm_size 640 | |||||
| #define calc_size 128 | |||||
| /************************************************************************************** | /************************************************************************************** | ||||
| * Macro definitions | * Macro definitions | ||||
| @@ -119,27 +120,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ldp q2, q3, [A02] | ldp q2, q3, [A02] | ||||
| ldp q4, q5, [A03] | ldp q4, q5, [A03] | ||||
| ldp q6, q7, [A04] | ldp q6, q7, [A04] | ||||
| fmul v0.2d, v0.2d, betaV0 | fmul v0.2d, v0.2d, betaV0 | ||||
| fmul v1.2d, v1.2d, betaV0 | fmul v1.2d, v1.2d, betaV0 | ||||
| fmul v2.2d, v2.2d, betaV0 | fmul v2.2d, v2.2d, betaV0 | ||||
| fmul v3.2d, v3.2d, betaV0 | fmul v3.2d, v3.2d, betaV0 | ||||
| prfm PLDL1KEEP, [A01, prfm_size] | |||||
| fmul v4.2d, v4.2d, betaV0 | fmul v4.2d, v4.2d, betaV0 | ||||
| fmul v5.2d, v5.2d, betaV0 | fmul v5.2d, v5.2d, betaV0 | ||||
| prfm PLDL1KEEP, [A03, prfm_size] | |||||
| fmul v6.2d, v6.2d, betaV0 | fmul v6.2d, v6.2d, betaV0 | ||||
| fmul v7.2d, v7.2d, betaV0 | fmul v7.2d, v7.2d, betaV0 | ||||
| st1 {v0.2d, v1.2d}, [A01] | st1 {v0.2d, v1.2d}, [A01] | ||||
| add A01, A01, size | |||||
| add A01, A01, calc_size | |||||
| st1 {v2.2d, v3.2d}, [A02] | st1 {v2.2d, v3.2d}, [A02] | ||||
| add A02, A02, size | |||||
| add A02, A02, calc_size | |||||
| st1 {v4.2d, v5.2d}, [A03] | st1 {v4.2d, v5.2d}, [A03] | ||||
| add A03, A03, size | |||||
| add A03, A03, calc_size | |||||
| st1 {v6.2d, v7.2d}, [A04] | st1 {v6.2d, v7.2d}, [A04] | ||||
| add A04, A04, size | |||||
| add A04, A04, calc_size | |||||
| subs I , I , #1 | subs I , I , #1 | ||||
| bne .Lgemm_beta_03 | bne .Lgemm_beta_03 | ||||
| @@ -0,0 +1,259 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2016, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #define ASSEMBLER | |||||
| #include "common.h" | |||||
| #define M x0 | |||||
| #define N x1 | |||||
| #define BETA s0 | |||||
| #define LDC x6 | |||||
| #define C00 x7 | |||||
| #define A01 x8 | |||||
| #define A02 x9 | |||||
| #define A03 x10 | |||||
| #define A04 x11 | |||||
| #define I x12 | |||||
| #define beta0 s11 | |||||
| #define betaV0 v11.s[0] | |||||
| #define prfm_size 640 | |||||
| #define calc_size 128 | |||||
| /************************************************************************************** | |||||
| * Macro definitions | |||||
| **************************************************************************************/ | |||||
/*
 * Open an 11 x 16-byte stack frame and spill d8-d17 and x18-x28 into it.
 * RESTORE_REGS reads the same slots back.
 */
.macro SAVE_REGS
	sub	sp, sp, #(11 * 16)
	stp	d8,  d9,  [sp, #(0 * 16)]
	stp	d10, d11, [sp, #(1 * 16)]
	stp	d12, d13, [sp, #(2 * 16)]
	stp	d14, d15, [sp, #(3 * 16)]
	stp	d16, d17, [sp, #(4 * 16)]
	stp	x18, x19, [sp, #(5 * 16)]
	stp	x20, x21, [sp, #(6 * 16)]
	stp	x22, x23, [sp, #(7 * 16)]
	stp	x24, x25, [sp, #(8 * 16)]
	stp	x26, x27, [sp, #(9 * 16)]
	str	x28,      [sp, #(10 * 16)]
.endm
/*
 * Reload d8-d17 and x18-x28 from the frame written by SAVE_REGS, then
 * release the 11 x 16-byte frame.
 */
.macro RESTORE_REGS
	ldp	d8,  d9,  [sp, #(0 * 16)]
	ldp	d10, d11, [sp, #(1 * 16)]
	ldp	d12, d13, [sp, #(2 * 16)]
	ldp	d14, d15, [sp, #(3 * 16)]
	ldp	d16, d17, [sp, #(4 * 16)]
	ldp	x18, x19, [sp, #(5 * 16)]
	ldp	x20, x21, [sp, #(6 * 16)]
	ldp	x22, x23, [sp, #(7 * 16)]
	ldp	x24, x25, [sp, #(8 * 16)]
	ldp	x26, x27, [sp, #(9 * 16)]
	ldr	x28,      [sp, #(10 * 16)]
	add	sp, sp, #(11 * 16)
.endm
/*
 * Fill v0-v7 with +0.0 for the beta == 0 store loops.
 *
 * The registers are cleared outright instead of being multiplied by beta:
 * nothing loads them before this macro runs, so their contents are
 * arbitrary, and a NaN or Inf lane multiplied by zero gives NaN, not zero.
 */
.macro INIT_ZERO
	movi	v0.4s, #0
	movi	v1.4s, #0
	movi	v2.4s, #0
	movi	v3.4s, #0
	movi	v4.4s, #0
	movi	v5.4s, #0
	movi	v6.4s, #0
	movi	v7.4s, #0
.endm
| /************************************************************************************** | |||||
| * End of macro definitions | |||||
| **************************************************************************************/ | |||||
/*
 * Scale C in place by BETA, one pass per N with a stride of LDC elements.
 *
 *   M, N  - extents of the two loops (M inner, N outer)
 *   BETA  - scale factor, arrives in s0
 *   C00   - start of the area to scale
 *   LDC   - stride between passes, in elements; read from the first stack slot
 *
 * Each pass is processed in chunks of 32 single-precision values (128 bytes,
 * four 32-byte register pairs), followed by an element-at-a-time tail for
 * the remaining M & 31 values. When BETA compares equal to zero the data is
 * not read at all: the pre-cleared registers are stored instead.
 * x0 is set to 0 before returning.
 */
	PROLOGUE

	.align 5

	ldr	LDC, [sp]			// must be read before SAVE_REGS moves sp
	SAVE_REGS

.Lgemm_beta_BEGIN:

	fmov	beta0, BETA			// keep a copy of beta in s11 / v11.s[0]
	cmp	N, #0
	ble	.Lgemm_beta_L999		// nothing to do

	fcmp	BETA, #0.0
	beq	.Lgemm_beta_zero_01		// beta == 0: store-only path

/* ---------------- beta != 0 : C = beta * C ---------------- */

.Lgemm_beta_01:

	lsl	LDC, LDC, #2			// stride in bytes (4 bytes per element)

	.align 5
.Lgemm_beta_02:					// outer loop: one pass per N

	mov	A01, C00			// A01 = start of this pass
	add	C00, C00, LDC			// C00 = start of the next pass

	asr	I, M, #5			// I = number of 32-element chunks
	cmp	I, #0
	ble	.Lgemm_beta_04

	add	A02, A01, #32			// A01..A04 are 32 bytes apart
	add	A03, A02, #32
	add	A04, A03, #32

	.align 5
.Lgemm_beta_03:					// 32 elements per iteration

	prfm	PLDL1KEEP, [A01, prfm_size]

	ldp	q0, q1, [A01]
	ldp	q2, q3, [A02]
	ldp	q4, q5, [A03]
	ldp	q6, q7, [A04]

	fmul	v0.4s, v0.4s, betaV0
	fmul	v1.4s, v1.4s, betaV0

	fmul	v2.4s, v2.4s, betaV0
	fmul	v3.4s, v3.4s, betaV0

	fmul	v4.4s, v4.4s, betaV0
	fmul	v5.4s, v5.4s, betaV0

	fmul	v6.4s, v6.4s, betaV0
	fmul	v7.4s, v7.4s, betaV0

	prfm	PLDL1KEEP, [A01, prfm_size + 64]

	st1	{v0.4s, v1.4s}, [A01]
	add	A01, A01, calc_size
	st1	{v2.4s, v3.4s}, [A02]
	add	A02, A02, calc_size
	st1	{v4.4s, v5.4s}, [A03]
	add	A03, A03, calc_size
	st1	{v6.4s, v7.4s}, [A04]
	add	A04, A04, calc_size

	subs	I, I, #1
	bne	.Lgemm_beta_03

	.align 5
.Lgemm_beta_04:					// tail: M & 31 elements

	and	I, M, #31
	cmp	I, #0
	ble	.Lgemm_beta_06

	.align 5
.Lgemm_beta_05:					// one element per iteration

	ldr	s12, [A01]
	fmul	s12, s12, beta0
	str	s12, [A01]
	add	A01, A01, #4

	subs	I, I, #1
	bne	.Lgemm_beta_05

	.align 5
.Lgemm_beta_06:

	subs	N, N, #1			// N--
	bne	.Lgemm_beta_02

	.align 5
.Lgemm_beta_L999:

	mov	x0, #0
	RESTORE_REGS
	ret

/* ---------------- beta == 0 : store without loading ---------------- */

	.align 5
.Lgemm_beta_zero_01:

	INIT_ZERO				// prepare v0-v7 for the stores below

	lsl	LDC, LDC, #2			// stride in bytes (4 bytes per element)

	.align 5
.Lgemm_beta_zero_02:				// outer loop: one pass per N

	mov	A01, C00
	add	C00, C00, LDC

	asr	I, M, #5			// I = number of 32-element chunks
	cmp	I, #0
	ble	.Lgemm_beta_zero_04

	add	A02, A01, #32
	add	A03, A02, #32
	add	A04, A03, #32

	.align 5
.Lgemm_beta_zero_03:				// 32 elements per iteration

	st1	{v0.4s, v1.4s}, [A01]
	add	A01, A01, calc_size
	st1	{v2.4s, v3.4s}, [A02]
	add	A02, A02, calc_size
	st1	{v4.4s, v5.4s}, [A03]
	add	A03, A03, calc_size
	st1	{v6.4s, v7.4s}, [A04]
	add	A04, A04, calc_size

	subs	I, I, #1
	bne	.Lgemm_beta_zero_03

	.align 5
.Lgemm_beta_zero_04:				// tail: M & 31 elements

	and	I, M, #31
	cmp	I, #0
	ble	.Lgemm_beta_zero_06

	.align 5
.Lgemm_beta_zero_05:				// one element per iteration

	str	beta0, [A01]			// beta0 holds the zero-valued beta
	add	A01, A01, #4

	subs	I, I, #1
	bne	.Lgemm_beta_zero_05

	.align 5
.Lgemm_beta_zero_06:

	subs	N, N, #1
	bne	.Lgemm_beta_zero_02

	.align 5
.Lgemm_beta_zero_L999:

	mov	x0, #0
	RESTORE_REGS
	ret

	EPILOGUE
| @@ -0,0 +1,824 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2019, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| *****************************************************************************/ | |||||
| #define ASSEMBLER | |||||
| #include "common.h" | |||||
| // Register aliases used throughout this routine. | |||||
| // M, N, A, LDA and B (x0-x4) are read by the routine without being initialised | |||||
| // here, i.e. they carry the incoming arguments. | |||||
| // NOTE(review): I is mapped to x18, which AAPCS64 reserves as the platform | |||||
| // register on some operating systems - confirm this is acceptable for all targets. | |||||
| #define M x0 | |||||
| #define N x1 | |||||
| #define A x2 | |||||
| #define LDA x3 | |||||
| #define B x4 | |||||
| #define M8 x5 | |||||
| #define A01 x6 | |||||
| #define A02 x7 | |||||
| #define A03 x8 | |||||
| #define A04 x9 | |||||
| #define A05 x10 | |||||
| #define A06 x11 | |||||
| #define A07 x12 | |||||
| #define A08 x13 | |||||
| #define B01 x14 | |||||
| #define B02 x15 | |||||
| #define B03 x16 | |||||
| #define B04 x17 | |||||
| #define B00 x22 | |||||
| #define I x18 | |||||
| #define J x19 | |||||
| #define TEMP1 x20 | |||||
| // Byte offset ahead of each source pointer that the prfm instructions touch. | |||||
| #define A_PREFETCH 256 | |||||
| /************************************************************************************** | |||||
| * Macro definitions | |||||
| **************************************************************************************/ | |||||
| // SAVE_REGS: reserve 11*16 bytes on the stack and spill d8-d17 and x18-x28 | |||||
| // into it. The slot layout must stay in sync with RESTORE_REGS. | |||||
| .macro SAVE_REGS | |||||
| add sp, sp, #-(11 * 16) | |||||
| stp d8, d9, [sp, #(0 * 16)] | |||||
| stp d10, d11, [sp, #(1 * 16)] | |||||
| stp d12, d13, [sp, #(2 * 16)] | |||||
| stp d14, d15, [sp, #(3 * 16)] | |||||
| stp d16, d17, [sp, #(4 * 16)] | |||||
| stp x18, x19, [sp, #(5 * 16)] | |||||
| stp x20, x21, [sp, #(6 * 16)] | |||||
| stp x22, x23, [sp, #(7 * 16)] | |||||
| stp x24, x25, [sp, #(8 * 16)] | |||||
| stp x26, x27, [sp, #(9 * 16)] | |||||
| str x28, [sp, #(10 * 16)] | |||||
| .endm | |||||
| // RESTORE_REGS: reload d8-d17 and x18-x28 from the slots written by SAVE_REGS, | |||||
| // then release the 11*16-byte stack area. | |||||
| .macro RESTORE_REGS | |||||
| ldp d8, d9, [sp, #(0 * 16)] | |||||
| ldp d10, d11, [sp, #(1 * 16)] | |||||
| ldp d12, d13, [sp, #(2 * 16)] | |||||
| ldp d14, d15, [sp, #(3 * 16)] | |||||
| ldp d16, d17, [sp, #(4 * 16)] | |||||
| ldp x18, x19, [sp, #(5 * 16)] | |||||
| ldp x20, x21, [sp, #(6 * 16)] | |||||
| ldp x22, x23, [sp, #(7 * 16)] | |||||
| ldp x24, x25, [sp, #(8 * 16)] | |||||
| ldp x26, x27, [sp, #(9 * 16)] | |||||
| ldr x28, [sp, #(10 * 16)] | |||||
| add sp, sp, #(11*16) | |||||
| .endm | |||||
| /*************************************************************************************************************************/ | |||||
| // COPY16x8: copy 64 bytes (16 x 4-byte lanes) from each of the eight row | |||||
| // pointers A01..A08 into eight consecutive 64-byte slots starting at B00. | |||||
| // Each A pointer advances by 64; B00 then advances by M8. TEMP1 is scratch. | |||||
| .macro COPY16x8 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||||
| //prfm PSTL1KEEP, [B00, M8] | |||||
| ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01] | |||||
| add A01, A01, #64 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||||
| add TEMP1, B00, #64 | |||||
| ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02] | |||||
| add A02, A02, #64 | |||||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [A03] | |||||
| add A03, A03, #64 | |||||
| st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [A04] | |||||
| add A04, A04, #64 | |||||
| st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [A05] | |||||
| add A05, A05, #64 | |||||
| st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [A06] | |||||
| add A06, A06, #64 | |||||
| st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [A07] | |||||
| add A07, A07, #64 | |||||
| st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v28.4s, v29.4s, v30.4s, v31.4s}, [A08] | |||||
| add A08, A08, #64 | |||||
| st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| add B00, B00, M8 | |||||
| .endm | |||||
| // COPY8x8: copy 32 bytes from each of the eight row pointers A01..A08 into | |||||
| // 256 contiguous bytes at B01. Each A pointer advances by 32, B01 by 256. | |||||
| .macro COPY8x8 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||||
| ldp q0, q1, [A01] | |||||
| ldp q2, q3, [A02] | |||||
| add A01, A01, #32 | |||||
| add A02, A02, #32 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01] | |||||
| add B01, B01, #64 | |||||
| ldp q4, q5, [A03] | |||||
| ldp q6, q7, [A04] | |||||
| add A03, A03, #32 | |||||
| add A04, A04, #32 | |||||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B01] | |||||
| add B01, B01, #64 | |||||
| ldp q8, q9, [A05] | |||||
| ldp q10, q11, [A06] | |||||
| add A05, A05, #32 | |||||
| add A06, A06, #32 | |||||
| st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [B01] | |||||
| add B01, B01, #64 | |||||
| ldp q12, q13, [A07] | |||||
| ldp q14, q15, [A08] | |||||
| add A07, A07, #32 | |||||
| add A08, A08, #32 | |||||
| st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [B01] | |||||
| add B01, B01, #64 | |||||
| .endm | |||||
| // COPY4x8: copy 16 bytes from each of the eight row pointers A01..A08 into | |||||
| // 128 contiguous bytes at B02. Each A pointer advances by 16, B02 by 128. | |||||
| // The prefetches are disabled (commented out) in this variant. | |||||
| .macro COPY4x8 | |||||
| //prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||||
| ldr q0, [A01] | |||||
| ldr q1, [A02] | |||||
| ldr q2, [A03] | |||||
| ldr q3, [A04] | |||||
| add A01, A01, #16 | |||||
| add A02, A02, #16 | |||||
| add A03, A03, #16 | |||||
| add A04, A04, #16 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B02] | |||||
| add B02, B02, #64 | |||||
| ldr q4, [A05] | |||||
| ldr q5, [A06] | |||||
| ldr q6, [A07] | |||||
| ldr q7, [A08] | |||||
| add A05, A05, #16 | |||||
| add A06, A06, #16 | |||||
| add A07, A07, #16 | |||||
| add A08, A08, #16 | |||||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B02] | |||||
| add B02, B02, #64 | |||||
| .endm | |||||
| // COPY2x8: copy 8 bytes from each of the eight row pointers A01..A08 into | |||||
| // 64 contiguous bytes at B03. Each A pointer advances by 8, B03 by 64. | |||||
| .macro COPY2x8 | |||||
| //prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||||
| ldr d0, [A01] | |||||
| ldr d1, [A02] | |||||
| ldr d2, [A03] | |||||
| ldr d3, [A04] | |||||
| add A01, A01, #8 | |||||
| add A02, A02, #8 | |||||
| add A03, A03, #8 | |||||
| add A04, A04, #8 | |||||
| stp d0, d1, [B03] | |||||
| add B03, B03, #16 | |||||
| stp d2, d3, [B03] | |||||
| add B03, B03, #16 | |||||
| ldr d4, [A05] | |||||
| ldr d5, [A06] | |||||
| ldr d6, [A07] | |||||
| ldr d7, [A08] | |||||
| add A05, A05, #8 | |||||
| add A06, A06, #8 | |||||
| add A07, A07, #8 | |||||
| add A08, A08, #8 | |||||
| stp d4, d5, [B03] | |||||
| add B03, B03, #16 | |||||
| stp d6, d7, [B03] | |||||
| add B03, B03, #16 | |||||
| .endm | |||||
| // COPY1x8: copy one 4-byte element from each of the eight row pointers | |||||
| // A01..A08 into 32 contiguous bytes at B04. Each A pointer advances by 4, | |||||
| // B04 by 32. | |||||
| .macro COPY1x8 | |||||
| //prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||||
| ldr s0, [A01] | |||||
| ldr s1, [A02] | |||||
| ldr s2, [A03] | |||||
| ldr s3, [A04] | |||||
| add A01, A01, #4 | |||||
| add A02, A02, #4 | |||||
| add A03, A03, #4 | |||||
| add A04, A04, #4 | |||||
| stp s0, s1, [B04] | |||||
| add B04, B04, #8 | |||||
| stp s2, s3, [B04] | |||||
| add B04, B04, #8 | |||||
| // Rows 5-8 are handled exactly like rows 1-4: a single 4-byte load and a | |||||
| // 4-byte pointer step. An 8-byte load here would read 4 bytes beyond the | |||||
| // element being copied, which can fall outside the source buffer. | |||||
| ldr s4, [A05] | |||||
| ldr s5, [A06] | |||||
| ldr s6, [A07] | |||||
| ldr s7, [A08] | |||||
| add A05, A05, #4 | |||||
| add A06, A06, #4 | |||||
| add A07, A07, #4 | |||||
| add A08, A08, #4 | |||||
| stp s4, s5, [B04] | |||||
| add B04, B04, #8 | |||||
| stp s6, s7, [B04] | |||||
| add B04, B04, #8 | |||||
| .endm | |||||
| /*************************************************************************************************************************/ | |||||
| // COPY16x4: copy 64 bytes from each of the four row pointers A01..A04 into | |||||
| // four consecutive 64-byte slots starting at B00. Each A pointer advances by | |||||
| // 64; B00 then advances by M8. TEMP1 is scratch. | |||||
| .macro COPY16x4 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01] | |||||
| add A01, A01, #64 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||||
| add TEMP1, B00, #64 | |||||
| ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02] | |||||
| add A02, A02, #64 | |||||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [A03] | |||||
| add A03, A03, #64 | |||||
| st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1] | |||||
| add TEMP1, TEMP1, #64 | |||||
| ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [A04] | |||||
| add A04, A04, #64 | |||||
| st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1] | |||||
| add B00, B00, M8 | |||||
| .endm | |||||
| // COPY8x4: copy 32 bytes from each of the four row pointers A01..A04 into | |||||
| // 128 contiguous bytes at B01. Each A pointer advances by 32, B01 by 128. | |||||
| .macro COPY8x4 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| ldp q0, q1, [A01] | |||||
| ldp q2, q3, [A02] | |||||
| add A01, A01, #32 | |||||
| add A02, A02, #32 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01] | |||||
| add B01, B01, #64 | |||||
| ldp q4, q5, [A03] | |||||
| ldp q6, q7, [A04] | |||||
| add A03, A03, #32 | |||||
| add A04, A04, #32 | |||||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B01] | |||||
| add B01, B01, #64 | |||||
| .endm | |||||
| // COPY4x4: copy 16 bytes from each of the four row pointers A01..A04 into | |||||
| // 64 contiguous bytes at B02. Each A pointer advances by 16, B02 by 64. | |||||
| .macro COPY4x4 | |||||
| //prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| ldr q0, [A01] | |||||
| ldr q1, [A02] | |||||
| ldr q2, [A03] | |||||
| ldr q3, [A04] | |||||
| add A01, A01, #16 | |||||
| add A02, A02, #16 | |||||
| add A03, A03, #16 | |||||
| add A04, A04, #16 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B02] | |||||
| add B02, B02, #64 | |||||
| .endm | |||||
| // COPY2x4: copy 8 bytes from each of the four row pointers A01..A04 into | |||||
| // 32 contiguous bytes at B03. Each A pointer advances by 8, B03 by 32. | |||||
| .macro COPY2x4 | |||||
| //prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| ldr d0, [A01] | |||||
| ldr d1, [A02] | |||||
| ldr d2, [A03] | |||||
| ldr d3, [A04] | |||||
| add A01, A01, #8 | |||||
| add A02, A02, #8 | |||||
| add A03, A03, #8 | |||||
| add A04, A04, #8 | |||||
| stp d0, d1, [B03] | |||||
| add B03, B03, #16 | |||||
| stp d2, d3, [B03] | |||||
| add B03, B03, #16 | |||||
| .endm | |||||
| // COPY1x4: copy one 4-byte element from each of the four row pointers | |||||
| // A01..A04 into 16 contiguous bytes at B04. Each A pointer advances by 4, | |||||
| // B04 by 16. | |||||
| .macro COPY1x4 | |||||
| //prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||||
| //prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||||
| ldr s0, [A01] | |||||
| ldr s1, [A02] | |||||
| ldr s2, [A03] | |||||
| ldr s3, [A04] | |||||
| add A01, A01, #4 | |||||
| add A02, A02, #4 | |||||
| add A03, A03, #4 | |||||
| add A04, A04, #4 | |||||
| stp s0, s1, [B04] | |||||
| add B04, B04, #8 | |||||
| stp s2, s3, [B04] | |||||
| add B04, B04, #8 | |||||
| .endm | |||||
| /*************************************************************************************************************************/ | |||||
| // COPY16x2: copy 64 bytes from each of the two row pointers A01 and A02 into | |||||
| // two consecutive 64-byte slots starting at B00. Each A pointer advances by | |||||
| // 64; B00 then advances by M8. TEMP1 is scratch. | |||||
| .macro COPY16x2 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01] | |||||
| add A01, A01, #64 | |||||
| ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02] | |||||
| add A02, A02, #64 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||||
| add TEMP1, B00, #64 | |||||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1] | |||||
| add B00, B00, M8 | |||||
| .endm | |||||
| // COPY8x2: copy 32 bytes from each of the two row pointers A01 and A02 into | |||||
| // 64 contiguous bytes at B01. Each A pointer advances by 32, B01 by 64. | |||||
| .macro COPY8x2 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||||
| ld1 {v0.4s, v1.4s}, [A01] | |||||
| ld1 {v2.4s, v3.4s}, [A02] | |||||
| add A01, A01, #32 | |||||
| add A02, A02, #32 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01] | |||||
| add B01, B01, #64 | |||||
| .endm | |||||
| // COPY4x2: move 16 bytes from each of the two row pointers A01 and A02 into | |||||
| // 32 contiguous bytes at B02. Post-index addressing steps each pointer past | |||||
| // the bytes it just touched (A01/A02 += 16, B02 += 32). | |||||
| .macro COPY4x2 | |||||
| ldr q0, [A01], #16 | |||||
| ldr q1, [A02], #16 | |||||
| stp q0, q1, [B02], #32 | |||||
| .endm | |||||
| // COPY2x2: move 8 bytes from each of the two row pointers A01 and A02 into | |||||
| // 16 contiguous bytes at B03. Post-index addressing steps each pointer past | |||||
| // the bytes it just touched (A01/A02 += 8, B03 += 16). | |||||
| .macro COPY2x2 | |||||
| ldr d0, [A01], #8 | |||||
| ldr d1, [A02], #8 | |||||
| stp d0, d1, [B03], #16 | |||||
| .endm | |||||
| // COPY1x2: move one 4-byte element from each of the two row pointers A01 and | |||||
| // A02 into 8 contiguous bytes at B04. Post-index addressing steps each | |||||
| // pointer past the bytes it just touched (A01/A02 += 4, B04 += 8). | |||||
| .macro COPY1x2 | |||||
| ldr s0, [A01], #4 | |||||
| ldr s1, [A02], #4 | |||||
| stp s0, s1, [B04], #8 | |||||
| .endm | |||||
| /*************************************************************************************************************************/ | |||||
| // COPY16x1: copy 64 bytes from the single row pointer A01 to B00. | |||||
| // A01 advances by 64; B00 advances by M8. | |||||
| .macro COPY16x1 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01] | |||||
| add A01, A01, #64 | |||||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||||
| add B00, B00, M8 | |||||
| .endm | |||||
| // COPY8x1: move 32 bytes from the single row pointer A01 to B01, after | |||||
| // prefetching A_PREFETCH bytes ahead of A01. Post-index addressing steps | |||||
| // both pointers by 32. | |||||
| .macro COPY8x1 | |||||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||||
| ldp q0, q1, [A01], #32 | |||||
| stp q0, q1, [B01], #32 | |||||
| .endm | |||||
| // COPY4x1: move 16 bytes from the single row pointer A01 to B02. | |||||
| // Post-index addressing steps both pointers by 16. | |||||
| .macro COPY4x1 | |||||
| ldr q0, [A01], #16 | |||||
| str q0, [B02], #16 | |||||
| .endm | |||||
| // COPY2x1: move 8 bytes from the single row pointer A01 to B03. | |||||
| // Post-index addressing steps both pointers by 8. | |||||
| .macro COPY2x1 | |||||
| ldr d0, [A01], #8 | |||||
| str d0, [B03], #8 | |||||
| .endm | |||||
| // COPY1x1: move one 4-byte element from the single row pointer A01 to B04. | |||||
| // Post-index addressing steps both pointers by 4. | |||||
| .macro COPY1x1 | |||||
| ldr s0, [A01], #4 | |||||
| str s0, [B04], #4 | |||||
| .endm | |||||
| /************************************************************************************** | |||||
| * End of macro definitions | |||||
| **************************************************************************************/ | |||||
| // Entry point. Walks the source in groups of 8, 4, 2 and 1 rows (row stride | |||||
| // LDA); within each group it copies 16 elements per row at a time into B00 | |||||
| // and sends the 8/4/2/1-element row remainders to B01/B02/B03/B04. | |||||
| // Always returns 0 in x0. | |||||
| PROLOGUE | |||||
| .align 5 | |||||
| SAVE_REGS | |||||
| lsl LDA, LDA, #2 // LDA = LDA * SIZE | |||||
| lsl TEMP1, M, #2 // TEMP1 = M * SIZE | |||||
| // B01..B04 = B + (N rounded down to a multiple of 16/8/4/2) * M * SIZE: | |||||
| // the output positions used for the 8-, 4-, 2- and 1-element remainders. | |||||
| and B01 , N , #-16 | |||||
| and B02 , N , #-8 | |||||
| and B03 , N , #-4 | |||||
| and B04 , N , #-2 | |||||
| mul B01, B01, TEMP1 | |||||
| mul B02, B02, TEMP1 | |||||
| mul B03, B03, TEMP1 | |||||
| mul B04, B04, TEMP1 | |||||
| add B01 , B01, B | |||||
| add B02 , B02, B | |||||
| add B03 , B03, B | |||||
| add B04 , B04, B | |||||
| lsl M8, M, #6 // M8 = M * 16 * SIZE | |||||
| // ---- groups of 8 rows ---- | |||||
| .Lsgemm_tcopy_L8_BEGIN: | |||||
| asr J, M, #3 // J = M / 8 | |||||
| cmp J, #0 | |||||
| ble .Lsgemm_tcopy_L4_BEGIN | |||||
| .align 5 | |||||
| .Lsgemm_tcopy_L8_M16_BEGIN: | |||||
| // Set up the eight row pointers and step A past these eight rows. | |||||
| mov A01, A | |||||
| add A02, A01, LDA | |||||
| add A03, A02, LDA | |||||
| add A04, A03, LDA | |||||
| add A05, A04, LDA | |||||
| add A06, A05, LDA | |||||
| add A07, A06, LDA | |||||
| add A08, A07, LDA | |||||
| add A, A08, LDA | |||||
| mov B00, B | |||||
| add B, B00, #512 // B = B + 8 * 16 * SIZE | |||||
| asr I, N, #4 // I = N / 16 | |||||
| cmp I, #0 | |||||
| ble .Lsgemm_tcopy_L8_M16_40 | |||||
| .align 5 | |||||
| .Lsgemm_tcopy_L8_M16_20: | |||||
| COPY16x8 | |||||
| subs I , I , #1 | |||||
| bne .Lsgemm_tcopy_L8_M16_20 | |||||
| // Remainders of N: each tst checks one bit of N; tst leaves V clear and the | |||||
| // result is non-negative, so ble branches exactly when the bit is zero. | |||||
| .Lsgemm_tcopy_L8_M16_40: | |||||
| tst N , #8 | |||||
| ble .Lsgemm_tcopy_L8_M16_60 | |||||
| COPY8x8 | |||||
| .Lsgemm_tcopy_L8_M16_60: | |||||
| tst N , #4 | |||||
| ble .Lsgemm_tcopy_L8_M16_80 | |||||
| COPY4x8 | |||||
| .Lsgemm_tcopy_L8_M16_80: | |||||
| tst N , #2 | |||||
| ble .Lsgemm_tcopy_L8_M16_100 | |||||
| COPY2x8 | |||||
| .Lsgemm_tcopy_L8_M16_100: | |||||
| tst N, #1 | |||||
| ble .Lsgemm_tcopy_L8_M16_END | |||||
| COPY1x8 | |||||
| .Lsgemm_tcopy_L8_M16_END: | |||||
| subs J , J, #1 // j-- | |||||
| bne .Lsgemm_tcopy_L8_M16_BEGIN | |||||
| /*********************************************************************************************/ | |||||
| // ---- one group of 4 rows, if bit 2 of M is set ---- | |||||
| .Lsgemm_tcopy_L4_BEGIN: | |||||
| tst M, #7 | |||||
| ble .Lsgemm_tcopy_L999 | |||||
| tst M, #4 | |||||
| ble .Lsgemm_tcopy_L2_BEGIN | |||||
| .Lsgemm_tcopy_L4_M16_BEGIN: | |||||
| mov A01, A | |||||
| add A02, A01, LDA | |||||
| add A03, A02, LDA | |||||
| add A04, A03, LDA | |||||
| add A, A04, LDA | |||||
| mov B00, B | |||||
| add B, B00, #256 // B = B + 4 * 16 * SIZE | |||||
| asr I, N, #4 // I = N / 16 | |||||
| cmp I, #0 | |||||
| ble .Lsgemm_tcopy_L4_M16_40 | |||||
| .align 5 | |||||
| .Lsgemm_tcopy_L4_M16_20: | |||||
| COPY16x4 | |||||
| subs I , I , #1 | |||||
| bne .Lsgemm_tcopy_L4_M16_20 | |||||
| .Lsgemm_tcopy_L4_M16_40: | |||||
| tst N , #8 | |||||
| ble .Lsgemm_tcopy_L4_M16_60 | |||||
| COPY8x4 | |||||
| .Lsgemm_tcopy_L4_M16_60: | |||||
| tst N , #4 | |||||
| ble .Lsgemm_tcopy_L4_M16_80 | |||||
| COPY4x4 | |||||
| .Lsgemm_tcopy_L4_M16_80: | |||||
| tst N , #2 | |||||
| ble .Lsgemm_tcopy_L4_M16_100 | |||||
| COPY2x4 | |||||
| .Lsgemm_tcopy_L4_M16_100: | |||||
| tst N, #1 | |||||
| ble .Lsgemm_tcopy_L4_M16_END | |||||
| COPY1x4 | |||||
| .Lsgemm_tcopy_L4_M16_END: | |||||
| /*********************************************************************************************/ | |||||
| // ---- one group of 2 rows, if bit 1 of M is set ---- | |||||
| .Lsgemm_tcopy_L2_BEGIN: | |||||
| tst M, #3 | |||||
| ble .Lsgemm_tcopy_L999 | |||||
| tst M, #2 | |||||
| ble .Lsgemm_tcopy_L1_BEGIN | |||||
| .Lsgemm_tcopy_L2_M16_BEGIN: | |||||
| mov A01, A | |||||
| add A02, A01, LDA | |||||
| add A, A02, LDA | |||||
| mov B00, B | |||||
| add B, B00, #128 // B = B + 2 * 16 * SIZE | |||||
| asr I, N, #4 // I = N / 16 | |||||
| cmp I, #0 | |||||
| ble .Lsgemm_tcopy_L2_M16_40 | |||||
| .align 5 | |||||
| .Lsgemm_tcopy_L2_M16_20: | |||||
| COPY16x2 | |||||
| subs I , I , #1 | |||||
| bne .Lsgemm_tcopy_L2_M16_20 | |||||
| .Lsgemm_tcopy_L2_M16_40: | |||||
| tst N , #8 | |||||
| ble .Lsgemm_tcopy_L2_M16_60 | |||||
| COPY8x2 | |||||
| .Lsgemm_tcopy_L2_M16_60: | |||||
| tst N , #4 | |||||
| ble .Lsgemm_tcopy_L2_M16_80 | |||||
| COPY4x2 | |||||
| .Lsgemm_tcopy_L2_M16_80: | |||||
| tst N , #2 | |||||
| ble .Lsgemm_tcopy_L2_M16_100 | |||||
| COPY2x2 | |||||
| .Lsgemm_tcopy_L2_M16_100: | |||||
| tst N , #1 | |||||
| ble .Lsgemm_tcopy_L2_M16_END | |||||
| COPY1x2 | |||||
| .Lsgemm_tcopy_L2_M16_END: | |||||
| /*********************************************************************************************/ | |||||
| // ---- final single row, if bit 0 of M is set ---- | |||||
| .Lsgemm_tcopy_L1_BEGIN: | |||||
| tst M, #1 | |||||
| ble .Lsgemm_tcopy_L999 | |||||
| .Lsgemm_tcopy_L1_M16_BEGIN: | |||||
| mov A01, A // A01 = A | |||||
| mov B00, B | |||||
| asr I, N, #4 // I = N / 16 | |||||
| cmp I, #0 | |||||
| ble .Lsgemm_tcopy_L1_M16_40 | |||||
| .align 5 | |||||
| .Lsgemm_tcopy_L1_M16_20: | |||||
| COPY16x1 | |||||
| subs I , I , #1 | |||||
| bne .Lsgemm_tcopy_L1_M16_20 | |||||
| .Lsgemm_tcopy_L1_M16_40: | |||||
| tst N , #8 | |||||
| ble .Lsgemm_tcopy_L1_M16_60 | |||||
| COPY8x1 | |||||
| .Lsgemm_tcopy_L1_M16_60: | |||||
| tst N , #4 | |||||
| ble .Lsgemm_tcopy_L1_M16_80 | |||||
| COPY4x1 | |||||
| .Lsgemm_tcopy_L1_M16_80: | |||||
| tst N , #2 | |||||
| ble .Lsgemm_tcopy_L1_M16_100 | |||||
| COPY2x1 | |||||
| .Lsgemm_tcopy_L1_M16_100: | |||||
| tst N , #1 | |||||
| ble .Lsgemm_tcopy_L1_M16_END | |||||
| COPY1x1 | |||||
| .Lsgemm_tcopy_L1_M16_END: | |||||
| .Lsgemm_tcopy_L999: | |||||
| mov x0, #0 // set return value | |||||
| RESTORE_REGS | |||||
| ret | |||||
| EPILOGUE | |||||
| @@ -739,6 +739,26 @@ static void init_parameter(void) { | |||||
| } | } | ||||
| #else //POWER | #else //POWER | ||||
| #if defined(ARCH_ZARCH) | |||||
| /* Fill the GEMM blocking parameters (P, R, Q) of TABLE_NAME with the | |||||
| compile-time defaults, one precision at a time. */ | |||||
| static void init_parameter(void) { | |||||
| /* single precision real */ | |||||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||||
| /* double precision real */ | |||||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||||
| /* single precision complex */ | |||||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||||
| TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||||
| TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||||
| /* double precision complex */ | |||||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||||
| } | |||||
| #else //ZARCH | |||||
| #ifdef ARCH_X86 | #ifdef ARCH_X86 | ||||
| static int get_l2_size_old(void){ | static int get_l2_size_old(void){ | ||||
| int i, eax, ebx, ecx, edx, cpuid_level; | int i, eax, ebx, ecx, edx, cpuid_level; | ||||
| @@ -1325,4 +1345,5 @@ static void init_parameter(void) { | |||||
| } | } | ||||
| #endif //POWER | #endif //POWER | ||||
| #endif //ZARCH | |||||
| #endif //defined(ARCH_ARM64) | #endif //defined(ARCH_ARM64) | ||||
| @@ -98,5 +98,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ||||
| CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | ||||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||||
| ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c | |||||
| @@ -95,5 +95,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ||||
| CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | ||||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||||
| ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c | |||||
| @@ -0,0 +1,224 @@ | |||||
| /* %0 = "+r"(a_pointer), %1 = "+r"(b_pointer), %2 = "+r"(c_pointer), %3 = "+r"(ldc_in_bytes), %4 for k_count, %5 for c_store */ | |||||
| /* r12 = k << 5(const), r13 = k(const), r14 = b_head_pos(const), r15 = tmp */ | |||||
| #include "common.h" | |||||
| #include <stdint.h> | |||||
| //recommended settings: GEMM_Q=256, GEMM_P=256 | |||||
| /* m = 4 *//* ymm0 for alpha, ymm1-ymm3 for temporary use, ymm4-ymm15 for accumulators */ | |||||
| /* m=4 k-step macros. Each KERNEL_k1m4nN reads A at %0 (advancing it by 32 bytes) and B at %1, and accumulates with FMA into ymm4 upward. The "_h_" variants do not advance %1, so wider variants can chain on them; the second and third B panels are addressed as (%1,%r12,1) and (%1,%r12,2). KERNEL_k2* perform two k-steps. */ | |||||
| #define KERNEL_k1m4n1 \ | |||||
| "vmovupd (%0),%%ymm1; addq $32,%0;"\ | |||||
| "vbroadcastsd (%1),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,%%ymm4;"\ | |||||
| "addq $8,%1;" | |||||
| #define KERNEL_h_k1m4n2 \ | |||||
| "vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2; addq $32,%0;"\ | |||||
| "vbroadcastf128 (%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm4; vfmadd231pd %%ymm2,%%ymm3,%%ymm5;" | |||||
| #define KERNEL_k1m4n2 KERNEL_h_k1m4n2 "addq $16,%1;" | |||||
| #define KERNEL_h_k1m4n4 \ | |||||
| KERNEL_h_k1m4n2 "vbroadcastf128 16(%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm6; vfmadd231pd %%ymm2,%%ymm3,%%ymm7;" | |||||
| #define KERNEL_k1m4n4 KERNEL_h_k1m4n4 "addq $32,%1;" | |||||
| #define unit_kernel_k1m4n4(c1,c2,c3,c4,off1,off2,...) \ | |||||
| "vbroadcastf128 "#off1"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";"\ | |||||
| "vbroadcastf128 "#off2"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c3"; vfmadd231pd %%ymm2,%%ymm3,"#c4";" | |||||
| #define KERNEL_h_k1m4n8 KERNEL_h_k1m4n4 unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1) | |||||
| #define KERNEL_k1m4n8 KERNEL_h_k1m4n8 "addq $32,%1;" | |||||
| #define KERNEL_h_k1m4n12 KERNEL_h_k1m4n8 unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2) | |||||
| #define KERNEL_k1m4n12 KERNEL_h_k1m4n12 "addq $32,%1;" | |||||
| #define KERNEL_k2m4n1 KERNEL_k1m4n1 KERNEL_k1m4n1 | |||||
| #define KERNEL_k2m4n2 KERNEL_k1m4n2 KERNEL_k1m4n2 | |||||
| #define KERNEL_k2m4n4 KERNEL_k1m4n4 KERNEL_k1m4n4 | |||||
| #define KERNEL_k2m4n8 KERNEL_k1m4n8 KERNEL_k1m4n8 | |||||
| #define KERNEL_k2m4n12 \ | |||||
| "vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2;"\ | |||||
| unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,0,16,%1)\ | |||||
| unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1)\ | |||||
| unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2)\ | |||||
| "vmovddup 32(%0),%%ymm1; vmovddup 40(%0),%%ymm2; prefetcht0 512(%0); addq $64,%0;"\ | |||||
| unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,32,48,%1)\ | |||||
| unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,32,48,%1,%%r12,1)\ | |||||
| unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,32,48,%1,%%r12,2) "addq $64,%1;" | |||||
| /* m=4 accumulator setup: INIT_m4nN zeroes (vpxor) the ymm accumulators used by the matching KERNEL_*m4nN, ymm4 up to ymm15 for n=12. */ | |||||
| #define INIT_m4n1 "vpxor %%ymm4,%%ymm4,%%ymm4;" | |||||
| #define INIT_m4n2 INIT_m4n1 "vpxor %%ymm5,%%ymm5,%%ymm5;" | |||||
| #define INIT_m4n4 INIT_m4n2 "vpxor %%ymm6,%%ymm6,%%ymm6;vpxor %%ymm7,%%ymm7,%%ymm7;" | |||||
| #define unit_init_m4n4(c1,c2,c3,c4) \ | |||||
| "vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";vpxor "#c3","#c3","#c3";vpxor "#c4","#c4","#c4";" | |||||
| #define INIT_m4n8 INIT_m4n4 unit_init_m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11) | |||||
| #define INIT_m4n12 INIT_m4n8 unit_init_m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15) | |||||
| /* m=4 write-back: the accumulators are permuted/unpacked so each value is duplicated, multiplied by ymm0 and added to the existing contents of C (vfmadd213pd), then stored. %5 walks the C columns in steps of %3; SAVE_m4 finally advances %2 by 64 bytes. */ | |||||
| #define SAVE_h_m4n1 \ | |||||
| "vpermpd $216,%%ymm4,%%ymm3; vunpcklpd %%ymm3,%%ymm3,%%ymm1; vunpckhpd %%ymm3,%%ymm3,%%ymm2;"\ | |||||
| "vfmadd213pd (%2),%%ymm0,%%ymm1; vfmadd213pd 32(%2),%%ymm0,%%ymm2; vmovupd %%ymm1,(%2); vmovupd %%ymm2,32(%2);" | |||||
| #define unit_save_m4n2(c1,c2) \ | |||||
| "vperm2f128 $2,"#c1","#c2",%%ymm2; vperm2f128 $19,"#c1","#c2","#c2"; vmovapd %%ymm2,"#c1";"\ | |||||
| "vunpcklpd "#c1","#c1",%%ymm2; vunpcklpd "#c2","#c2",%%ymm3;"\ | |||||
| "vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd 32(%5),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,32(%5);"\ | |||||
| "vunpckhpd "#c1","#c1",%%ymm2; vunpckhpd "#c2","#c2",%%ymm3;"\ | |||||
| "vfmadd213pd (%5,%3,1),%%ymm0,%%ymm2; vfmadd213pd 32(%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5,%3,1); vmovupd %%ymm3,32(%5,%3,1);"\ | |||||
| "leaq (%5,%3,2),%5;" | |||||
| #define SAVE_h_m4n2 "movq %2,%5;" unit_save_m4n2(%%ymm4,%%ymm5) | |||||
| #define SAVE_h_m4n4 SAVE_h_m4n2 unit_save_m4n2(%%ymm6,%%ymm7) | |||||
| #define SAVE_h_m4n8 SAVE_h_m4n4 unit_save_m4n2(%%ymm8,%%ymm9) unit_save_m4n2(%%ymm10,%%ymm11) | |||||
| #define SAVE_h_m4n12 SAVE_h_m4n8 unit_save_m4n2(%%ymm12,%%ymm13) unit_save_m4n2(%%ymm14,%%ymm15) | |||||
| #define SAVE_m4(ndim) SAVE_h_m4n##ndim "addq $64,%2;" | |||||
| /* COMPUTE_m4(ndim): one m=4 tile. Zeroes the accumulators, reloads the k counter (%4) from r13 and the B pointer (%1) from r14, then: (1) if k >= 24, runs a main loop of 8 k-steps per iteration (four KERNEL_k2) while at least 16 remain, issuing prefetcht1 on C via %5 and on %8; (2) runs a tail loop one k-step at a time with prefetcht0 on C; (3) writes back with SAVE_m4. Numeric labels are prefixed with ndim so each expansion is distinct. */ | |||||
| #define COMPUTE_m4(ndim) \ | |||||
| INIT_m4n##ndim\ | |||||
| "movq %%r13,%4; movq %%r14,%1; movq %2,%5; xorq %%r15,%%r15;"\ | |||||
| "cmpq $24,%4; jb "#ndim"004042f;"\ | |||||
| #ndim"004041:\n\t"\ | |||||
| "cmpq $126,%%r15; movq $126,%%r15; cmoveq %3,%%r15;"\ | |||||
| KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\ | |||||
| "prefetcht1 (%5); subq $63,%5;"\ | |||||
| KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\ | |||||
| "addq %%r15,%5; prefetcht1 (%8); addq $32,%8;"\ | |||||
| "subq $8,%4; cmpq $16,%4; jnb "#ndim"004041b;"\ | |||||
| "movq %2,%5;"\ | |||||
| #ndim"004042:\n\t"\ | |||||
| "testq %4,%4; jz "#ndim"004043f;"\ | |||||
| "prefetcht0 (%5); prefetcht0 63(%5);"\ | |||||
| KERNEL_k1m4n##ndim\ | |||||
| "prefetcht0 (%5,%3,4); prefetcht0 63(%5,%3,4); addq %3,%5;"\ | |||||
| "decq %4; jmp "#ndim"004042b;"\ | |||||
| #ndim"004043:\n\t"\ | |||||
| "prefetcht0 (%%r14); prefetcht0 64(%%r14);"\ | |||||
| SAVE_m4(ndim) | |||||
| /* m = 2 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm9 for accumulators */ | |||||
| /* m=2 k-step macros. Each KERNEL_k1m2nN reads A at %0 (advancing it by 16 bytes) and B at %1, and accumulates with FMA into xmm4/xmm5 (n<=2) or ymm4..ymm9 (n>=4). The "_h_" variants do not advance %1; extra B panels are addressed as (%1,%r12,1) and (%1,%r12,2). */ | |||||
| #define KERNEL_k1m2n1 \ | |||||
| "vmovupd (%0),%%xmm1; addq $16,%0;"\ | |||||
| "vmovddup (%1),%%xmm2; vfmadd231pd %%xmm1,%%xmm2,%%xmm4;"\ | |||||
| "addq $8,%1;" | |||||
| #define KERNEL_h_k1m2n2 \ | |||||
| "vmovddup (%0),%%xmm1; vmovddup 8(%0),%%xmm2; addq $16,%0;"\ | |||||
| "vmovupd (%1),%%xmm3; vfmadd231pd %%xmm1,%%xmm3,%%xmm4; vfmadd231pd %%xmm2,%%xmm3,%%xmm5;" | |||||
| #define KERNEL_k1m2n2 KERNEL_h_k1m2n2 "addq $16,%1;" | |||||
| #define unit_kernel_k1m2n4(c1,c2,...) \ | |||||
| "vmovupd ("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";" | |||||
| #define KERNEL_h_k1m2n4 \ | |||||
| "vbroadcastsd (%0),%%ymm1; vbroadcastsd 8(%0),%%ymm2; addq $16,%0;"\ | |||||
| unit_kernel_k1m2n4(%%ymm4,%%ymm5,%1) | |||||
| #define KERNEL_k1m2n4 KERNEL_h_k1m2n4 "addq $32,%1;" | |||||
| #define KERNEL_h_k1m2n8 KERNEL_h_k1m2n4 \ | |||||
| unit_kernel_k1m2n4(%%ymm6,%%ymm7,%1,%%r12,1) | |||||
| #define KERNEL_k1m2n8 KERNEL_h_k1m2n8 "addq $32,%1;" | |||||
| #define KERNEL_h_k1m2n12 KERNEL_h_k1m2n8 \ | |||||
| unit_kernel_k1m2n4(%%ymm8,%%ymm9,%1,%%r12,2) | |||||
| #define KERNEL_k1m2n12 KERNEL_h_k1m2n12 "addq $32,%1;" | |||||
| /* m=2 accumulator setup: INIT_m2nN zeroes (vpxor) the accumulators used by the matching KERNEL_k1m2nN. */ | |||||
| #define INIT_m2n1 "vpxor %%xmm4,%%xmm4,%%xmm4;" | |||||
| #define INIT_m2n2 INIT_m2n1 "vpxor %%xmm5,%%xmm5,%%xmm5;" | |||||
| #define unit_init_m2n4(c1,c2) "vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";" | |||||
| #define INIT_m2n4 unit_init_m2n4(%%ymm4,%%ymm5) | |||||
| #define INIT_m2n8 INIT_m2n4 unit_init_m2n4(%%ymm6,%%ymm7) | |||||
| #define INIT_m2n12 INIT_m2n8 unit_init_m2n4(%%ymm8,%%ymm9) | |||||
| /* m=2 write-back: accumulators are rearranged so each value is duplicated, multiplied by ymm0 and added to the existing contents of C (vfmadd213pd), then stored. For n>=4, %5 walks the C columns in steps of %3. SAVE_m2 finally advances %2 by 32 bytes. */ | |||||
| #define SAVE_h_m2n1 \ | |||||
| "vinsertf128 $1,%%xmm4,%%ymm4,%%ymm4; vpermilpd $12,%%ymm4,%%ymm4; vfmadd213pd (%2),%%ymm0,%%ymm4; vmovupd %%ymm4,(%2);" | |||||
| #define SAVE_h_m2n2 \ | |||||
| "vinsertf128 $1,%%xmm5,%%ymm4,%%ymm4; vunpcklpd %%ymm4,%%ymm4,%%ymm1; vunpckhpd %%ymm4,%%ymm4,%%ymm2;"\ | |||||
| "vfmadd213pd (%2),%%ymm0,%%ymm1; vmovupd %%ymm1,(%2);"\ | |||||
| "vfmadd213pd (%2,%3,1),%%ymm0,%%ymm2; vmovupd %%ymm2,(%2,%3,1);" | |||||
| #define unit_save_m2n4(c1,c2) \ | |||||
| "vperm2f128 $2,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\ | |||||
| "vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;"\ | |||||
| "vperm2f128 $19,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\ | |||||
| "vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;" | |||||
| #define SAVE_h_m2n4 "movq %2,%5;" unit_save_m2n4(%%ymm4,%%ymm5) | |||||
| #define SAVE_h_m2n8 SAVE_h_m2n4 unit_save_m2n4(%%ymm6,%%ymm7) | |||||
| #define SAVE_h_m2n12 SAVE_h_m2n8 unit_save_m2n4(%%ymm8,%%ymm9) | |||||
| #define SAVE_m2(ndim) SAVE_h_m2n##ndim "addq $32,%2;" | |||||
| /* COMPUTE_m2(ndim): one m=2 tile. Zeroes the accumulators, reloads the k counter (%4) from r13 and the B pointer (%1) from r14, runs KERNEL_k1m2n<ndim> once per k until %4 reaches zero, then writes back with SAVE_m2. */ | |||||
| #define COMPUTE_m2(ndim) \ | |||||
| INIT_m2n##ndim\ | |||||
| "movq %%r13,%4; movq %%r14,%1;"\ | |||||
| #ndim"002022:\n\t"\ | |||||
| "testq %4,%4; jz "#ndim"002023f;"\ | |||||
| KERNEL_k1m2n##ndim\ | |||||
| "decq %4; jmp "#ndim"002022b;"\ | |||||
| #ndim"002023:\n\t"\ | |||||
| SAVE_m2(ndim) | |||||
| /* m = 1 *//* vmm0 for alpha, vmm1-vmm3 and vmm10-vmm15 for temporary use, vmm4-vmm6 for accumulators */ | |||||
| /* m=1 k-step macros. Each KERNEL_k1m1nN reads one 8-byte value of A at %0 (advancing it by 8) and B at %1, and accumulates with FMA into xmm4 (n<=2) or ymm4..ymm6 (n>=4). The "_h_" variants do not advance %1; extra B panels are addressed as (%1,%r12,1) and (%1,%r12,2). */ | |||||
| #define KERNEL_k1m1n1 \ | |||||
| "vmovsd (%0),%%xmm1; addq $8,%0;"\ | |||||
| "vfmadd231sd (%1),%%xmm1,%%xmm4; addq $8,%1;" | |||||
| #define KERNEL_k1m1n2 \ | |||||
| "vmovddup (%0),%%xmm1; addq $8,%0;"\ | |||||
| "vfmadd231pd (%1),%%xmm1,%%xmm4; addq $16,%1;" | |||||
| #define unit_kernel_k1m1n4(c1,...) \ | |||||
| "vmovupd ("#__VA_ARGS__"),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,"#c1";" | |||||
| #define KERNEL_h_k1m1n4 \ | |||||
| "vbroadcastsd (%0),%%ymm1; addq $8,%0;"\ | |||||
| unit_kernel_k1m1n4(%%ymm4,%1) | |||||
| #define KERNEL_k1m1n4 KERNEL_h_k1m1n4 "addq $32,%1;" | |||||
| #define KERNEL_h_k1m1n8 KERNEL_h_k1m1n4 unit_kernel_k1m1n4(%%ymm5,%1,%%r12,1) | |||||
| #define KERNEL_k1m1n8 KERNEL_h_k1m1n8 "addq $32,%1;" | |||||
| #define KERNEL_h_k1m1n12 KERNEL_h_k1m1n8 unit_kernel_k1m1n4(%%ymm6,%1,%%r12,2) | |||||
| #define KERNEL_k1m1n12 KERNEL_h_k1m1n12 "addq $32,%1;" | |||||
| /* m=1 accumulator setup: n=1 and n=2 both reuse INIT_m2n1 (only xmm4 is accumulated into); n=4/8/12 zero ymm4, ymm5 and ymm6 cumulatively. */ | |||||
| #define INIT_m1n1 INIT_m2n1 | |||||
| #define INIT_m1n2 INIT_m2n1 | |||||
| #define INIT_m1n4 "vpxor %%ymm4,%%ymm4,%%ymm4;" | |||||
| #define INIT_m1n8 INIT_m1n4 "vpxor %%ymm5,%%ymm5,%%ymm5;" | |||||
| #define INIT_m1n12 INIT_m1n8 "vpxor %%ymm6,%%ymm6,%%ymm6;" | |||||
| /* m=1 write-back: each accumulated value is duplicated, multiplied by the alpha register (xmm0/ymm0) and added to the existing contents of C, then stored 16 bytes per column. For n>=4, %5 walks the C columns in steps of %3. SAVE_m1 finally advances %2 by 16 bytes. */ | |||||
| #define SAVE_h_m1n1 \ | |||||
| "vmovddup %%xmm4,%%xmm4; vfmadd213pd (%2),%%xmm0,%%xmm4; vmovupd %%xmm4,(%2);" | |||||
| #define SAVE_h_m1n2 \ | |||||
| "vunpcklpd %%xmm4,%%xmm4,%%xmm1; vunpckhpd %%xmm4,%%xmm4,%%xmm2;"\ | |||||
| "vfmadd213pd (%2),%%xmm0,%%xmm1; vmovupd %%xmm1,(%2);"\ | |||||
| "vfmadd213pd (%2,%3,1),%%xmm0,%%xmm2; vmovupd %%xmm2,(%2,%3,1);" | |||||
| #define unit_save_m1n4(c1) \ | |||||
| "vunpcklpd "#c1","#c1",%%ymm1; vunpckhpd "#c1","#c1",%%ymm2;"\ | |||||
| "vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\ | |||||
| "vfmadd213pd %%ymm3,%%ymm0,%%ymm1; vmovupd %%xmm1,(%5); vextractf128 $1,%%ymm1,(%5,%3,2); addq %3,%5;"\ | |||||
| "vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\ | |||||
| "vfmadd213pd %%ymm3,%%ymm0,%%ymm2; vmovupd %%xmm2,(%5); vextractf128 $1,%%ymm2,(%5,%3,2); addq %3,%5; leaq (%5,%3,2),%5;" | |||||
| #define SAVE_h_m1n4 "movq %2,%5;" unit_save_m1n4(%%ymm4) | |||||
| #define SAVE_h_m1n8 SAVE_h_m1n4 unit_save_m1n4(%%ymm5) | |||||
| #define SAVE_h_m1n12 SAVE_h_m1n8 unit_save_m1n4(%%ymm6) | |||||
| #define SAVE_m1(ndim) SAVE_h_m1n##ndim "addq $16,%2;" | |||||
| #define COMPUTE_m1(ndim) \ | |||||
| INIT_m1n##ndim\ | |||||
| "movq %%r13,%4; movq %%r14,%1;"\ | |||||
| #ndim"001011:\n\t"\ | |||||
| "testq %4,%4; jz "#ndim"001012f;"\ | |||||
| KERNEL_k1m1n##ndim\ | |||||
| "decq %4; jmp "#ndim"001011b;"\ | |||||
| #ndim"001012:\n\t"\ | |||||
| SAVE_m1(ndim) | |||||
| #define COMPUTE(ndim) {\ | |||||
| next_b = b_pointer + ndim * K;\ | |||||
| __asm__ __volatile__(\ | |||||
| "vbroadcastf128 (%6),%%ymm0;"\ | |||||
| "movq %4,%%r13; movq %4,%%r12; salq $5,%%r12; movq %1,%%r14; movq %7,%%r11;"\ | |||||
| "cmpq $4,%7;jb 33101"#ndim"f;"\ | |||||
| "33109"#ndim":\n\t"\ | |||||
| COMPUTE_m4(ndim)\ | |||||
| "subq $4,%7;cmpq $4,%7;jnb 33109"#ndim"b;"\ | |||||
| "33101"#ndim":\n\t"\ | |||||
| "cmpq $2,%7;jb 33104"#ndim"f;"\ | |||||
| COMPUTE_m2(ndim)\ | |||||
| "subq $2,%7;"\ | |||||
| "33104"#ndim":\n\t"\ | |||||
| "testq %7,%7;jz 33105"#ndim"f;"\ | |||||
| COMPUTE_m1(ndim)\ | |||||
| "33105"#ndim":\n\t"\ | |||||
| "movq %%r13,%4; movq %%r14,%1; movq %%r11,%7;"\ | |||||
| :"+r"(a_pointer),"+r"(b_pointer),"+r"(c_pointer),"+r"(ldc_in_bytes),"+r"(K),"+r"(ctemp),"+r"(const_val),"+r"(M),"+r"(next_b)\ | |||||
| ::"r11","r12","r13","r14","r15","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14",\ | |||||
| "xmm15","cc","memory");\ | |||||
| a_pointer -= M * K; b_pointer += ndim * K; c_pointer += 2*(LDC * ndim - M);\ | |||||
| } | |||||
| int __attribute__ ((noinline)) | |||||
| CNAME(BLASLONG m, BLASLONG n, BLASLONG k, double alphar, double alphai, double * __restrict__ A, double * __restrict__ B, double * __restrict__ C, BLASLONG LDC) | |||||
| { | |||||
| if(m==0||n==0||k==0) return 0; | |||||
| int64_t ldc_in_bytes = (int64_t)LDC * sizeof(double) * 2; | |||||
| double constval[2]; constval[0] = alphar; constval[1] = alphai; | |||||
| double *const_val=constval; | |||||
| int64_t M = (int64_t)m, K = (int64_t)k; | |||||
| BLASLONG n_count = n; | |||||
| double *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B; | |||||
| for(;n_count>11;n_count-=12) COMPUTE(12) | |||||
| for(;n_count>7;n_count-=8) COMPUTE(8) | |||||
| for(;n_count>3;n_count-=4) COMPUTE(4) | |||||
| for(;n_count>1;n_count-=2) COMPUTE(2) | |||||
| if(n_count>0) COMPUTE(1) | |||||
| return 0; | |||||
| } | |||||
| @@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_8.c | SGEMMITCOPY = ../generic/gemm_tcopy_8.c | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | SGEMMONCOPY = ../generic/gemm_ncopy_4.c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | ||||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| @@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_8.c | DGEMMITCOPY = ../generic/gemm_tcopy_8.c | ||||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | DGEMMONCOPY = ../generic/gemm_ncopy_4.c | ||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | ||||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = ctrmm4x4V.S | CGEMMKERNEL = ctrmm4x4V.S | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = ztrmm4x4V.S | ZGEMMKERNEL = ztrmm4x4V.S | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | ||||
| @@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_8.c | SGEMMITCOPY = ../generic/gemm_tcopy_8.c | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | SGEMMONCOPY = ../generic/gemm_ncopy_4.c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | ||||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| @@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_8.c | DGEMMITCOPY = ../generic/gemm_tcopy_8.c | ||||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | DGEMMONCOPY = ../generic/gemm_ncopy_4.c | ||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | ||||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = ctrmm4x4V.S | CGEMMKERNEL = ctrmm4x4V.S | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = ztrmm4x4V.S | ZGEMMKERNEL = ztrmm4x4V.S | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | ||||
| @@ -94,26 +94,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | SGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | SGEMMONCOPY = ../generic/gemm_ncopy_2.c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | DGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | DGEMMONCOPY = ../generic/gemm_ncopy_2.c | ||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | ||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | ||||
| @@ -0,0 +1,38 @@ | |||||
| image: | |||||
| - Visual Studio 2017 | |||||
| configuration: Release | |||||
| clone_depth: 3 | |||||
| matrix: | |||||
| fast_finish: false | |||||
| skip_commits: | |||||
| # Add [av skip] to commit messages | |||||
| message: /\[av skip\]/ | |||||
| cache: | |||||
| - '%APPVEYOR_BUILD_FOLDER%\build' | |||||
| environment: | |||||
| global: | |||||
| CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 | |||||
| install: | |||||
| - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat | |||||
| - conda config --add channels conda-forge --force | |||||
| - conda install --yes --quiet flang jom | |||||
| - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 | |||||
| - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" | |||||
| - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" | |||||
| before_build: | |||||
| - ps: if (-Not (Test-Path .\build)) { mkdir build } | |||||
| - cd build | |||||
| - cmake -G "NMake Makefiles JOM" -DCMAKE_Fortran_COMPILER=flang -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON .. | |||||
| build_script: | |||||
| - cmake --build . | |||||
| test_script: | |||||
| - ctest -j2 | |||||
| @@ -35,3 +35,9 @@ LAPACKE/example/xexample* | |||||
| # SED | # SED | ||||
| SRC/*-e | SRC/*-e | ||||
| LAPACKE/src/*-e | LAPACKE/src/*-e | ||||
| build* | |||||
| # DOCS documentation | |||||
| DOCS/man | |||||
| DOCS/explore-html | |||||
| output_err | |||||
| @@ -1,33 +1,32 @@ | |||||
| language: cpp | |||||
| language: c | |||||
| dist: xenial | |||||
| group: travis_latest | |||||
| git: | |||||
| depth: 3 | |||||
| quiet: true | |||||
| addons: | addons: | ||||
| apt: | apt: | ||||
| sources: | |||||
| - george-edison55-precise-backports # cmake | |||||
| packages: | packages: | ||||
| - cmake | |||||
| - cmake-data | |||||
| - gfortran | |||||
| os: | |||||
| - linux | |||||
| - osx | |||||
| env: | |||||
| - CMAKE_BUILD_TYPE=Release | |||||
| - CMAKE_BUILD_TYPE=Coverage | |||||
| - gfortran | |||||
| install: | |||||
| - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; | |||||
| then | |||||
| for pkg in gcc cmake; do | |||||
| if brew list -1 | grep -q "^${pkg}\$"; then | |||||
| brew outdated $pkg || brew upgrade $pkg; | |||||
| else | |||||
| brew install $pkg; | |||||
| fi | |||||
| done | |||||
| fi | |||||
| matrix: | |||||
| include: | |||||
| - os: linux | |||||
| env: CMAKE_BUILD_TYPE=Release | |||||
| - os: linux | |||||
| env: CMAKE_BUILD_TYPE=Coverage | |||||
| - os: osx | |||||
| env: CMAKE_BUILD_TYPE=Release | |||||
| before_install: | |||||
| - brew update > /dev/null | |||||
| - brew install gcc > /dev/null | |||||
| - os: osx | |||||
| env: CMAKE_BUILD_TYPE=Coverage | |||||
| before_install: | |||||
| - brew update > /dev/null | |||||
| - brew install gcc > /dev/null | |||||
| script: | script: | ||||
| - export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST | - export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST | ||||
| @@ -6,4 +6,5 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/blas.pc | ${CMAKE_CURRENT_BINARY_DIR}/blas.pc | ||||
| DESTINATION ${PKG_CONFIG_DIR} | DESTINATION ${PKG_CONFIG_DIR} | ||||
| COMPONENT Development | |||||
| ) | ) | ||||
| @@ -1,13 +1,18 @@ | |||||
| include ../make.inc | |||||
| TOPSRCDIR = .. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .PHONY: all | |||||
| all: blas | all: blas | ||||
| .PHONY: blas | |||||
| blas: | blas: | ||||
| $(MAKE) -C SRC | $(MAKE) -C SRC | ||||
| .PHONY: blas_testing | |||||
| blas_testing: blas | blas_testing: blas | ||||
| $(MAKE) -C TESTING run | $(MAKE) -C TESTING run | ||||
| .PHONY: clean cleanobj cleanlib cleanexe cleantest | |||||
| clean: | clean: | ||||
| $(MAKE) -C SRC clean | $(MAKE) -C SRC clean | ||||
| $(MAKE) -C TESTING clean | $(MAKE) -C TESTING clean | ||||
| @@ -1,5 +1,3 @@ | |||||
| include ../../make.inc | |||||
| ####################################################################### | ####################################################################### | ||||
| # This is the makefile to create a library for the BLAS. | # This is the makefile to create a library for the BLAS. | ||||
| # The files are grouped as follows: | # The files are grouped as follows: | ||||
| @@ -55,6 +53,10 @@ include ../../make.inc | |||||
| # | # | ||||
| ####################################################################### | ####################################################################### | ||||
| TOPSRCDIR = ../.. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .PHONY: all | |||||
| all: $(BLASLIB) | all: $(BLASLIB) | ||||
| #--------------------------------------------------------- | #--------------------------------------------------------- | ||||
| @@ -138,33 +140,32 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ | |||||
| $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) | $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) | ||||
| $(BLASLIB): $(ALLOBJ) | $(BLASLIB): $(ALLOBJ) | ||||
| $(ARCH) $(ARCHFLAGS) $@ $^ | |||||
| $(AR) $(ARFLAGS) $@ $^ | |||||
| $(RANLIB) $@ | $(RANLIB) $@ | ||||
| .PHONY: single double complex complex16 | |||||
| single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) | single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) | ||||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||||
| $(RANLIB) $(BLASLIB) | $(RANLIB) $(BLASLIB) | ||||
| double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) | double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) | ||||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||||
| $(RANLIB) $(BLASLIB) | $(RANLIB) $(BLASLIB) | ||||
| complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) | complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) | ||||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||||
| $(RANLIB) $(BLASLIB) | $(RANLIB) $(BLASLIB) | ||||
| complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) | complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) | ||||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||||
| $(RANLIB) $(BLASLIB) | $(RANLIB) $(BLASLIB) | ||||
| FRC: | FRC: | ||||
| @FRC=$(FRC) | @FRC=$(FRC) | ||||
| .PHONY: clean cleanobj cleanlib | |||||
| clean: cleanobj cleanlib | clean: cleanobj cleanlib | ||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| cleanlib: | cleanlib: | ||||
| #rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas | #rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas | ||||
| .f.o: | |||||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||||
| @@ -43,7 +43,7 @@ | |||||
| *> \param[in] INCX | *> \param[in] INCX | ||||
| *> \verbatim | *> \verbatim | ||||
| *> INCX is INTEGER | *> INCX is INTEGER | ||||
| *> storage spacing between elements of SX | |||||
| *> storage spacing between elements of CX | |||||
| *> \endverbatim | *> \endverbatim | ||||
| * | * | ||||
| * Authors: | * Authors: | ||||
| @@ -43,7 +43,7 @@ | |||||
| *> \param[in] INCX | *> \param[in] INCX | ||||
| *> \verbatim | *> \verbatim | ||||
| *> INCX is INTEGER | *> INCX is INTEGER | ||||
| *> storage spacing between elements of SX | |||||
| *> storage spacing between elements of DX | |||||
| *> \endverbatim | *> \endverbatim | ||||
| * | * | ||||
| * Authors: | * Authors: | ||||
| @@ -43,7 +43,7 @@ | |||||
| *> \param[in] INCX | *> \param[in] INCX | ||||
| *> \verbatim | *> \verbatim | ||||
| *> INCX is INTEGER | *> INCX is INTEGER | ||||
| *> storage spacing between elements of SX | |||||
| *> storage spacing between elements of ZX | |||||
| *> \endverbatim | *> \endverbatim | ||||
| * | * | ||||
| * Authors: | * Authors: | ||||
| @@ -0,0 +1,29 @@ | |||||
| SBLAS1 = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'sdot.f', 'snrm2.f', 'srot.f', 'srotg.f', 'sscal.f', 'sswap.f', 'sdsdot.f', 'srotmg.f', 'srotm.f') | |||||
| CBLAS1 = files('scabs1.f', 'scasum.f', 'scnrm2.f', 'icamax.f', 'caxpy.f', 'ccopy.f', 'cdotc.f', 'cdotu.f', 'csscal.f', 'crotg.f', 'cscal.f', 'cswap.f', 'csrot.f') | |||||
| DBLAS1 = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'ddot.f', 'dnrm2.f', 'drot.f', 'drotg.f', 'dscal.f', 'dsdot.f', 'dswap.f', 'drotmg.f', 'drotm.f') | |||||
| ZBLAS1 = files('dcabs1.f', 'dzasum.f', 'dznrm2.f', 'izamax.f', 'zaxpy.f', 'zcopy.f', 'zdotc.f', 'zdotu.f', 'zdscal.f', 'zrotg.f', 'zscal.f', 'zswap.f', 'zdrot.f') | |||||
| CB1AUX = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'snrm2.f', 'sscal.f') | |||||
| ZB1AUX = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'dnrm2.f', 'dscal.f') | |||||
| ALLBLAS = files('lsame.f', 'xerbla.f', 'xerbla_array.f') | |||||
| SBLAS2 = files('sgemv.f', 'sgbmv.f', 'ssymv.f', 'ssbmv.f', 'sspmv.f', 'strmv.f', 'stbmv.f', 'stpmv.f', 'strsv.f', 'stbsv.f', 'stpsv.f', 'sger.f', 'ssyr.f', 'sspr.f', 'ssyr2.f', 'sspr2.f') | |||||
| CBLAS2 = files('cgemv.f', 'cgbmv.f', 'chemv.f', 'chbmv.f', 'chpmv.f', 'ctrmv.f', 'ctbmv.f', 'ctpmv.f', 'ctrsv.f', 'ctbsv.f', 'ctpsv.f', 'cgerc.f', 'cgeru.f', 'cher.f', 'chpr.f', 'cher2.f', 'chpr2.f') | |||||
| DBLAS2 = files('dgemv.f', 'dgbmv.f', 'dsymv.f', 'dsbmv.f', 'dspmv.f', 'dtrmv.f', 'dtbmv.f', 'dtpmv.f', 'dtrsv.f', 'dtbsv.f', 'dtpsv.f', 'dger.f', 'dsyr.f', 'dspr.f', 'dsyr2.f', 'dspr2.f') | |||||
| ZBLAS2 = files('zgemv.f', 'zgbmv.f', 'zhemv.f', 'zhbmv.f', 'zhpmv.f', 'ztrmv.f', 'ztbmv.f', 'ztpmv.f', 'ztrsv.f', 'ztbsv.f', 'ztpsv.f', 'zgerc.f', 'zgeru.f', 'zher.f', 'zhpr.f', 'zher2.f', 'zhpr2.f') | |||||
| SBLAS3 = files('sgemm.f', 'ssymm.f', 'ssyrk.f', 'ssyr2k.f', 'strmm.f', 'strsm.f') | |||||
| CBLAS3 = files('cgemm.f', 'csymm.f', 'csyrk.f', 'csyr2k.f', 'ctrmm.f', 'ctrsm.f', 'chemm.f', 'cherk.f', 'cher2k.f') | |||||
| DBLAS3 = files('dgemm.f', 'dsymm.f', 'dsyrk.f', 'dsyr2k.f', 'dtrmm.f', 'dtrsm.f') | |||||
| ZBLAS3 = files('zgemm.f', 'zsymm.f', 'zsyrk.f', 'zsyr2k.f', 'ztrmm.f', 'ztrsm.f', 'zhemm.f', 'zherk.f', 'zher2k.f') | |||||
| @@ -23,13 +23,13 @@ | |||||
| *> | *> | ||||
| *> \verbatim | *> \verbatim | ||||
| *> | *> | ||||
| * Compute the inner product of two vectors with extended | |||||
| * precision accumulation. | |||||
| * | |||||
| * Returns S.P. result with dot product accumulated in D.P. | |||||
| * SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||||
| * where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||||
| * defined in a similar way using INCY. | |||||
| *> Compute the inner product of two vectors with extended | |||||
| *> precision accumulation. | |||||
| *> | |||||
| *> Returns S.P. result with dot product accumulated in D.P. | |||||
| *> SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||||
| *> where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||||
| *> defined in a similar way using INCY. | |||||
| *> \endverbatim | *> \endverbatim | ||||
| * | * | ||||
| * Arguments: | * Arguments: | ||||
| @@ -77,7 +77,14 @@ | |||||
| *> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA), | *> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA), | ||||
| *> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) | *> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) | ||||
| * | * | ||||
| *> \ingroup complex_blas_level1 | |||||
| *> \author Univ. of Tennessee | |||||
| *> \author Univ. of California Berkeley | |||||
| *> \author Univ. of Colorado Denver | |||||
| *> \author NAG Ltd. | |||||
| * | |||||
| *> \date November 2017 | |||||
| * | |||||
| *> \ingroup single_blas_level1 | |||||
| * | * | ||||
| *> \par Further Details: | *> \par Further Details: | ||||
| * ===================== | * ===================== | ||||
| @@ -102,65 +109,7 @@ | |||||
| *> 920501 Reformatted the REFERENCES section. (WRB) | *> 920501 Reformatted the REFERENCES section. (WRB) | ||||
| *> 070118 Reformat to LAPACK coding style | *> 070118 Reformat to LAPACK coding style | ||||
| *> \endverbatim | *> \endverbatim | ||||
| * | |||||
| * ===================================================================== | |||||
| * | |||||
| * .. Local Scalars .. | |||||
| * DOUBLE PRECISION DSDOT | |||||
| * INTEGER I,KX,KY,NS | |||||
| * .. | |||||
| * .. Intrinsic Functions .. | |||||
| * INTRINSIC DBLE | |||||
| * .. | |||||
| * DSDOT = SB | |||||
| * IF (N.LE.0) THEN | |||||
| * SDSDOT = DSDOT | |||||
| * RETURN | |||||
| * END IF | |||||
| * IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN | |||||
| * | |||||
| * Code for equal and positive increments. | |||||
| * | |||||
| * NS = N*INCX | |||||
| * DO I = 1,NS,INCX | |||||
| * DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I)) | |||||
| * END DO | |||||
| * ELSE | |||||
| * | |||||
| * Code for unequal or nonpositive increments. | |||||
| * | |||||
| * KX = 1 | |||||
| * KY = 1 | |||||
| * IF (INCX.LT.0) KX = 1 + (1-N)*INCX | |||||
| * IF (INCY.LT.0) KY = 1 + (1-N)*INCY | |||||
| * DO I = 1,N | |||||
| * DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY)) | |||||
| * KX = KX + INCX | |||||
| * KY = KY + INCY | |||||
| * END DO | |||||
| * END IF | |||||
| * SDSDOT = DSDOT | |||||
| * RETURN | |||||
| * END | |||||
| * | |||||
| *> \par Purpose: | |||||
| * ============= | |||||
| *> | *> | ||||
| *> \verbatim | |||||
| *> \endverbatim | |||||
| * | |||||
| * Authors: | |||||
| * ======== | |||||
| * | |||||
| *> \author Univ. of Tennessee | |||||
| *> \author Univ. of California Berkeley | |||||
| *> \author Univ. of Colorado Denver | |||||
| *> \author NAG Ltd. | |||||
| * | |||||
| *> \date November 2017 | |||||
| * | |||||
| *> \ingroup single_blas_level1 | |||||
| * | |||||
| * ===================================================================== | * ===================================================================== | ||||
| REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY) | REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY) | ||||
| * | * | ||||
| @@ -175,71 +124,6 @@ | |||||
| * .. | * .. | ||||
| * .. Array Arguments .. | * .. Array Arguments .. | ||||
| REAL SX(*),SY(*) | REAL SX(*),SY(*) | ||||
| * .. | |||||
| * | |||||
| * PURPOSE | |||||
| * ======= | |||||
| * | |||||
| * Compute the inner product of two vectors with extended | |||||
| * precision accumulation. | |||||
| * | |||||
| * Returns S.P. result with dot product accumulated in D.P. | |||||
| * SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||||
| * where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||||
| * defined in a similar way using INCY. | |||||
| * | |||||
| * AUTHOR | |||||
| * ====== | |||||
| * Lawson, C. L., (JPL), Hanson, R. J., (SNLA), | |||||
| * Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) | |||||
| * | |||||
| * ARGUMENTS | |||||
| * ========= | |||||
| * | |||||
| * N (input) INTEGER | |||||
| * number of elements in input vector(s) | |||||
| * | |||||
| * SB (input) REAL | |||||
| * single precision scalar to be added to inner product | |||||
| * | |||||
| * SX (input) REAL array, dimension (N) | |||||
| * single precision vector with N elements | |||||
| * | |||||
| * INCX (input) INTEGER | |||||
| * storage spacing between elements of SX | |||||
| * | |||||
| * SY (input) REAL array, dimension (N) | |||||
| * single precision vector with N elements | |||||
| * | |||||
| * INCY (input) INTEGER | |||||
| * storage spacing between elements of SY | |||||
| * | |||||
| * SDSDOT (output) REAL | |||||
| * single precision dot product (SB if N .LE. 0) | |||||
| * | |||||
| * Further Details | |||||
| * =============== | |||||
| * | |||||
| * REFERENCES | |||||
| * | |||||
| * C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. | |||||
| * Krogh, Basic linear algebra subprograms for Fortran | |||||
| * usage, Algorithm No. 539, Transactions on Mathematical | |||||
| * Software 5, 3 (September 1979), pp. 308-323. | |||||
| * | |||||
| * REVISION HISTORY (YYMMDD) | |||||
| * | |||||
| * 791001 DATE WRITTEN | |||||
| * 890531 Changed all specific intrinsics to generic. (WRB) | |||||
| * 890831 Modified array declarations. (WRB) | |||||
| * 890831 REVISION DATE from Version 3.2 | |||||
| * 891214 Prologue converted to Version 4.0 format. (BAB) | |||||
| * 920310 Corrected definition of LX in DESCRIPTION. (WRB) | |||||
| * 920501 Reformatted the REFERENCES section. (WRB) | |||||
| * 070118 Reformat to LAPACK coding style | |||||
| * | |||||
| * ===================================================================== | |||||
| * | |||||
| * .. Local Scalars .. | * .. Local Scalars .. | ||||
| DOUBLE PRECISION DSDOT | DOUBLE PRECISION DSDOT | ||||
| INTEGER I,KX,KY,NS | INTEGER I,KX,KY,NS | ||||
| @@ -1,5 +1,7 @@ | |||||
| include ../../make.inc | |||||
| TOPSRCDIR = ../.. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .PHONY: all single double complex complex16 | |||||
| all: single double complex complex16 | all: single double complex complex16 | ||||
| single: xblat1s xblat2s xblat3s | single: xblat1s xblat2s xblat3s | ||||
| double: xblat1d xblat2d xblat3d | double: xblat1d xblat2d xblat3d | ||||
| @@ -7,32 +9,33 @@ complex: xblat1c xblat2c xblat3c | |||||
| complex16: xblat1z xblat2z xblat3z | complex16: xblat1z xblat2z xblat3z | ||||
| xblat1s: sblat1.o $(BLASLIB) | xblat1s: sblat1.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat1d: dblat1.o $(BLASLIB) | xblat1d: dblat1.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat1c: cblat1.o $(BLASLIB) | xblat1c: cblat1.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat1z: zblat1.o $(BLASLIB) | xblat1z: zblat1.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat2s: sblat2.o $(BLASLIB) | xblat2s: sblat2.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat2d: dblat2.o $(BLASLIB) | xblat2d: dblat2.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat2c: cblat2.o $(BLASLIB) | xblat2c: cblat2.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat2z: zblat2.o $(BLASLIB) | xblat2z: zblat2.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat3s: sblat3.o $(BLASLIB) | xblat3s: sblat3.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat3d: dblat3.o $(BLASLIB) | xblat3d: dblat3.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat3c: cblat3.o $(BLASLIB) | xblat3c: cblat3.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xblat3z: zblat3.o $(BLASLIB) | xblat3z: zblat3.o $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| .PHONY: run | |||||
| run: all | run: all | ||||
| ./xblat1s > sblat1.out | ./xblat1s > sblat1.out | ||||
| ./xblat1d > dblat1.out | ./xblat1d > dblat1.out | ||||
| @@ -47,6 +50,7 @@ run: all | |||||
| ./xblat3c < cblat3.in | ./xblat3c < cblat3.in | ||||
| ./xblat3z < zblat3.in | ./xblat3z < zblat3.in | ||||
| .PHONY: clean cleanobj cleanexe cleantest | |||||
| clean: cleanobj cleanexe cleantest | clean: cleanobj cleanexe cleantest | ||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| @@ -54,6 +58,3 @@ cleanexe: | |||||
| rm -f xblat* | rm -f xblat* | ||||
| cleantest: | cleantest: | ||||
| rm -f *.out core | rm -f *.out core | ||||
| .f.o: | |||||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||||
| @@ -619,7 +619,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -991,7 +991,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -946,7 +946,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -619,7 +619,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -12,8 +12,10 @@ FortranCInterface_HEADER(${LAPACK_BINARY_DIR}/include/cblas_mangling.h | |||||
| SYMBOL_NAMESPACE "F77_") | SYMBOL_NAMESPACE "F77_") | ||||
| if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) | if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) | ||||
| message(WARNING "Reverting to pre-defined include/lapacke_mangling.h") | message(WARNING "Reverting to pre-defined include/lapacke_mangling.h") | ||||
| configure_file(include/lapacke_mangling_with_flags.h.in | |||||
| ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||||
| configure_file(include/lapacke_mangling_with_flags.h.in | |||||
| ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||||
| configure_file(include/cblas_mangling_with_flags.h.in | |||||
| ${LAPACK_BINARY_DIR}/include/cblas_mangling.h) | |||||
| endif() | endif() | ||||
| include_directories(include ${LAPACK_BINARY_DIR}/include) | include_directories(include ${LAPACK_BINARY_DIR}/include) | ||||
| @@ -28,7 +30,10 @@ endforeach() | |||||
| endmacro() | endmacro() | ||||
| append_subdir_files(CBLAS_INCLUDE "include") | append_subdir_files(CBLAS_INCLUDE "include") | ||||
| install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||||
| install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h | |||||
| DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||||
| COMPONENT Development | |||||
| ) | |||||
| # -------------------------------------------------- | # -------------------------------------------------- | ||||
| if(BUILD_TESTING) | if(BUILD_TESTING) | ||||
| @@ -45,7 +50,9 @@ endif() | |||||
| set(_cblas_config_install_guard_target "") | set(_cblas_config_install_guard_target "") | ||||
| if(ALL_TARGETS) | if(ALL_TARGETS) | ||||
| install(EXPORT cblas-targets | install(EXPORT cblas-targets | ||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||||
| COMPONENT Development | |||||
| ) | |||||
| # Choose one of the cblas targets to use as a guard for | # Choose one of the cblas targets to use as a guard for | ||||
| # cblas-config.cmake to load targets from the install tree. | # cblas-config.cmake to load targets from the install tree. | ||||
| list(GET ALL_TARGETS 0 _cblas_config_install_guard_target) | list(GET ALL_TARGETS 0 _cblas_config_install_guard_target) | ||||
| @@ -82,4 +89,6 @@ install(FILES | |||||
| ) | ) | ||||
| #install(EXPORT cblas-targets | #install(EXPORT cblas-targets | ||||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) | |||||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||||
| # COMPONENT Development | |||||
| # ) | |||||
| @@ -1,19 +1,25 @@ | |||||
| include ../make.inc | |||||
| TOPSRCDIR = .. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .PHONY: all | |||||
| all: cblas | all: cblas | ||||
| .PHONY: cblas | |||||
| cblas: include/cblas_mangling.h | cblas: include/cblas_mangling.h | ||||
| $(MAKE) -C src | $(MAKE) -C src | ||||
| include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in | include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in | ||||
| cp $< $@ | |||||
| cp include/cblas_mangling_with_flags.h.in $@ | |||||
| .PHONY: cblas_testing | |||||
| cblas_testing: cblas | cblas_testing: cblas | ||||
| $(MAKE) -C testing run | $(MAKE) -C testing run | ||||
| .PHONY: cblas_example | |||||
| cblas_example: cblas | cblas_example: cblas | ||||
| $(MAKE) -C examples | $(MAKE) -C examples | ||||
| .PHONY: clean cleanobj cleanlib cleanexe cleantest | |||||
| clean: | clean: | ||||
| $(MAKE) -C src clean | $(MAKE) -C src clean | ||||
| $(MAKE) -C testing clean | $(MAKE) -C testing clean | ||||
| @@ -1,17 +1,21 @@ | |||||
| include ../../make.inc | |||||
| TOPSRCDIR = ../.. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .SUFFIXES: .c .o | |||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| .PHONY: all | |||||
| all: cblas_ex1 cblas_ex2 | all: cblas_ex1 cblas_ex2 | ||||
| cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB) | cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB) | cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| .PHONY: clean cleanobj cleanexe | |||||
| clean: cleanobj cleanexe | clean: cleanobj cleanexe | ||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| cleanexe: | cleanexe: | ||||
| rm -f cblas_ex1 cblas_ex2 | rm -f cblas_ex1 cblas_ex2 | ||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| @@ -47,7 +47,7 @@ int main ( ) | |||||
| a[m*3+1] = 6; | a[m*3+1] = 6; | ||||
| a[m*3+2] = 7; | a[m*3+2] = 7; | ||||
| a[m*3+3] = 8; | a[m*3+3] = 8; | ||||
| /* The elemetns of x and y */ | |||||
| /* The elements of x and y */ | |||||
| x[0] = 1; | x[0] = 1; | ||||
| x[1] = 2; | x[1] = 2; | ||||
| x[2] = 1; | x[2] = 1; | ||||
| @@ -1,7 +1,13 @@ | |||||
| # This Makefile compiles the CBLAS routines | # This Makefile compiles the CBLAS routines | ||||
| include ../../make.inc | |||||
| TOPSRCDIR = ../.. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .SUFFIXES: .c .o | |||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| .PHONY: all | |||||
| all: $(CBLASLIB) | all: $(CBLASLIB) | ||||
| # Error handling routines for level 2 & 3 | # Error handling routines for level 2 & 3 | ||||
| @@ -43,24 +49,25 @@ zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \ | |||||
| # Common files for level 1 single precision | # Common files for level 1 single precision | ||||
| sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o | sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o | ||||
| .PHONY: slib1 dlib1 clib1 zlib1 | |||||
| # Single precision real | # Single precision real | ||||
| slib1: $(slev1) $(sclev1) | slib1: $(slev1) $(sclev1) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Double precision real | # Double precision real | ||||
| dlib1: $(dlev1) | dlib1: $(dlev1) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Single precision complex | # Single precision complex | ||||
| clib1: $(clev1) $(sclev1) | clib1: $(clev1) $(sclev1) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Double precision complex | # Double precision complex | ||||
| zlib1: $(zlev1) | zlib1: $(zlev1) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # | # | ||||
| @@ -95,24 +102,25 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ | |||||
| cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \ | cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \ | ||||
| cblas_zhpr.o cblas_zhpr2.o | cblas_zhpr.o cblas_zhpr2.o | ||||
| .PHONY: slib2 dlib2 clib2 zlib2 | |||||
| # Single precision real | # Single precision real | ||||
| slib2: $(slev2) $(errhand) | slib2: $(slev2) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Double precision real | # Double precision real | ||||
| dlib2: $(dlev2) $(errhand) | dlib2: $(dlev2) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Single precision complex | # Single precision complex | ||||
| clib2: $(clev2) $(errhand) | clib2: $(clev2) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Double precision complex | # Double precision complex | ||||
| zlib2: $(zlev2) $(errhand) | zlib2: $(zlev2) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # | # | ||||
| @@ -141,24 +149,25 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ | |||||
| cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \ | cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \ | ||||
| cblas_zsyr2k.o | cblas_zsyr2k.o | ||||
| .PHONY: slib3 dlib3 clib3 zlib3 | |||||
| # Single precision real | # Single precision real | ||||
| slib3: $(slev3) $(errhand) | slib3: $(slev3) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Double precision real | # Double precision real | ||||
| dlib3: $(dlev3) $(errhand) | dlib3: $(dlev3) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Single precision complex | # Single precision complex | ||||
| clib3: $(clev3) $(errhand) | clib3: $(clev3) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # Double precision complex | # Double precision complex | ||||
| zlib3: $(zlev3) $(errhand) | zlib3: $(zlev3) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| @@ -166,36 +175,33 @@ alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1) | |||||
| alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) | alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) | ||||
| alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) | alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) | ||||
| .PHONY: all1 all2 all3 | |||||
| # All level 1 | # All level 1 | ||||
| all1: $(alev1) | all1: $(alev1) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # All level 2 | # All level 2 | ||||
| all2: $(alev2) $(errhand) | all2: $(alev2) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # All level 3 | # All level 3 | ||||
| all3: $(alev3) $(errhand) | all3: $(alev3) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||||
| $(RANLIB) $(CBLASLIB) | $(RANLIB) $(CBLASLIB) | ||||
| # All levels and precisions | # All levels and precisions | ||||
| $(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand) | $(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand) | ||||
| $(ARCH) $(ARCHFLAGS) $@ $^ | |||||
| $(AR) $(ARFLAGS) $@ $^ | |||||
| $(RANLIB) $@ | $(RANLIB) $@ | ||||
| FRC: | FRC: | ||||
| @FRC=$(FRC) | @FRC=$(FRC) | ||||
| .PHONY: clean cleanobj cleanlib | |||||
| clean: cleanobj cleanlib | clean: cleanobj cleanlib | ||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| cleanlib: | cleanlib: | ||||
| rm -f $(CBLASLIB) | rm -f $(CBLASLIB) | ||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| .f.o: | |||||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||||
| @@ -91,7 +91,7 @@ void cblas_sgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, | |||||
| else | else | ||||
| { | { | ||||
| cblas_xerbla(2, "cblas_sgemm", | cblas_xerbla(2, "cblas_sgemm", | ||||
| "Illegal TransA setting, %d\n", TransA); | |||||
| "Illegal TransB setting, %d\n", TransB); | |||||
| CBLAS_CallFromC = 0; | CBLAS_CallFromC = 0; | ||||
| RowMajorStrg = 0; | RowMajorStrg = 0; | ||||
| return; | return; | ||||
| @@ -2,7 +2,12 @@ | |||||
| # The Makefile compiles c wrappers and testers for CBLAS. | # The Makefile compiles c wrappers and testers for CBLAS. | ||||
| # | # | ||||
| include ../../make.inc | |||||
| TOPSRCDIR = ../.. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .SUFFIXES: .c .o | |||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| # Archive files necessary to compile | # Archive files necessary to compile | ||||
| LIB = $(CBLASLIB) $(BLASLIB) | LIB = $(CBLASLIB) $(BLASLIB) | ||||
| @@ -27,6 +32,7 @@ ztestl1o = c_zblas1.o | |||||
| ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o | ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o | ||||
| ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o | ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o | ||||
| .PHONY: all all1 all2 all3 | |||||
| all: all1 all2 all3 | all: all1 all2 all3 | ||||
| all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | ||||
| all2: xscblat2 xdcblat2 xccblat2 xzcblat2 | all2: xscblat2 xdcblat2 xccblat2 xzcblat2 | ||||
| @@ -38,37 +44,38 @@ all3: xscblat3 xdcblat3 xccblat3 xzcblat3 | |||||
| # Single real | # Single real | ||||
| xscblat1: c_sblat1.o $(stestl1o) $(LIB) | xscblat1: c_sblat1.o $(stestl1o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xscblat2: c_sblat2.o $(stestl2o) $(LIB) | xscblat2: c_sblat2.o $(stestl2o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xscblat3: c_sblat3.o $(stestl3o) $(LIB) | xscblat3: c_sblat3.o $(stestl3o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| # Double real | # Double real | ||||
| xdcblat1: c_dblat1.o $(dtestl1o) $(LIB) | xdcblat1: c_dblat1.o $(dtestl1o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xdcblat2: c_dblat2.o $(dtestl2o) $(LIB) | xdcblat2: c_dblat2.o $(dtestl2o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xdcblat3: c_dblat3.o $(dtestl3o) $(LIB) | xdcblat3: c_dblat3.o $(dtestl3o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| # Single complex | # Single complex | ||||
| xccblat1: c_cblat1.o $(ctestl1o) $(LIB) | xccblat1: c_cblat1.o $(ctestl1o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xccblat2: c_cblat2.o $(ctestl2o) $(LIB) | xccblat2: c_cblat2.o $(ctestl2o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xccblat3: c_cblat3.o $(ctestl3o) $(LIB) | xccblat3: c_cblat3.o $(ctestl3o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| # Double complex | # Double complex | ||||
| xzcblat1: c_zblat1.o $(ztestl1o) $(LIB) | xzcblat1: c_zblat1.o $(ztestl1o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xzcblat2: c_zblat2.o $(ztestl2o) $(LIB) | xzcblat2: c_zblat2.o $(ztestl2o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| xzcblat3: c_zblat3.o $(ztestl3o) $(LIB) | xzcblat3: c_zblat3.o $(ztestl3o) $(LIB) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| # RUN TESTS | # RUN TESTS | ||||
| .PHONY: run | |||||
| run: all | run: all | ||||
| @echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--" | @echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--" | ||||
| @./xscblat1 > stest1.out | @./xscblat1 > stest1.out | ||||
| @@ -95,6 +102,7 @@ run: all | |||||
| @echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--" | @echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--" | ||||
| @./xzcblat3 < zin3 > ztest3.out | @./xzcblat3 < zin3 > ztest3.out | ||||
| .PHONY: clean cleanobj cleanexe cleantest | |||||
| clean: cleanobj cleanexe cleantest | clean: cleanobj cleanexe cleantest | ||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| @@ -102,9 +110,3 @@ cleanexe: | |||||
| rm -f x* | rm -f x* | ||||
| cleantest: | cleantest: | ||||
| rm -f *.out core | rm -f *.out core | ||||
| .SUFFIXES: .o .f .c | |||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| .f.o: | |||||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||||
| @@ -577,7 +577,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -653,7 +653,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -653,7 +653,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -577,7 +577,7 @@ | |||||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | ||||
| * ************************* STEST1 ***************************** | * ************************* STEST1 ***************************** | ||||
| * | * | ||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | ||||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | ||||
| * | * | ||||
| @@ -1,4 +1,4 @@ | |||||
| # This module checks against various known compilers and thier respective | |||||
| # This module checks against various known compilers and their respective | |||||
| # flags to determine any specific flags needing to be set. | # flags to determine any specific flags needing to be set. | ||||
| # | # | ||||
| # 1. If FPE traps are enabled either abort or disable them | # 1. If FPE traps are enabled either abort or disable them | ||||
| @@ -20,7 +20,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) | |||||
| get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | ||||
| foreach (LANG ${ENABLED_LANGUAGES}) | foreach (LANG ${ENABLED_LANGUAGES}) | ||||
| # Gcov evaluation is dependend on the used compiler. Check gcov support for | |||||
| # Gcov evaluation is dependent on the used compiler. Check gcov support for | |||||
| # each compiler that is used. If gcov binary was already found for this | # each compiler that is used. If gcov binary was already found for this | ||||
| # compiler, do not try to find it again. | # compiler, do not try to find it again. | ||||
| if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN) | if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN) | ||||
| @@ -42,7 +42,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) | |||||
| get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | ||||
| foreach (LANG ${ENABLED_LANGUAGES}) | foreach (LANG ${ENABLED_LANGUAGES}) | ||||
| # Coverage flags are not dependend on language, but the used compiler. So | |||||
| # Coverage flags are not dependent on language, but the used compiler. So | |||||
| # instead of searching flags foreach language, search flags foreach compiler | # instead of searching flags foreach language, search flags foreach compiler | ||||
| # used. | # used. | ||||
| set(COMPILER ${CMAKE_${LANG}_COMPILER_ID}) | set(COMPILER ${CMAKE_${LANG}_COMPILER_ID}) | ||||
| @@ -24,7 +24,7 @@ message(STATUS "=========") | |||||
| set(F77_OUTPUT_EXE "/Fe" CACHE INTERNAL | set(F77_OUTPUT_EXE "/Fe" CACHE INTERNAL | ||||
| "Fortran compiler option for setting executable file name.") | "Fortran compiler option for setting executable file name.") | ||||
| else() | else() | ||||
| # in other case, let user specify their fortran configrations. | |||||
| # in other case, let user specify their fortran configurations. | |||||
| set(F77_OPTION_COMPILE "-c" CACHE STRING | set(F77_OPTION_COMPILE "-c" CACHE STRING | ||||
| "Fortran compiler option for compiling without linking.") | "Fortran compiler option for compiling without linking.") | ||||
| set(F77_OUTPUT_OBJ "-o" CACHE STRING | set(F77_OUTPUT_OBJ "-o" CACHE STRING | ||||
| @@ -5,6 +5,10 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||||
| endif() | endif() | ||||
| unset(_LAPACK_TARGET) | unset(_LAPACK_TARGET) | ||||
| # Hint for project building against lapack | |||||
| set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@") | |||||
| # Report the blas and lapack raw or imported libraries. | # Report the blas and lapack raw or imported libraries. | ||||
| set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | ||||
| set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | ||||
| set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES}) | |||||
| @@ -8,8 +8,12 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||||
| endif() | endif() | ||||
| unset(_LAPACK_TARGET) | unset(_LAPACK_TARGET) | ||||
| # Hint for project building against lapack | |||||
| set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@") | |||||
| # Report the blas and lapack raw or imported libraries. | # Report the blas and lapack raw or imported libraries. | ||||
| set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | ||||
| set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | ||||
| set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES}) | |||||
| unset(_LAPACK_SELF_DIR) | unset(_LAPACK_SELF_DIR) | ||||
| @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.12) | |||||
| project(LAPACK Fortran C) | project(LAPACK Fortran C) | ||||
| set(LAPACK_MAJOR_VERSION 3) | set(LAPACK_MAJOR_VERSION 3) | ||||
| set(LAPACK_MINOR_VERSION 8) | |||||
| set(LAPACK_MINOR_VERSION 9) | |||||
| set(LAPACK_PATCH_VERSION 0) | set(LAPACK_PATCH_VERSION 0) | ||||
| set( | set( | ||||
| LAPACK_VERSION | LAPACK_VERSION | ||||
| @@ -13,6 +13,9 @@ set( | |||||
| # Add the CMake directory for custom CMake modules | # Add the CMake directory for custom CMake modules | ||||
| set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) | set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) | ||||
| # Export all symbols on Windows when building shared libraries | |||||
| SET(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||||
| # Set a default build type if none was specified | # Set a default build type if none was specified | ||||
| if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) | ||||
| message(STATUS "Setting build type to 'Release' as none was specified.") | message(STATUS "Setting build type to 'Release' as none was specified.") | ||||
| @@ -21,8 +24,19 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) | |||||
| set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage") | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage") | ||||
| endif() | endif() | ||||
| string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) | |||||
| if(${CMAKE_BUILD_TYPE_UPPER} STREQUAL "COVERAGE") | |||||
| # Coverage | |||||
| set(_is_coverage_build 0) | |||||
| set(_msg "Checking if build type is 'Coverage'") | |||||
| message(STATUS "${_msg}") | |||||
| if(NOT CMAKE_CONFIGURATION_TYPES) | |||||
| string(TOLOWER ${CMAKE_BUILD_TYPE} _build_type_lc) | |||||
| if(${_build_type_lc} STREQUAL "coverage") | |||||
| set(_is_coverage_build 1) | |||||
| endif() | |||||
| endif() | |||||
| message(STATUS "${_msg}: ${_is_coverage_build}") | |||||
| if(_is_coverage_build) | |||||
| message(STATUS "Adding coverage") | message(STATUS "Adding coverage") | ||||
| find_package(codecov) | find_package(codecov) | ||||
| endif() | endif() | ||||
| @@ -58,18 +72,18 @@ include(PreventInSourceBuilds) | |||||
| include(PreventInBuildInstalls) | include(PreventInBuildInstalls) | ||||
| if(UNIX) | if(UNIX) | ||||
| if("${CMAKE_Fortran_COMPILER}" MATCHES "ifort") | |||||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict") | |||||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) | |||||
| list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict") | |||||
| endif() | endif() | ||||
| if("${CMAKE_Fortran_COMPILER}" MATCHES "xlf") | |||||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none") | |||||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) | |||||
| list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none") | |||||
| endif() | endif() | ||||
| # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | ||||
| # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | ||||
| string(REPLACE \;mtsk\; \; CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES "${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}") | string(REPLACE \;mtsk\; \; CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES "${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}") | ||||
| endif() | endif() | ||||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq") | |||||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL Compaq) | |||||
| if(WIN32) | if(WIN32) | ||||
| if(CMAKE_GENERATOR STREQUAL "NMake Makefiles") | if(CMAKE_GENERATOR STREQUAL "NMake Makefiles") | ||||
| get_filename_component(CMAKE_Fortran_COMPILER_CMDNAM ${CMAKE_Fortran_COMPILER} NAME_WE) | get_filename_component(CMAKE_Fortran_COMPILER_CMDNAM ${CMAKE_Fortran_COMPILER} NAME_WE) | ||||
| @@ -96,24 +110,16 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq") | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| # Get Python | |||||
| message(STATUS "Looking for Python greater than 2.6 - ${PYTHONINTERP_FOUND}") | |||||
| find_package(PythonInterp 2.7) # lapack_testing.py uses features from python 2.7 and greater | |||||
| if(PYTHONINTERP_FOUND) | |||||
| message(STATUS "Using Python version ${PYTHON_VERSION_STRING}") | |||||
| else() | |||||
| message(STATUS "No suitable Python version found, so skipping summary tests.") | |||||
| endif() | |||||
| # -------------------------------------------------- | |||||
| # -------------------------------------------------- | |||||
| set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) | set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) | ||||
| macro(lapack_install_library lib) | macro(lapack_install_library lib) | ||||
| install(TARGETS ${lib} | install(TARGETS ${lib} | ||||
| EXPORT ${LAPACK_INSTALL_EXPORT_NAME} | EXPORT ${LAPACK_INSTALL_EXPORT_NAME} | ||||
| ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} | |||||
| LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} | |||||
| RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} | |||||
| ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT Development | |||||
| LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT RuntimeLibraries | |||||
| RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT RuntimeLibraries | |||||
| ) | ) | ||||
| endmacro() | endmacro() | ||||
| @@ -121,12 +127,22 @@ set(PKG_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig) | |||||
| # -------------------------------------------------- | # -------------------------------------------------- | ||||
| # Testing | # Testing | ||||
| option(BUILD_TESTING "Build tests" OFF) | |||||
| enable_testing() | |||||
| option(BUILD_TESTING "Build tests" ${_is_coverage_build}) | |||||
| include(CTest) | include(CTest) | ||||
| enable_testing() | |||||
| message(STATUS "Build tests: ${BUILD_TESTING}") | message(STATUS "Build tests: ${BUILD_TESTING}") | ||||
| # lapack_testing.py uses features from python 2.7 and greater | |||||
| if(BUILD_TESTING) | |||||
| set(_msg "Looking for Python >= 2.7 needed for summary tests") | |||||
| message(STATUS "${_msg}") | |||||
| find_package(PythonInterp 2.7 QUIET) | |||||
| if(PYTHONINTERP_FOUND) | |||||
| message(STATUS "${_msg} - found (${PYTHON_VERSION_STRING})") | |||||
| else() | |||||
| message(STATUS "${_msg} - not found (skipping summary tests)") | |||||
| endif() | |||||
| endif() | |||||
| # -------------------------------------------------- | # -------------------------------------------------- | ||||
| # Organize output files. On Windows this also keeps .dll files next | # Organize output files. On Windows this also keeps .dll files next | ||||
| # to the .exe files that need them, making tests easy to run. | # to the .exe files that need them, making tests easy to run. | ||||
| @@ -299,16 +315,40 @@ if(LAPACKE) | |||||
| add_subdirectory(LAPACKE) | add_subdirectory(LAPACKE) | ||||
| endif() | endif() | ||||
| #------------------------------------- | |||||
| # BLAS++ / LAPACK++ | |||||
| option(BLAS++ "Build BLAS++" OFF) | |||||
| option(LAPACK++ "Build LAPACK++" OFF) | |||||
| function(_display_cpp_implementation_msg name) | |||||
| string(TOLOWER ${name} name_lc) | |||||
| message(STATUS "${name}++ enable") | |||||
| message(STATUS "----------------") | |||||
| message(STATUS "Thank you for your interest in ${name}++, a newly developed C++ API for ${name} library") | |||||
| message(STATUS "The objective of ${name}++ is to provide a convenient, performance oriented API for development in the C++ language, that, for the most part, preserves established conventions, while, at the same time, takes advantages of modern C++ features, such as: namespaces, templates, exceptions, etc.") | |||||
| message(STATUS "We are still working on integrating ${name}++ in our library. For the moment, you can download directly ${name_lc}++ from https://bitbucket.org/icl/${name_lc}pp") | |||||
| message(STATUS "For support ${name}++ related question, please email: slate-user@icl.utk.edu") | |||||
| message(STATUS "----------------") | |||||
| endfunction() | |||||
| if(BLAS++) | |||||
| _display_cpp_implementation_msg("BLAS") | |||||
| endif() | |||||
| if(LAPACK++) | |||||
| _display_cpp_implementation_msg("LAPACK") | |||||
| endif() | |||||
| # -------------------------------------------------- | # -------------------------------------------------- | ||||
| # CPACK Packaging | # CPACK Packaging | ||||
| set(CPACK_PACKAGE_NAME "LAPACK") | set(CPACK_PACKAGE_NAME "LAPACK") | ||||
| set(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd") | set(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd") | ||||
| set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "LAPACK- Linear Algebra Package") | set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "LAPACK- Linear Algebra Package") | ||||
| set(CPACK_PACKAGE_VERSION_MAJOR 3) | |||||
| set(CPACK_PACKAGE_VERSION_MINOR 5) | |||||
| set(CPACK_PACKAGE_VERSION_PATCH 0) | |||||
| set(CPACK_PACKAGE_VERSION_MAJOR ${LAPACK_MAJOR_VERSION}) | |||||
| set(CPACK_PACKAGE_VERSION_MINOR ${LAPACK_MINOR_VERSION}) | |||||
| set(CPACK_PACKAGE_VERSION_PATCH ${LAPACK_PATCH_VERSION}) | |||||
| set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") | set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") | ||||
| set(CPACK_MONOLITHIC_INSTALL ON) | |||||
| set(CPACK_PACKAGE_INSTALL_DIRECTORY "LAPACK") | set(CPACK_PACKAGE_INSTALL_DIRECTORY "LAPACK") | ||||
| if(WIN32 AND NOT UNIX) | if(WIN32 AND NOT UNIX) | ||||
| # There is a bug in NSI that does not handle full unix paths properly. Make | # There is a bug in NSI that does not handle full unix paths properly. Make | ||||
| @@ -347,7 +387,9 @@ endif() | |||||
| set(_lapack_config_install_guard_target "") | set(_lapack_config_install_guard_target "") | ||||
| if(ALL_TARGETS) | if(ALL_TARGETS) | ||||
| install(EXPORT lapack-targets | install(EXPORT lapack-targets | ||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}) | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||||
| COMPONENT Development | |||||
| ) | |||||
| # Choose one of the lapack targets to use as a guard for | # Choose one of the lapack targets to use as a guard for | ||||
| # lapack-config.cmake to load targets from the install tree. | # lapack-config.cmake to load targets from the install tree. | ||||
| @@ -382,6 +424,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_D | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc | ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc | ||||
| DESTINATION ${PKG_CONFIG_DIR} | DESTINATION ${PKG_CONFIG_DIR} | ||||
| COMPONENT Development | |||||
| ) | ) | ||||
| configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | ||||
| @@ -398,4 +441,6 @@ install(FILES | |||||
| ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake | ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake | ||||
| ${LAPACK_BINARY_DIR}/lapack-config-version.cmake | ${LAPACK_BINARY_DIR}/lapack-config-version.cmake | ||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | ||||
| COMPONENT Development | |||||
| ) | ) | ||||
| @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK | |||||
| # could be handy for archiving the generated documentation or if some version | # could be handy for archiving the generated documentation or if some version | ||||
| # control system is used. | # control system is used. | ||||
| PROJECT_NUMBER = 3.8.0 | |||||
| PROJECT_NUMBER = 3.9.0 | |||||
| # Using the PROJECT_BRIEF tag one can provide an optional one line description | # Using the PROJECT_BRIEF tag one can provide an optional one line description | ||||
| # for a project that appears at the top of each page and should give viewer a | # for a project that appears at the top of each page and should give viewer a | ||||
| @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK | |||||
| # could be handy for archiving the generated documentation or if some version | # could be handy for archiving the generated documentation or if some version | ||||
| # control system is used. | # control system is used. | ||||
| PROJECT_NUMBER = 3.8.0 | |||||
| PROJECT_NUMBER = 3.9.0 | |||||
| # Using the PROJECT_BRIEF tag one can provide an optional one line description | # Using the PROJECT_BRIEF tag one can provide an optional one line description | ||||
| # for a project that appears at the top of each page and should give viewer a | # for a project that appears at the top of each page and should give viewer a | ||||
| @@ -439,39 +439,39 @@ SHELL = /bin/sh | |||||
| \end{quote} | \end{quote} | ||||
| and it will need to be modified to \texttt{SHELL = /sbin/sh} if you are | and it will need to be modified to \texttt{SHELL = /sbin/sh} if you are | ||||
| installing LAPACK on an SGI architecture. | installing LAPACK on an SGI architecture. | ||||
| Second, you will | |||||
| need to modify the \texttt{PLAT} definition, which is appended to all | |||||
| library names, to specify the architecture to which you are installing | |||||
| LAPACK. This features avoids confusion in library names when you are | |||||
| installing LAPACK on more than one architecture. Next, you will need | |||||
| to modify \texttt{FORTRAN}, \texttt{OPTS}, \texttt{DRVOPTS}, \texttt{NOOPT}, \texttt{LOADER}, | |||||
| and \texttt{LOADOPTS} to specify | |||||
| Next, you will need to modify \texttt{FC}, \texttt{FFLAGS}, | |||||
| \texttt{FFLAGS\_DRV}, \texttt{FFLAGS\_NOOPT}, and \texttt{LDFLAGS} to specify | |||||
| the compiler, compiler options, compiler options for the testing and | the compiler, compiler options, compiler options for the testing and | ||||
| timing\footnotemark[\value{footnote}] main programs, loader, loader options. | |||||
| Next you will have to choose which function you will use to time in the \texttt{SECOND} and \texttt{DSECND} routines. | |||||
| timing\footnotemark[\value{footnote}] main programs, and linker options. | |||||
| Next you will have to choose which function you will use to time in the | |||||
| \texttt{SECOND} and \texttt{DSECND} routines. | |||||
| \begin{verbatim} | \begin{verbatim} | ||||
| #The Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME | |||||
| TIMER = EXT_ETIME | |||||
| # For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ | |||||
| # TIMER = EXT_ETIME_ | |||||
| # For gfortran compiler: SECOND and DSECND will use the INTERNAL FUNCTION ETIME | |||||
| # TIMER = INT_ETIME | |||||
| # If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) | |||||
| # SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME | |||||
| # TIMER = INT_CPU_TIME | |||||
| # If neither of this works...you can use the NONE value... | |||||
| # In that case, SECOND and DSECND will always return 0 | |||||
| # TIMER = NONE | |||||
| # Default: SECOND and DSECND will use a call to the | |||||
| # EXTERNAL FUNCTION ETIME | |||||
| #TIMER = EXT_ETIME | |||||
| # For RS6K: SECOND and DSECND will use a call to the | |||||
| # EXTERNAL FUNCTION ETIME_ | |||||
| #TIMER = EXT_ETIME_ | |||||
| # For gfortran compiler: SECOND and DSECND will use a call to the | |||||
| # INTERNAL FUNCTION ETIME | |||||
| TIMER = INT_ETIME | |||||
| # If your Fortran compiler does not provide etime (like Nag Fortran | |||||
| # Compiler, etc...) SECOND and DSECND will use a call to the | |||||
| # INTERNAL FUNCTION CPU_TIME | |||||
| #TIMER = INT_CPU_TIME | |||||
| # If none of these work, you can use the NONE value. | |||||
| # In that case, SECOND and DSECND will always return 0. | |||||
| #TIMER = NONE | |||||
| \end{verbatim} | \end{verbatim} | ||||
| Refer to Section~\ref{second} for more information. | Refer to Section~\ref{second} for more information. | ||||
| Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, | |||||
| Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify the archiver, | |||||
| archiver options, and ranlib for your machine. If your architecture | archiver options, and ranlib for your machine. If your architecture | ||||
| does not require \texttt{ranlib} to be run after each archive command (as | does not require \texttt{ranlib} to be run after each archive command (as | ||||
| is the case with CRAY computers running UNICOS, Hewlett Packard | is the case with CRAY computers running UNICOS, Hewlett Packard | ||||
| computers running HP-UX, or SUN SPARCstations running Solaris), set | computers running HP-UX, or SUN SPARCstations running Solaris), set | ||||
| \texttt{ranlib=echo}. And finally, you must | |||||
| \texttt{RANLIB = echo}. And finally, you must | |||||
| modify the \texttt{BLASLIB} definition to specify the BLAS library to which | modify the \texttt{BLASLIB} definition to specify the BLAS library to which | ||||
| you will be linking. If an optimized version of the BLAS is available | you will be linking. If an optimized version of the BLAS is available | ||||
| on your machine, you are highly recommended to link to that library. | on your machine, you are highly recommended to link to that library. | ||||
| @@ -721,24 +721,24 @@ The version that will be used depends on the value of the TIMER variable in the | |||||
| \begin{itemize} | \begin{itemize} | ||||
| \item If ETIME is available as an external function, set the value of the TIMER variable in your | \item If ETIME is available as an external function, set the value of the TIMER variable in your | ||||
| make.inc to \texttt{EXT\_ETIME}:\texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used. | |||||
| make.inc to \texttt{EXT\_ETIME}: \texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used. | |||||
| Usually on HPPA architectures, | Usually on HPPA architectures, | ||||
| the compiler and loader flag \texttt{+U77} should be included to access | |||||
| the compiler and linker flag \texttt{+U77} should be included to access | |||||
| the function \texttt{ETIME}. | the function \texttt{ETIME}. | ||||
| \item If ETIME\_ is available as an external function, set the value of the TIMER variable in your make.inc | \item If ETIME\_ is available as an external function, set the value of the TIMER variable in your make.inc | ||||
| to \texttt{EXT\_ETIME\_}:\texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used. | |||||
| to \texttt{EXT\_ETIME\_}: \texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used. | |||||
| It is the case on some IBM architectures such as IBM RS/6000s. | It is the case on some IBM architectures such as IBM RS/6000s. | ||||
| \item If ETIME is available as an internal function, set the value of the TIMER variable in your make.inc | \item If ETIME is available as an internal function, set the value of the TIMER variable in your make.inc | ||||
| to \texttt{INT\_ETIME}:\texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used. | |||||
| to \texttt{INT\_ETIME}: \texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used. | |||||
| This is the case with gfortran. | This is the case with gfortran. | ||||
| \item If CPU\_TIME is available as an internal function, set the value of the TIMER variable in your make.inc | \item If CPU\_TIME is available as an internal function, set the value of the TIMER variable in your make.inc | ||||
| to \texttt{INT\_CPU\_TIME}:\texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used. | |||||
| to \texttt{INT\_CPU\_TIME}: \texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used. | |||||
| \item If none of these functions is available, set the value of the TIMER variable in your make.inc | \item If none of these functions is available, set the value of the TIMER variable in your make.inc | ||||
| to \texttt{NONE:}\texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used. | |||||
| to \texttt{NONE}: \texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used. | |||||
| These routines will always return zero. | These routines will always return zero. | ||||
| \end{itemize} | \end{itemize} | ||||
| @@ -829,8 +829,8 @@ data type to the library if necessary. | |||||
| \end{itemize} | \end{itemize} | ||||
| \noindent | \noindent | ||||
| The BLAS library is created in \texttt{LAPACK/blas\_PLAT.a}, where | |||||
| \texttt{PLAT} is the user-defined architecture suffix specified in the file | |||||
| The BLAS library is created in \texttt{LAPACK/librefblas.a}, | |||||
| or in the user-defined location specified by \texttt{BLASLIB} in the file | |||||
| \texttt{LAPACK/make.inc}. | \texttt{LAPACK/make.inc}. | ||||
| \subsection{Run the BLAS Test Programs}\label{testblas} | \subsection{Run the BLAS Test Programs}\label{testblas} | ||||
| @@ -882,8 +882,8 @@ data type to the library if necessary. | |||||
| \end{itemize} | \end{itemize} | ||||
| \noindent | \noindent | ||||
| The LAPACK library is created in \texttt{LAPACK/lapack\_PLAT.a}, where | |||||
| \texttt{PLAT} is the user-defined architecture suffix specified in the file | |||||
| The LAPACK library is created in \texttt{LAPACK/liblapack.a}, | |||||
| or in the user-defined location specified by \texttt{LAPACKLIB} in the file | |||||
| \texttt{LAPACK/make.inc}. | \texttt{LAPACK/make.inc}. | ||||
| \subsection{Create the Test Matrix Generator Library} | \subsection{Create the Test Matrix Generator Library} | ||||
| @@ -902,9 +902,9 @@ data type to the library if necessary. | |||||
| \end{itemize} | \end{itemize} | ||||
| \noindent | \noindent | ||||
| The test matrix generator library is created in \texttt{LAPACK/tmglib\_PLAT.a}, | |||||
| where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||||
| file \texttt{LAPACK/make.inc}. | |||||
| The test matrix generator library is created in \texttt{LAPACK/libtmglib.a}, | |||||
| or in the user-defined location specified by \texttt{TMGLIB} in the file | |||||
| \texttt{LAPACK/make.inc}. | |||||
| \subsection{Run the LAPACK Test Programs} | \subsection{Run the LAPACK Test Programs} | ||||
| @@ -1114,9 +1114,7 @@ To make a library of the instrumented LAPACK routines, first | |||||
| go to \texttt{LAPACK/TIMING/LIN/LINSRC} and type \texttt{make} followed | go to \texttt{LAPACK/TIMING/LIN/LINSRC} and type \texttt{make} followed | ||||
| by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | ||||
| The library of instrumented code is created in | The library of instrumented code is created in | ||||
| \texttt{LAPACK/TIMING/LIN/linsrc\_PLAT.a}, | |||||
| where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||||
| file \texttt{LAPACK/make.inc}. | |||||
| \texttt{LAPACK/TIMING/LIN/linsrc.a}. | |||||
| \end{sloppypar} | \end{sloppypar} | ||||
| \item[b)] | \item[b)] | ||||
| @@ -1251,9 +1249,7 @@ To make a library of the instrumented LAPACK routines, first | |||||
| go to \texttt{LAPACK/TIMING/EIG/EIGSRC} and type \texttt{make} followed | go to \texttt{LAPACK/TIMING/EIG/EIGSRC} and type \texttt{make} followed | ||||
| by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | ||||
| The library of instrumented code is created in | The library of instrumented code is created in | ||||
| \texttt{LAPACK/TIMING/EIG/eigsrc\_PLAT.a}, | |||||
| where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||||
| file \texttt{LAPACK/make.inc}. | |||||
| \texttt{LAPACK/TIMING/EIG/eigsrc.a}. | |||||
| \end{sloppypar} | \end{sloppypar} | ||||
| \item[b)] | \item[b)] | ||||
| @@ -1389,7 +1385,7 @@ installing LAPACK on an SGI architecture. | |||||
| \section{ETIME} | \section{ETIME} | ||||
| On HPPA architectures, | On HPPA architectures, | ||||
| the compiler and loader flag \texttt{+U77} should be included to access | |||||
| the compiler and linker flag \texttt{+U77} should be included to access | |||||
| the function \texttt{ETIME}. | the function \texttt{ETIME}. | ||||
| \section{ILAENV and IEEE-754 compliance} | \section{ILAENV and IEEE-754 compliance} | ||||
| @@ -1494,13 +1490,13 @@ has two options: increase your stack size, or force all local variables | |||||
| to be allocated statically. | to be allocated statically. | ||||
| On HPPA architectures, the | On HPPA architectures, the | ||||
| compiler and loader flag \texttt{-K} should be used when compiling these testing | |||||
| compiler and linker flag \texttt{-K} should be used when compiling these testing | |||||
| and timing main programs to avoid such a stack overflow. I.e., set | and timing main programs to avoid such a stack overflow. I.e., set | ||||
| \texttt{DRVOPTS = -K} in the \texttt{LAPACK/make.inc} file. | |||||
| \texttt{FFLAGS\_DRV = -K} in the \texttt{LAPACK/make.inc} file. | |||||
| For similar reasons, | For similar reasons, | ||||
| on SGI architectures, the compiler and loader flag \texttt{-static} should be | |||||
| used. I.e., set \texttt{DRVOPTS = -static} in the \texttt{LAPACK/make.inc} file. | |||||
| on SGI architectures, the compiler and linker flag \texttt{-static} should be | |||||
| used. I.e., set \texttt{FFLAGS\_DRV = -static} in the \texttt{LAPACK/make.inc} file. | |||||
| \section{IEEE arithmetic} | \section{IEEE arithmetic} | ||||
| @@ -1,30 +1,33 @@ | |||||
| include ../make.inc | |||||
| TOPSRCDIR = .. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .PHONY: all testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | |||||
| all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | ||||
| testlsame: lsame.o lsametst.o | testlsame: lsame.o lsametst.o | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| testslamch: slamch.o lsame.o slamchtst.o | testslamch: slamch.o lsame.o slamchtst.o | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| testdlamch: dlamch.o lsame.o dlamchtst.o | testdlamch: dlamch.o lsame.o dlamchtst.o | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| testsecond: second_$(TIMER).o secondtst.o | testsecond: second_$(TIMER).o secondtst.o | ||||
| @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| testdsecnd: dsecnd_$(TIMER).o dsecndtst.o | testdsecnd: dsecnd_$(TIMER).o dsecndtst.o | ||||
| @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| testieee: tstiee.o | testieee: tstiee.o | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| testversion: ilaver.o LAPACK_version.o | testversion: ilaver.o LAPACK_version.o | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| .PHONY: run | |||||
| run: all | run: all | ||||
| ./testlsame | ./testlsame | ||||
| ./testslamch | ./testslamch | ||||
| @@ -34,6 +37,7 @@ run: all | |||||
| ./testieee | ./testieee | ||||
| ./testversion | ./testversion | ||||
| .PHONY: clean cleanobj cleanexe cleantest | |||||
| clean: cleanobj cleanexe cleantest | clean: cleanobj cleanexe cleantest | ||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| @@ -42,9 +46,5 @@ cleanexe: | |||||
| cleantest: | cleantest: | ||||
| rm -f core | rm -f core | ||||
| .SUFFIXES: .o .f | |||||
| .f.o: | |||||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||||
| slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< | |||||
| dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< | |||||
| slamch.o: slamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||||
| dlamch.o: dlamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||||
| @@ -10,6 +10,10 @@ | |||||
| * | * | ||||
| * DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | * DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | ||||
| * | * | ||||
| * .. Scalar Arguments .. | |||||
| * CHARACTER CMACH | |||||
| * .. | |||||
| * | |||||
| * | * | ||||
| *> \par Purpose: | *> \par Purpose: | ||||
| * ============= | * ============= | ||||
| @@ -24,6 +28,7 @@ | |||||
| * | * | ||||
| *> \param[in] CMACH | *> \param[in] CMACH | ||||
| *> \verbatim | *> \verbatim | ||||
| *> CMACH is CHARACTER*1 | |||||
| *> Specifies the value to be returned by DLAMCH: | *> Specifies the value to be returned by DLAMCH: | ||||
| *> = 'E' or 'e', DLAMCH := eps | *> = 'E' or 'e', DLAMCH := eps | ||||
| *> = 'S' or 's', DLAMCH := sfmin | *> = 'S' or 's', DLAMCH := sfmin | ||||
| @@ -10,6 +10,10 @@ | |||||
| * | * | ||||
| * DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | * DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | ||||
| * | * | ||||
| * .. Scalar Arguments .. | |||||
| * CHARACTER CMACH | |||||
| * .. | |||||
| * | |||||
| * | * | ||||
| *> \par Purpose: | *> \par Purpose: | ||||
| * ============= | * ============= | ||||
| @@ -25,12 +25,15 @@ | |||||
| * ========== | * ========== | ||||
| * | * | ||||
| *> \param[out] VERS_MAJOR | *> \param[out] VERS_MAJOR | ||||
| *> VERS_MAJOR is INTEGER | |||||
| *> return the lapack major version | *> return the lapack major version | ||||
| *> | *> | ||||
| *> \param[out] VERS_MINOR | *> \param[out] VERS_MINOR | ||||
| *> VERS_MINOR is INTEGER | |||||
| *> return the lapack minor version from the major version | *> return the lapack minor version from the major version | ||||
| *> | *> | ||||
| *> \param[out] VERS_PATCH | *> \param[out] VERS_PATCH | ||||
| *> VERS_PATCH is INTEGER | |||||
| *> return the lapack patch version from the minor version | *> return the lapack patch version from the minor version | ||||
| * | * | ||||
| * Authors: | * Authors: | ||||
| @@ -41,24 +44,23 @@ | |||||
| *> \author Univ. of Colorado Denver | *> \author Univ. of Colorado Denver | ||||
| *> \author NAG Ltd. | *> \author NAG Ltd. | ||||
| * | * | ||||
| *> \date June 2017 | |||||
| *> \date November 2019 | |||||
| * | * | ||||
| *> \ingroup auxOTHERauxiliary | *> \ingroup auxOTHERauxiliary | ||||
| * | * | ||||
| * ===================================================================== | * ===================================================================== | ||||
| SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) | SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) | ||||
| * | * | ||||
| * -- LAPACK computational routine (version 3.7.1) -- | |||||
| * -- LAPACK computational routine -- | |||||
| * -- LAPACK is a software package provided by Univ. of Tennessee, -- | * -- LAPACK is a software package provided by Univ. of Tennessee, -- | ||||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | ||||
| * June 2017 | |||||
| * | * | ||||
| * ===================================================================== | * ===================================================================== | ||||
| * | * | ||||
| INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH | INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH | ||||
| * ===================================================================== | * ===================================================================== | ||||
| VERS_MAJOR = 3 | VERS_MAJOR = 3 | ||||
| VERS_MINOR = 8 | |||||
| VERS_MINOR = 9 | |||||
| VERS_PATCH = 0 | VERS_PATCH = 0 | ||||
| * ===================================================================== | * ===================================================================== | ||||
| * | * | ||||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = cc | |||||
| CC = cc | |||||
| CFLAGS = -O4 | CFLAGS = -O4 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = f77 | |||||
| OPTS = -O4 -fpe1 | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = | |||||
| FC = f77 | |||||
| FFLAGS = -O4 -fpe1 | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = f77 | |||||
| LOADOPTS = | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -74,9 +72,9 @@ TIMER = EXT_ETIME | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = ../../librefblas.a | |||||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| BLASLIB = -ldxml | BLASLIB = -ldxml | ||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = cc | |||||
| CC = cc | |||||
| CFLAGS = | CFLAGS = | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = f77 | |||||
| OPTS = +O4 +U77 | |||||
| DRVOPTS = $(OPTS) -K | |||||
| NOOPT = +U77 | |||||
| FC = f77 | |||||
| FFLAGS = +O4 +U77 | |||||
| FFLAGS_DRV = $(FFLAGS) -K | |||||
| FFLAGS_NOOPT = +U77 | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = f77 | |||||
| LOADOPTS = -Aa +U77 | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = echo | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = echo | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -74,9 +72,9 @@ TIMER = EXT_ETIME | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = ../../librefblas.a | |||||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| BLASLIB = -lblas | BLASLIB = -lblas | ||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,33 +8,30 @@ SHELL = /sbin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = cc | |||||
| CC = cc | |||||
| CFLAGS = -O3 | CFLAGS = -O3 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = f77 | |||||
| OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||||
| #OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||||
| DRVOPTS = $(OPTS) -static | |||||
| NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||||
| #NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||||
| FC = f77 | |||||
| FFLAGS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||||
| #FFLAGS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||||
| FFLAGS_DRV = $(FFLAGS) -static | |||||
| FFLAGS_NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||||
| #FFLAGS_NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = f77 | |||||
| LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||||
| #LOADOPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = echo | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = echo | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -78,8 +75,8 @@ TIMER = EXT_ETIME | |||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = -lblas | #BLASLIB = -lblas | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,33 +8,30 @@ SHELL = /sbin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = cc | |||||
| CC = cc | |||||
| CFLAGS = -O3 | CFLAGS = -O3 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = f77 | |||||
| OPTS = -O3 -64 -mips4 -r10000 | |||||
| #OPTS = -O3 -64 -mips4 -r10000 -mp | |||||
| DRVOPTS = $(OPTS) -static | |||||
| NOOPT = -64 -mips4 -r10000 | |||||
| #NOOPT = -64 -mips4 -r10000 -mp | |||||
| FC = f77 | |||||
| FFLAGS = -O3 -64 -mips4 -r10000 | |||||
| #FFLAGS = -O3 -64 -mips4 -r10000 -mp | |||||
| FFLAGS_DRV = $(FFLAGS) -static | |||||
| FFLAGS_NOOPT = -64 -mips4 -r10000 | |||||
| #FFLAGS_NOOPT = -64 -mips4 -r10000 -mp | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = f77 | |||||
| LOADOPTS = -O3 -64 -mips4 -r10000 | |||||
| #LOADOPTS = -O3 -64 -mips4 -r10000 -mp | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = echo | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = echo | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -79,8 +76,8 @@ TIMER = EXT_ETIME | |||||
| # | # | ||||
| BLASLIB = -lblas | BLASLIB = -lblas | ||||
| #BLASLIB = -lblas_mp | #BLASLIB = -lblas_mp | ||||
| #BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,30 +8,28 @@ SHELL = /sbin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = cc | |||||
| CC = cc | |||||
| CFLAGS = -O4 | CFLAGS = -O4 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = f77 | |||||
| OPTS = -O4 | |||||
| DRVOPTS = $(OPTS) -static | |||||
| NOOPT = | |||||
| FC = f77 | |||||
| FFLAGS = -O4 | |||||
| FFLAGS_DRV = $(FFLAGS) -static | |||||
| FFLAGS_NOOPT = | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = f77 | |||||
| LOADOPTS = | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = echo | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = echo | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = -lblas | #BLASLIB = -lblas | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = cc | |||||
| CC = cc | |||||
| CFLAGS = -O3 | CFLAGS = -O3 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = f77 | |||||
| OPTS = -dalign -O4 -fast | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = | |||||
| FC = f77 | |||||
| FFLAGS = -dalign -O4 -fast | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = f77 | |||||
| LOADOPTS = -dalign -O4 -fast | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = -lblas | #BLASLIB = -lblas | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,34 +8,31 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = cc | |||||
| CC = cc | |||||
| CFLAGS = -O3 | CFLAGS = -O3 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = f77 | |||||
| #OPTS = -O4 -u -f -mt | |||||
| #OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa | |||||
| OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = -u -f | |||||
| #NOOPT = -u -f -mt | |||||
| FC = f77 | |||||
| #FFLAGS = -O4 -u -f -mt | |||||
| #FFLAGS = -u -f -dalign -native -xO5 -xarch=v8plusa | |||||
| FFLAGS = -u -f -dalign -native -xO2 -xarch=v8plusa | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = -u -f | |||||
| #FFLAGS_NOOPT = -u -f -mt | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = f77 | |||||
| #LOADOPTS = -mt | |||||
| LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = echo | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = echo | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -78,10 +75,10 @@ TIMER = EXT_ETIME | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = ../../librefblas.a | |||||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| #BLASLIB = -xlic_lib=sunperf_mt | #BLASLIB = -xlic_lib=sunperf_mt | ||||
| BLASLIB = -xlic_lib=sunperf | BLASLIB = -xlic_lib=sunperf | ||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,31 +8,29 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = xlc | |||||
| CC = xlc | |||||
| CFLAGS = -O3 -qnosave | CFLAGS = -O3 -qnosave | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = xlf | |||||
| OPTS = -O3 -qfixed -qnosave | |||||
| FC = xlf | |||||
| FFLAGS = -O3 -qfixed -qnosave | |||||
| # For -O2, add -qstrict=none | # For -O2, add -qstrict=none | ||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = -O0 -qfixed -qnosave | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = -O0 -qfixed -qnosave | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = xlf | |||||
| LOADOPTS = -qnosave | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -75,9 +73,9 @@ TIMER = EXT_ETIME_ | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = ../../librefblas.a | |||||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| BLASLIB = -lessl | BLASLIB = -lessl | ||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,10 +8,10 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = gcc | |||||
| CC = gcc | |||||
| CFLAGS = -O3 | CFLAGS = -O3 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| @@ -19,23 +19,21 @@ CFLAGS = -O3 | |||||
| # and handle these quantities appropriately. As a consequence, one | # and handle these quantities appropriately. As a consequence, one | ||||
| # should not compile LAPACK with flags such as -ffpe-trap=overflow. | # should not compile LAPACK with flags such as -ffpe-trap=overflow. | ||||
| # | # | ||||
| FORTRAN = gfortran | |||||
| OPTS = -O2 -frecursive | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = -O0 -frecursive | |||||
| FC = gfortran | |||||
| FFLAGS = -O2 -frecursive | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = -O0 -frecursive | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = gfortran | |||||
| LOADOPTS = | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -78,8 +76,8 @@ TIMER = INT_ETIME | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,10 +8,10 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = gcc | |||||
| CC = gcc | |||||
| CFLAGS = -g | CFLAGS = -g | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| @@ -19,23 +19,21 @@ CFLAGS = -g | |||||
| # and handle these quantities appropriately. As a consequence, one | # and handle these quantities appropriately. As a consequence, one | ||||
| # should not compile LAPACK with flags such as -ffpe-trap=overflow. | # should not compile LAPACK with flags such as -ffpe-trap=overflow. | ||||
| # | # | ||||
| FORTRAN = gfortran -fimplicit-none -g -frecursive | |||||
| OPTS = | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = -g -O0 -frecursive | |||||
| FC = gfortran | |||||
| FFLAGS = -fimplicit-none -g -frecursive | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = $(FFLAGS) -O0 | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = gfortran -g | |||||
| LOADOPTS = | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -78,8 +76,8 @@ TIMER = INT_CPU_TIME | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = icc | |||||
| CC = icc | |||||
| CFLAGS = -O3 | CFLAGS = -O3 | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = ifort | |||||
| OPTS = -O3 -fp-model strict -assume protect_parens | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = -O0 -fp-model strict -assume protect_parens | |||||
| FC = ifort | |||||
| FFLAGS = -O3 -fp-model strict -assume protect_parens | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = -O0 -fp-model strict -assume protect_parens | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = ifort | |||||
| LOADOPTS = | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = ranlib | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -74,8 +72,8 @@ TIMER = EXT_ETIME | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = pgcc | |||||
| CC = pgcc | |||||
| CFLAGS = | CFLAGS = | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = pgf95 | |||||
| OPTS = -O3 | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = -O0 | |||||
| FC = pgf95 | |||||
| FFLAGS = -O3 | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = -O0 | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = $(FORTRAN) | |||||
| LOADOPTS = | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = echo | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = echo | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -74,8 +72,8 @@ TIMER = INT_CPU_TIME | |||||
| # machine-specific, optimized BLAS library should be used whenever | # machine-specific, optimized BLAS library should be used whenever | ||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||||
| # CC is the C compiler, normally invoked with options CFLAGS. | # CC is the C compiler, normally invoked with options CFLAGS. | ||||
| # | # | ||||
| CC = pghpc | |||||
| CC = pghpc | |||||
| CFLAGS = | CFLAGS = | ||||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||||
| # Modify the FC and FFLAGS definitions to refer to the desired compiler | |||||
| # and desired compiler options for your machine. NOOPT refers to | # and desired compiler options for your machine. NOOPT refers to | ||||
| # the compiler options desired when NO OPTIMIZATION is selected. | # the compiler options desired when NO OPTIMIZATION is selected. | ||||
| # | # | ||||
| FORTRAN = pghpf | |||||
| OPTS = -O4 -Mnohpfc -Mdclchk | |||||
| DRVOPTS = $(OPTS) | |||||
| NOOPT = -Mnohpfc -Mdclchk | |||||
| FC = pghpf | |||||
| FFLAGS = -O4 -Mnohpfc -Mdclchk | |||||
| FFLAGS_DRV = $(FFLAGS) | |||||
| FFLAGS_NOOPT = -Mnohpfc -Mdclchk | |||||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||||
| # load options for your machine. | |||||
| # Define LDFLAGS to be the desired linker options for your machine. | |||||
| # | # | ||||
| LOADER = pghpf | |||||
| LOADOPTS = | |||||
| LDFLAGS = | |||||
| # The archiver and the flag(s) to use when building an archive | # The archiver and the flag(s) to use when building an archive | ||||
| # (library). If your system has no ranlib, set RANLIB = echo. | # (library). If your system has no ranlib, set RANLIB = echo. | ||||
| # | # | ||||
| ARCH = ar | |||||
| ARCHFLAGS = cr | |||||
| RANLIB = echo | |||||
| AR = ar | |||||
| ARFLAGS = cr | |||||
| RANLIB = echo | |||||
| # Timer for the SECOND and DSECND routines | # Timer for the SECOND and DSECND routines | ||||
| # | # | ||||
| @@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||||
| # possible.) | # possible.) | ||||
| # | # | ||||
| #BLASLIB = -lessl | #BLASLIB = -lessl | ||||
| BLASLIB = ../../librefblas.a | |||||
| CBLASLIB = ../../libcblas.a | |||||
| LAPACKLIB = liblapack.a | |||||
| TMGLIB = libtmglib.a | |||||
| LAPACKELIB = liblapacke.a | |||||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||||
| @@ -28,6 +28,7 @@ | |||||
| * | * | ||||
| *> \param[in] CMACH | *> \param[in] CMACH | ||||
| *> \verbatim | *> \verbatim | ||||
| *> CMACH is CHARACTER*1 | |||||
| *> Specifies the value to be returned by SLAMCH: | *> Specifies the value to be returned by SLAMCH: | ||||
| *> = 'E' or 'e', SLAMCH := eps | *> = 'E' or 'e', SLAMCH := eps | ||||
| *> = 'S' or 's', SLAMCH := sfmin | *> = 'S' or 's', SLAMCH := sfmin | ||||
| @@ -16,18 +16,16 @@ if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) | |||||
| ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | ||||
| endif() | endif() | ||||
| if(WIN32 AND NOT UNIX) | |||||
| add_definitions(-DHAVE_LAPACK_CONFIG_H -DLAPACK_COMPLEX_STRUCTURE) | |||||
| message(STATUS "Windows BUILD") | |||||
| endif() | |||||
| get_directory_property(DirDefs COMPILE_DEFINITIONS) | |||||
| include_directories(include ${LAPACK_BINARY_DIR}/include) | include_directories(include ${LAPACK_BINARY_DIR}/include) | ||||
| add_subdirectory(include) | add_subdirectory(include) | ||||
| add_subdirectory(src) | add_subdirectory(src) | ||||
| add_subdirectory(utils) | add_subdirectory(utils) | ||||
| option(LAPACKE_BUILD_SINGLE "Build LAPACKE single precision real" ON) | |||||
| option(LAPACKE_BUILD_DOUBLE "Build LAPACKE double precision real" ON) | |||||
| option(LAPACKE_BUILD_COMPLEX "Build LAPACKE single precision complex" ON) | |||||
| option(LAPACKE_BUILD_COMPLEX16 "Build LAPACKE double precision complex" ON) | |||||
| macro(append_subdir_files variable dirname) | macro(append_subdir_files variable dirname) | ||||
| get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable}) | get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable}) | ||||
| foreach(depfile ${holder}) | foreach(depfile ${holder}) | ||||
| @@ -35,8 +33,29 @@ macro(append_subdir_files variable dirname) | |||||
| endforeach() | endforeach() | ||||
| endmacro() | endmacro() | ||||
| message(STATUS "Build LAPACKE single precision real: ${LAPACKE_BUILD_SINGLE}") | |||||
| message(STATUS "Build LAPACKE double precision real: ${LAPACKE_BUILD_DOUBLE}") | |||||
| message(STATUS "Build LAPACKE single precision complex: ${LAPACKE_BUILD_COMPLEX}") | |||||
| message(STATUS "Build LAPACKE double precision complex: ${LAPACKE_BUILD_COMPLEX16}") | |||||
| append_subdir_files(LAPACKE_INCLUDE "include") | append_subdir_files(LAPACKE_INCLUDE "include") | ||||
| append_subdir_files(SOURCES "src") | append_subdir_files(SOURCES "src") | ||||
| if (LAPACKE_BUILD_SINGLE) | |||||
| append_subdir_files(SOURCES_SINGLE "src") | |||||
| list(APPEND SOURCES ${SOURCES_SINGLE}) | |||||
| endif() | |||||
| if (LAPACKE_BUILD_DOUBLE) | |||||
| append_subdir_files(SOURCES_DOUBLE "src") | |||||
| list(APPEND SOURCES ${SOURCES_DOUBLE}) | |||||
| endif() | |||||
| if (LAPACKE_BUILD_COMPLEX) | |||||
| append_subdir_files(SOURCES_COMPLEX "src") | |||||
| list(APPEND SOURCES ${SOURCES_COMPLEX}) | |||||
| endif() | |||||
| if (LAPACKE_BUILD_COMPLEX16) | |||||
| append_subdir_files(SOURCES_COMPLEX16 "src") | |||||
| list(APPEND SOURCES ${SOURCES_COMPLEX16}) | |||||
| endif() | |||||
| append_subdir_files(DEPRECATED "src") | append_subdir_files(DEPRECATED "src") | ||||
| append_subdir_files(EXTENDED "src") | append_subdir_files(EXTENDED "src") | ||||
| append_subdir_files(MATGEN "src") | append_subdir_files(MATGEN "src") | ||||
| @@ -61,9 +80,13 @@ set_target_properties( | |||||
| SOVERSION ${LAPACK_MAJOR_VERSION} | SOVERSION ${LAPACK_MAJOR_VERSION} | ||||
| ) | ) | ||||
| target_include_directories(lapacke PUBLIC | target_include_directories(lapacke PUBLIC | ||||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> | |||||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> | |||||
| $<INSTALL_INTERFACE:include> | $<INSTALL_INTERFACE:include> | ||||
| ) | ) | ||||
| if(WIN32 AND NOT UNIX) | |||||
| target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||||
| message(STATUS "Windows BUILD") | |||||
| endif() | |||||
| if(LAPACKE_WITH_TMG) | if(LAPACKE_WITH_TMG) | ||||
| target_link_libraries(lapacke PRIVATE tmglib) | target_link_libraries(lapacke PRIVATE tmglib) | ||||
| @@ -71,7 +94,11 @@ endif() | |||||
| target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) | target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) | ||||
| lapack_install_library(lapacke) | lapack_install_library(lapacke) | ||||
| install(FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||||
| install( | |||||
| FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h | |||||
| DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||||
| COMPONENT Development | |||||
| ) | |||||
| if(BUILD_TESTING) | if(BUILD_TESTING) | ||||
| add_subdirectory(example) | add_subdirectory(example) | ||||
| @@ -82,6 +109,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_ | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc | ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc | ||||
| DESTINATION ${PKG_CONFIG_DIR} | DESTINATION ${PKG_CONFIG_DIR} | ||||
| COMPONENT Development | |||||
| ) | ) | ||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | ||||
| @@ -95,7 +123,10 @@ install(FILES | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake | ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake | ||||
| ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake | ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake | ||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | ||||
| COMPONENT Development | |||||
| ) | ) | ||||
| install(EXPORT lapacke-targets | install(EXPORT lapacke-targets | ||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}) | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||||
| COMPONENT Development | |||||
| ) | |||||
| @@ -40,22 +40,26 @@ | |||||
| # To clean everything including lapacke library type | # To clean everything including lapacke library type | ||||
| # 'make cleanall' | # 'make cleanall' | ||||
| # | # | ||||
| include ../make.inc | |||||
| TOPSRCDIR = .. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .PHONY: all | |||||
| all: lapacke | all: lapacke | ||||
| .PHONY: lapacke | |||||
| lapacke: include/lapacke_mangling.h | lapacke: include/lapacke_mangling.h | ||||
| $(MAKE) -C src | $(MAKE) -C src | ||||
| $(MAKE) -C utils | $(MAKE) -C utils | ||||
| include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in | include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in | ||||
| cp $< $@ | |||||
| cp include/lapacke_mangling_with_flags.h.in $@ | |||||
| .PHONY: lapacke_example | |||||
| lapacke_example: lapacke | lapacke_example: lapacke | ||||
| $(MAKE) -C example | $(MAKE) -C example | ||||
| #clean: cleanlib | |||||
| clean: cleanobj | |||||
| .PHONY: clean cleanobj cleanlib cleanexe | |||||
| clean: | |||||
| $(MAKE) -C src clean | $(MAKE) -C src clean | ||||
| $(MAKE) -C utils clean | $(MAKE) -C utils clean | ||||
| $(MAKE) -C example clean | $(MAKE) -C example clean | ||||
| @@ -64,6 +68,6 @@ cleanobj: | |||||
| $(MAKE) -C utils cleanobj | $(MAKE) -C utils cleanobj | ||||
| $(MAKE) -C example cleanobj | $(MAKE) -C example cleanobj | ||||
| cleanlib: | cleanlib: | ||||
| rm -f ../$(LAPACKELIB) | |||||
| $(MAKE) -C src cleanlib | |||||
| cleanexe: | cleanexe: | ||||
| $(MAKE) -C example cleanexe | $(MAKE) -C example cleanexe | ||||
| @@ -7,8 +7,11 @@ if(NOT TARGET lapacke) | |||||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | ||||
| endif() | endif() | ||||
| # Hint for project building against lapack | |||||
| set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||||
| # Report lapacke header search locations from build tree. | # Report lapacke header search locations from build tree. | ||||
| set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | ||||
| # Report lapacke libraries. | # Report lapacke libraries. | ||||
| set(LAPACKE_LIBRARIES lapacke) | |||||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||||
| @@ -13,11 +13,14 @@ if(NOT TARGET lapacke) | |||||
| include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) | include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) | ||||
| endif() | endif() | ||||
| # Hint for project building against lapack | |||||
| set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||||
| # Report lapacke header search locations. | # Report lapacke header search locations. | ||||
| set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | ||||
| # Report lapacke libraries. | # Report lapacke libraries. | ||||
| set(LAPACKE_LIBRARIES lapacke) | |||||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||||
| unset(_LAPACKE_PREFIX) | unset(_LAPACKE_PREFIX) | ||||
| unset(_LAPACKE_SELF_DIR) | unset(_LAPACKE_SELF_DIR) | ||||
| @@ -1,34 +1,38 @@ | |||||
| include ../../make.inc | |||||
| TOPSRCDIR = ../.. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| .SUFFIXES: .c .o | |||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I. -I../include -c -o $@ $< | |||||
| .PHONY: all | |||||
| all: xexample_DGESV_rowmajor \ | all: xexample_DGESV_rowmajor \ | ||||
| xexample_DGESV_colmajor \ | xexample_DGESV_colmajor \ | ||||
| xexample_DGELS_rowmajor \ | xexample_DGELS_rowmajor \ | ||||
| xexample_DGELS_colmajor | xexample_DGELS_colmajor | ||||
| LIBRARIES = ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) | |||||
| LIBRARIES = $(LAPACKELIB) $(LAPACKLIB) $(BLASLIB) | |||||
| # Double Precision Examples | # Double Precision Examples | ||||
| xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| ./$@ | ./$@ | ||||
| xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES) | xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| ./$@ | ./$@ | ||||
| xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| ./$@ | ./$@ | ||||
| xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES) | xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES) | ||||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||||
| ./$@ | ./$@ | ||||
| .PHONY: clean cleanobj cleanexe | |||||
| clean: cleanobj cleanexe | clean: cleanobj cleanexe | ||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| cleanexe: | cleanexe: | ||||
| rm -f x* | rm -f x* | ||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I. -I../include -c -o $@ $< | |||||
| @@ -1,3 +1,3 @@ | |||||
| set(LAPACKE_INCLUDE lapacke.h lapacke_config.h lapacke_utils.h) | |||||
| set(LAPACKE_INCLUDE lapacke.h lapack.h lapacke_config.h lapacke_utils.h) | |||||
| file(COPY ${LAPACKE_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include) | file(COPY ${LAPACKE_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include) | ||||
| @@ -1,4 +1,4 @@ | |||||
| set(SOURCES | |||||
| set(SOURCES_COMPLEX | |||||
| lapacke_cbbcsd.c | lapacke_cbbcsd.c | ||||
| lapacke_cbbcsd_work.c | lapacke_cbbcsd_work.c | ||||
| lapacke_cbdsqr.c | lapacke_cbdsqr.c | ||||
| @@ -78,11 +78,11 @@ lapacke_cgeqrf_work.c | |||||
| lapacke_cgeqrfp.c | lapacke_cgeqrfp.c | ||||
| lapacke_cgeqrfp_work.c | lapacke_cgeqrfp_work.c | ||||
| lapacke_cgeqrt.c | lapacke_cgeqrt.c | ||||
| lapacke_cgeqrt_work.c | |||||
| lapacke_cgeqrt2.c | lapacke_cgeqrt2.c | ||||
| lapacke_cgeqrt2_work.c | lapacke_cgeqrt2_work.c | ||||
| lapacke_cgeqrt3.c | lapacke_cgeqrt3.c | ||||
| lapacke_cgeqrt3_work.c | lapacke_cgeqrt3_work.c | ||||
| lapacke_cgeqrt_work.c | |||||
| lapacke_cgerfs.c | lapacke_cgerfs.c | ||||
| lapacke_cgerfs_work.c | lapacke_cgerfs_work.c | ||||
| lapacke_cgerqf.c | lapacke_cgerqf.c | ||||
| @@ -93,6 +93,8 @@ lapacke_cgesv.c | |||||
| lapacke_cgesv_work.c | lapacke_cgesv_work.c | ||||
| lapacke_cgesvd.c | lapacke_cgesvd.c | ||||
| lapacke_cgesvd_work.c | lapacke_cgesvd_work.c | ||||
| lapacke_cgesvdq.c | |||||
| lapacke_cgesvdq_work.c | |||||
| lapacke_cgesvdx.c | lapacke_cgesvdx.c | ||||
| lapacke_cgesvdx_work.c | lapacke_cgesvdx_work.c | ||||
| lapacke_cgesvj.c | lapacke_cgesvj.c | ||||
| @@ -129,10 +131,10 @@ lapacke_cggevx.c | |||||
| lapacke_cggevx_work.c | lapacke_cggevx_work.c | ||||
| lapacke_cggglm.c | lapacke_cggglm.c | ||||
| lapacke_cggglm_work.c | lapacke_cggglm_work.c | ||||
| lapacke_cgghrd.c | |||||
| lapacke_cgghrd_work.c | |||||
| lapacke_cgghd3.c | lapacke_cgghd3.c | ||||
| lapacke_cgghd3_work.c | lapacke_cgghd3_work.c | ||||
| lapacke_cgghrd.c | |||||
| lapacke_cgghrd_work.c | |||||
| lapacke_cgglse.c | lapacke_cgglse.c | ||||
| lapacke_cgglse_work.c | lapacke_cgglse_work.c | ||||
| lapacke_cggqrf.c | lapacke_cggqrf.c | ||||
| @@ -157,14 +159,14 @@ lapacke_cgttrs.c | |||||
| lapacke_cgttrs_work.c | lapacke_cgttrs_work.c | ||||
| lapacke_chbev.c | lapacke_chbev.c | ||||
| lapacke_chbev_work.c | lapacke_chbev_work.c | ||||
| lapacke_chbevd.c | |||||
| lapacke_chbevd_work.c | |||||
| lapacke_chbevx.c | |||||
| lapacke_chbevx_work.c | |||||
| lapacke_chbev_2stage.c | lapacke_chbev_2stage.c | ||||
| lapacke_chbev_2stage_work.c | lapacke_chbev_2stage_work.c | ||||
| lapacke_chbevd.c | |||||
| lapacke_chbevd_work.c | |||||
| lapacke_chbevd_2stage.c | lapacke_chbevd_2stage.c | ||||
| lapacke_chbevd_2stage_work.c | lapacke_chbevd_2stage_work.c | ||||
| lapacke_chbevx.c | |||||
| lapacke_chbevx_work.c | |||||
| lapacke_chbevx_2stage.c | lapacke_chbevx_2stage.c | ||||
| lapacke_chbevx_2stage_work.c | lapacke_chbevx_2stage_work.c | ||||
| lapacke_chbgst.c | lapacke_chbgst.c | ||||
| @@ -185,18 +187,18 @@ lapacke_cheequb.c | |||||
| lapacke_cheequb_work.c | lapacke_cheequb_work.c | ||||
| lapacke_cheev.c | lapacke_cheev.c | ||||
| lapacke_cheev_work.c | lapacke_cheev_work.c | ||||
| lapacke_cheevd.c | |||||
| lapacke_cheevd_work.c | |||||
| lapacke_cheevr.c | |||||
| lapacke_cheevr_work.c | |||||
| lapacke_cheevx.c | |||||
| lapacke_cheevx_work.c | |||||
| lapacke_cheev_2stage.c | lapacke_cheev_2stage.c | ||||
| lapacke_cheev_2stage_work.c | lapacke_cheev_2stage_work.c | ||||
| lapacke_cheevd.c | |||||
| lapacke_cheevd_work.c | |||||
| lapacke_cheevd_2stage.c | lapacke_cheevd_2stage.c | ||||
| lapacke_cheevd_2stage_work.c | lapacke_cheevd_2stage_work.c | ||||
| lapacke_cheevr.c | |||||
| lapacke_cheevr_work.c | |||||
| lapacke_cheevr_2stage.c | lapacke_cheevr_2stage.c | ||||
| lapacke_cheevr_2stage_work.c | lapacke_cheevr_2stage_work.c | ||||
| lapacke_cheevx.c | |||||
| lapacke_cheevx_work.c | |||||
| lapacke_cheevx_2stage.c | lapacke_cheevx_2stage.c | ||||
| lapacke_cheevx_2stage_work.c | lapacke_cheevx_2stage_work.c | ||||
| lapacke_chegst.c | lapacke_chegst.c | ||||
| @@ -214,8 +216,8 @@ lapacke_cherfs_work.c | |||||
| lapacke_chesv.c | lapacke_chesv.c | ||||
| lapacke_chesv_work.c | lapacke_chesv_work.c | ||||
| lapacke_chesv_aa.c | lapacke_chesv_aa.c | ||||
| lapacke_chesv_aa_2stage.c | |||||
| lapacke_chesv_aa_work.c | lapacke_chesv_aa_work.c | ||||
| lapacke_chesv_aa_2stage.c | |||||
| lapacke_chesv_aa_2stage_work.c | lapacke_chesv_aa_2stage_work.c | ||||
| lapacke_chesv_rk.c | lapacke_chesv_rk.c | ||||
| lapacke_chesv_rk_work.c | lapacke_chesv_rk_work.c | ||||
| @@ -226,35 +228,35 @@ lapacke_cheswapr_work.c | |||||
| lapacke_chetrd.c | lapacke_chetrd.c | ||||
| lapacke_chetrd_work.c | lapacke_chetrd_work.c | ||||
| lapacke_chetrf.c | lapacke_chetrf.c | ||||
| lapacke_chetrf_rook.c | |||||
| lapacke_chetrf_work.c | lapacke_chetrf_work.c | ||||
| lapacke_chetrf_rook_work.c | |||||
| lapacke_chetrf_aa.c | lapacke_chetrf_aa.c | ||||
| lapacke_chetrf_aa_2stage.c | |||||
| lapacke_chetrf_aa_work.c | lapacke_chetrf_aa_work.c | ||||
| lapacke_chetrf_aa_2stage.c | |||||
| lapacke_chetrf_aa_2stage_work.c | lapacke_chetrf_aa_2stage_work.c | ||||
| lapacke_chetrf_rk.c | lapacke_chetrf_rk.c | ||||
| lapacke_chetrf_rk_work.c | lapacke_chetrf_rk_work.c | ||||
| lapacke_chetrf_rook.c | |||||
| lapacke_chetrf_rook_work.c | |||||
| lapacke_chetri.c | lapacke_chetri.c | ||||
| lapacke_chetri_work.c | |||||
| lapacke_chetri2.c | lapacke_chetri2.c | ||||
| lapacke_chetri2_work.c | lapacke_chetri2_work.c | ||||
| lapacke_chetri_3.c | |||||
| lapacke_chetri_3_work.c | |||||
| lapacke_chetri2x.c | lapacke_chetri2x.c | ||||
| lapacke_chetri2x_work.c | lapacke_chetri2x_work.c | ||||
| lapacke_chetri_work.c | |||||
| lapacke_chetri_3.c | |||||
| lapacke_chetri_3_work.c | |||||
| lapacke_chetrs.c | lapacke_chetrs.c | ||||
| lapacke_chetrs_rook.c | |||||
| lapacke_chetrs_work.c | |||||
| lapacke_chetrs2.c | lapacke_chetrs2.c | ||||
| lapacke_chetrs2_work.c | lapacke_chetrs2_work.c | ||||
| lapacke_chetrs_work.c | |||||
| lapacke_chetrs_rook_work.c | |||||
| lapacke_chetrs_3.c | |||||
| lapacke_chetrs_3_work.c | |||||
| lapacke_chetrs_aa.c | lapacke_chetrs_aa.c | ||||
| lapacke_chetrs_aa_2stage.c | |||||
| lapacke_chetrs_aa_work.c | lapacke_chetrs_aa_work.c | ||||
| lapacke_chetrs_aa_2stage.c | |||||
| lapacke_chetrs_aa_2stage_work.c | lapacke_chetrs_aa_2stage_work.c | ||||
| lapacke_chetrs_3.c | |||||
| lapacke_chetrs_3_work.c | |||||
| lapacke_chetrs_rook.c | |||||
| lapacke_chetrs_rook_work.c | |||||
| lapacke_chfrk.c | lapacke_chfrk.c | ||||
| lapacke_chfrk_work.c | lapacke_chfrk_work.c | ||||
| lapacke_chgeqz.c | lapacke_chgeqz.c | ||||
| @@ -445,52 +447,54 @@ lapacke_csyconv.c | |||||
| lapacke_csyconv_work.c | lapacke_csyconv_work.c | ||||
| lapacke_csyequb.c | lapacke_csyequb.c | ||||
| lapacke_csyequb_work.c | lapacke_csyequb_work.c | ||||
| lapacke_csyr.c | |||||
| lapacke_csyr_work.c | |||||
| lapacke_csyrfs.c | lapacke_csyrfs.c | ||||
| lapacke_csyrfs_work.c | lapacke_csyrfs_work.c | ||||
| lapacke_csysv.c | lapacke_csysv.c | ||||
| lapacke_csysv_rook.c | |||||
| lapacke_csysv_rook_work.c | |||||
| lapacke_csysv_work.c | lapacke_csysv_work.c | ||||
| lapacke_csysv_aa.c | lapacke_csysv_aa.c | ||||
| lapacke_csysv_aa_2stage.c | |||||
| lapacke_csysv_aa_work.c | lapacke_csysv_aa_work.c | ||||
| lapacke_csysv_aa_2stage.c | |||||
| lapacke_csysv_aa_2stage_work.c | lapacke_csysv_aa_2stage_work.c | ||||
| lapacke_csysv_rk.c | lapacke_csysv_rk.c | ||||
| lapacke_csysv_rk_work.c | lapacke_csysv_rk_work.c | ||||
| lapacke_csysv_rook.c | |||||
| lapacke_csysv_rook_work.c | |||||
| lapacke_csysvx.c | lapacke_csysvx.c | ||||
| lapacke_csysvx_work.c | lapacke_csysvx_work.c | ||||
| lapacke_csyswapr.c | lapacke_csyswapr.c | ||||
| lapacke_csyswapr_work.c | lapacke_csyswapr_work.c | ||||
| lapacke_csytrf.c | lapacke_csytrf.c | ||||
| lapacke_csytrf_work.c | lapacke_csytrf_work.c | ||||
| lapacke_csytrf_rook.c | |||||
| lapacke_csytrf_rook_work.c | |||||
| lapacke_csytrf_aa.c | lapacke_csytrf_aa.c | ||||
| lapacke_csytrf_aa_2stage.c | |||||
| lapacke_csytrf_aa_work.c | lapacke_csytrf_aa_work.c | ||||
| lapacke_csytrf_aa_2stage.c | |||||
| lapacke_csytrf_aa_2stage_work.c | lapacke_csytrf_aa_2stage_work.c | ||||
| lapacke_csytrf_rk.c | lapacke_csytrf_rk.c | ||||
| lapacke_csytrf_rk_work.c | lapacke_csytrf_rk_work.c | ||||
| lapacke_csytrf_rook.c | |||||
| lapacke_csytrf_rook_work.c | |||||
| lapacke_csytri.c | lapacke_csytri.c | ||||
| lapacke_csytri_work.c | |||||
| lapacke_csytri2.c | lapacke_csytri2.c | ||||
| lapacke_csytri2_work.c | lapacke_csytri2_work.c | ||||
| lapacke_csytri_3.c | |||||
| lapacke_csytri_3_work.c | |||||
| lapacke_csytri2x.c | lapacke_csytri2x.c | ||||
| lapacke_csytri2x_work.c | lapacke_csytri2x_work.c | ||||
| lapacke_csytri_work.c | |||||
| lapacke_csytri_3.c | |||||
| lapacke_csytri_3_work.c | |||||
| lapacke_csytrs.c | lapacke_csytrs.c | ||||
| lapacke_csytrs_rook.c | |||||
| lapacke_csytrs_work.c | |||||
| lapacke_csytrs2.c | lapacke_csytrs2.c | ||||
| lapacke_csytrs2_work.c | lapacke_csytrs2_work.c | ||||
| lapacke_csytrs_work.c | |||||
| lapacke_csytrs_rook_work.c | |||||
| lapacke_csytrs_3.c | |||||
| lapacke_csytrs_3_work.c | |||||
| lapacke_csytrs_aa.c | lapacke_csytrs_aa.c | ||||
| lapacke_csytrs_aa_2stage.c | |||||
| lapacke_csytrs_aa_work.c | lapacke_csytrs_aa_work.c | ||||
| lapacke_csytrs_aa_2stage.c | |||||
| lapacke_csytrs_aa_2stage_work.c | lapacke_csytrs_aa_2stage_work.c | ||||
| lapacke_csytrs_3.c | |||||
| lapacke_csytrs_3_work.c | |||||
| lapacke_csytrs_rook.c | |||||
| lapacke_csytrs_rook_work.c | |||||
| lapacke_ctbcon.c | lapacke_ctbcon.c | ||||
| lapacke_ctbcon_work.c | lapacke_ctbcon_work.c | ||||
| lapacke_ctbrfs.c | lapacke_ctbrfs.c | ||||
| @@ -522,9 +526,9 @@ lapacke_ctpcon_work.c | |||||
| lapacke_ctpmqrt.c | lapacke_ctpmqrt.c | ||||
| lapacke_ctpmqrt_work.c | lapacke_ctpmqrt_work.c | ||||
| lapacke_ctpqrt.c | lapacke_ctpqrt.c | ||||
| lapacke_ctpqrt_work.c | |||||
| lapacke_ctpqrt2.c | lapacke_ctpqrt2.c | ||||
| lapacke_ctpqrt2_work.c | lapacke_ctpqrt2_work.c | ||||
| lapacke_ctpqrt_work.c | |||||
| lapacke_ctprfb.c | lapacke_ctprfb.c | ||||
| lapacke_ctprfb_work.c | lapacke_ctprfb_work.c | ||||
| lapacke_ctprfs.c | lapacke_ctprfs.c | ||||
| @@ -601,14 +605,16 @@ lapacke_cupgtr.c | |||||
| lapacke_cupgtr_work.c | lapacke_cupgtr_work.c | ||||
| lapacke_cupmtr.c | lapacke_cupmtr.c | ||||
| lapacke_cupmtr_work.c | lapacke_cupmtr_work.c | ||||
| ) | |||||
| set(SOURCES_DOUBLE | |||||
| lapacke_dbbcsd.c | lapacke_dbbcsd.c | ||||
| lapacke_dbbcsd_work.c | lapacke_dbbcsd_work.c | ||||
| lapacke_dbdsdc.c | lapacke_dbdsdc.c | ||||
| lapacke_dbdsdc_work.c | lapacke_dbdsdc_work.c | ||||
| lapacke_dbdsvdx.c | |||||
| lapacke_dbdsvdx_work.c | |||||
| lapacke_dbdsqr.c | lapacke_dbdsqr.c | ||||
| lapacke_dbdsqr_work.c | lapacke_dbdsqr_work.c | ||||
| lapacke_dbdsvdx.c | |||||
| lapacke_dbdsvdx_work.c | |||||
| lapacke_ddisna.c | lapacke_ddisna.c | ||||
| lapacke_ddisna_work.c | lapacke_ddisna_work.c | ||||
| lapacke_dgbbrd.c | lapacke_dgbbrd.c | ||||
| @@ -686,11 +692,11 @@ lapacke_dgeqrf_work.c | |||||
| lapacke_dgeqrfp.c | lapacke_dgeqrfp.c | ||||
| lapacke_dgeqrfp_work.c | lapacke_dgeqrfp_work.c | ||||
| lapacke_dgeqrt.c | lapacke_dgeqrt.c | ||||
| lapacke_dgeqrt_work.c | |||||
| lapacke_dgeqrt2.c | lapacke_dgeqrt2.c | ||||
| lapacke_dgeqrt2_work.c | lapacke_dgeqrt2_work.c | ||||
| lapacke_dgeqrt3.c | lapacke_dgeqrt3.c | ||||
| lapacke_dgeqrt3_work.c | lapacke_dgeqrt3_work.c | ||||
| lapacke_dgeqrt_work.c | |||||
| lapacke_dgerfs.c | lapacke_dgerfs.c | ||||
| lapacke_dgerfs_work.c | lapacke_dgerfs_work.c | ||||
| lapacke_dgerqf.c | lapacke_dgerqf.c | ||||
| @@ -701,6 +707,8 @@ lapacke_dgesv.c | |||||
| lapacke_dgesv_work.c | lapacke_dgesv_work.c | ||||
| lapacke_dgesvd.c | lapacke_dgesvd.c | ||||
| lapacke_dgesvd_work.c | lapacke_dgesvd_work.c | ||||
| lapacke_dgesvdq.c | |||||
| lapacke_dgesvdq_work.c | |||||
| lapacke_dgesvdx.c | lapacke_dgesvdx.c | ||||
| lapacke_dgesvdx_work.c | lapacke_dgesvdx_work.c | ||||
| lapacke_dgesvj.c | lapacke_dgesvj.c | ||||
| @@ -737,10 +745,10 @@ lapacke_dggevx.c | |||||
| lapacke_dggevx_work.c | lapacke_dggevx_work.c | ||||
| lapacke_dggglm.c | lapacke_dggglm.c | ||||
| lapacke_dggglm_work.c | lapacke_dggglm_work.c | ||||
| lapacke_dgghrd.c | |||||
| lapacke_dgghrd_work.c | |||||
| lapacke_dgghd3.c | lapacke_dgghd3.c | ||||
| lapacke_dgghd3_work.c | lapacke_dgghd3_work.c | ||||
| lapacke_dgghrd.c | |||||
| lapacke_dgghrd_work.c | |||||
| lapacke_dgglse.c | lapacke_dgglse.c | ||||
| lapacke_dgglse_work.c | lapacke_dgglse_work.c | ||||
| lapacke_dggqrf.c | lapacke_dggqrf.c | ||||
| @@ -823,10 +831,10 @@ lapacke_dopmtr.c | |||||
| lapacke_dopmtr_work.c | lapacke_dopmtr_work.c | ||||
| lapacke_dorbdb.c | lapacke_dorbdb.c | ||||
| lapacke_dorbdb_work.c | lapacke_dorbdb_work.c | ||||
| lapacke_dorcsd2by1.c | |||||
| lapacke_dorcsd2by1_work.c | |||||
| lapacke_dorcsd.c | lapacke_dorcsd.c | ||||
| lapacke_dorcsd_work.c | lapacke_dorcsd_work.c | ||||
| lapacke_dorcsd2by1.c | |||||
| lapacke_dorcsd2by1_work.c | |||||
| lapacke_dorgbr.c | lapacke_dorgbr.c | ||||
| lapacke_dorgbr_work.c | lapacke_dorgbr_work.c | ||||
| lapacke_dorghr.c | lapacke_dorghr.c | ||||
| @@ -933,14 +941,14 @@ lapacke_dpttrs.c | |||||
| lapacke_dpttrs_work.c | lapacke_dpttrs_work.c | ||||
| lapacke_dsbev.c | lapacke_dsbev.c | ||||
| lapacke_dsbev_work.c | lapacke_dsbev_work.c | ||||
| lapacke_dsbevd.c | |||||
| lapacke_dsbevd_work.c | |||||
| lapacke_dsbevx.c | |||||
| lapacke_dsbevx_work.c | |||||
| lapacke_dsbev_2stage.c | lapacke_dsbev_2stage.c | ||||
| lapacke_dsbev_2stage_work.c | lapacke_dsbev_2stage_work.c | ||||
| lapacke_dsbevd.c | |||||
| lapacke_dsbevd_work.c | |||||
| lapacke_dsbevd_2stage.c | lapacke_dsbevd_2stage.c | ||||
| lapacke_dsbevd_2stage_work.c | lapacke_dsbevd_2stage_work.c | ||||
| lapacke_dsbevx.c | |||||
| lapacke_dsbevx_work.c | |||||
| lapacke_dsbevx_2stage.c | lapacke_dsbevx_2stage.c | ||||
| lapacke_dsbevx_2stage_work.c | lapacke_dsbevx_2stage_work.c | ||||
| lapacke_dsbgst.c | lapacke_dsbgst.c | ||||
| @@ -1021,18 +1029,18 @@ lapacke_dsyequb.c | |||||
| lapacke_dsyequb_work.c | lapacke_dsyequb_work.c | ||||
| lapacke_dsyev.c | lapacke_dsyev.c | ||||
| lapacke_dsyev_work.c | lapacke_dsyev_work.c | ||||
| lapacke_dsyevd.c | |||||
| lapacke_dsyevd_work.c | |||||
| lapacke_dsyevr.c | |||||
| lapacke_dsyevr_work.c | |||||
| lapacke_dsyevx.c | |||||
| lapacke_dsyevx_work.c | |||||
| lapacke_dsyev_2stage.c | lapacke_dsyev_2stage.c | ||||
| lapacke_dsyev_2stage_work.c | lapacke_dsyev_2stage_work.c | ||||
| lapacke_dsyevd.c | |||||
| lapacke_dsyevd_work.c | |||||
| lapacke_dsyevd_2stage.c | lapacke_dsyevd_2stage.c | ||||
| lapacke_dsyevd_2stage_work.c | lapacke_dsyevd_2stage_work.c | ||||
| lapacke_dsyevr.c | |||||
| lapacke_dsyevr_work.c | |||||
| lapacke_dsyevr_2stage.c | lapacke_dsyevr_2stage.c | ||||
| lapacke_dsyevr_2stage_work.c | lapacke_dsyevr_2stage_work.c | ||||
| lapacke_dsyevx.c | |||||
| lapacke_dsyevx_work.c | |||||
| lapacke_dsyevx_2stage.c | lapacke_dsyevx_2stage.c | ||||
| lapacke_dsyevx_2stage_work.c | lapacke_dsyevx_2stage_work.c | ||||
| lapacke_dsygst.c | lapacke_dsygst.c | ||||
| @@ -1048,15 +1056,15 @@ lapacke_dsygvx_work.c | |||||
| lapacke_dsyrfs.c | lapacke_dsyrfs.c | ||||
| lapacke_dsyrfs_work.c | lapacke_dsyrfs_work.c | ||||
| lapacke_dsysv.c | lapacke_dsysv.c | ||||
| lapacke_dsysv_rook.c | |||||
| lapacke_dsysv_rook_work.c | |||||
| lapacke_dsysv_work.c | lapacke_dsysv_work.c | ||||
| lapacke_dsysv_aa.c | lapacke_dsysv_aa.c | ||||
| lapacke_dsysv_aa_2stage.c | |||||
| lapacke_dsysv_aa_work.c | lapacke_dsysv_aa_work.c | ||||
| lapacke_dsysv_aa_2stage.c | |||||
| lapacke_dsysv_aa_2stage_work.c | lapacke_dsysv_aa_2stage_work.c | ||||
| lapacke_dsysv_rk.c | lapacke_dsysv_rk.c | ||||
| lapacke_dsysv_rk_work.c | lapacke_dsysv_rk_work.c | ||||
| lapacke_dsysv_rook.c | |||||
| lapacke_dsysv_rook_work.c | |||||
| lapacke_dsysvx.c | lapacke_dsysvx.c | ||||
| lapacke_dsysvx_work.c | lapacke_dsysvx_work.c | ||||
| lapacke_dsyswapr.c | lapacke_dsyswapr.c | ||||
| @@ -1065,33 +1073,33 @@ lapacke_dsytrd.c | |||||
| lapacke_dsytrd_work.c | lapacke_dsytrd_work.c | ||||
| lapacke_dsytrf.c | lapacke_dsytrf.c | ||||
| lapacke_dsytrf_work.c | lapacke_dsytrf_work.c | ||||
| lapacke_dsytrf_rook.c | |||||
| lapacke_dsytrf_rook_work.c | |||||
| lapacke_dsytrf_aa.c | lapacke_dsytrf_aa.c | ||||
| lapacke_dsytrf_aa_2stage.c | |||||
| lapacke_dsytrf_aa_work.c | lapacke_dsytrf_aa_work.c | ||||
| lapacke_dsytrf_aa_2stage.c | |||||
| lapacke_dsytrf_aa_2stage_work.c | lapacke_dsytrf_aa_2stage_work.c | ||||
| lapacke_dsytrf_rk.c | lapacke_dsytrf_rk.c | ||||
| lapacke_dsytrf_rk_work.c | lapacke_dsytrf_rk_work.c | ||||
| lapacke_dsytrf_rook.c | |||||
| lapacke_dsytrf_rook_work.c | |||||
| lapacke_dsytri.c | lapacke_dsytri.c | ||||
| lapacke_dsytri_work.c | |||||
| lapacke_dsytri2.c | lapacke_dsytri2.c | ||||
| lapacke_dsytri2_work.c | lapacke_dsytri2_work.c | ||||
| lapacke_dsytri_3.c | |||||
| lapacke_dsytri_3_work.c | |||||
| lapacke_dsytri2x.c | lapacke_dsytri2x.c | ||||
| lapacke_dsytri2x_work.c | lapacke_dsytri2x_work.c | ||||
| lapacke_dsytri_work.c | |||||
| lapacke_dsytri_3.c | |||||
| lapacke_dsytri_3_work.c | |||||
| lapacke_dsytrs.c | lapacke_dsytrs.c | ||||
| lapacke_dsytrs_rook.c | |||||
| lapacke_dsytrs_work.c | |||||
| lapacke_dsytrs2.c | lapacke_dsytrs2.c | ||||
| lapacke_dsytrs2_work.c | lapacke_dsytrs2_work.c | ||||
| lapacke_dsytrs_3.c | |||||
| lapacke_dsytrs_3_work.c | |||||
| lapacke_dsytrs_aa.c | lapacke_dsytrs_aa.c | ||||
| lapacke_dsytrs_aa_2stage.c | |||||
| lapacke_dsytrs_aa_work.c | lapacke_dsytrs_aa_work.c | ||||
| lapacke_dsytrs_aa_2stage.c | |||||
| lapacke_dsytrs_aa_2stage_work.c | lapacke_dsytrs_aa_2stage_work.c | ||||
| lapacke_dsytrs_3.c | |||||
| lapacke_dsytrs_3_work.c | |||||
| lapacke_dsytrs_work.c | |||||
| lapacke_dsytrs_rook.c | |||||
| lapacke_dsytrs_rook_work.c | lapacke_dsytrs_rook_work.c | ||||
| lapacke_dtbcon.c | lapacke_dtbcon.c | ||||
| lapacke_dtbcon_work.c | lapacke_dtbcon_work.c | ||||
| @@ -1124,9 +1132,9 @@ lapacke_dtpcon_work.c | |||||
| lapacke_dtpmqrt.c | lapacke_dtpmqrt.c | ||||
| lapacke_dtpmqrt_work.c | lapacke_dtpmqrt_work.c | ||||
| lapacke_dtpqrt.c | lapacke_dtpqrt.c | ||||
| lapacke_dtpqrt_work.c | |||||
| lapacke_dtpqrt2.c | lapacke_dtpqrt2.c | ||||
| lapacke_dtpqrt2_work.c | lapacke_dtpqrt2_work.c | ||||
| lapacke_dtpqrt_work.c | |||||
| lapacke_dtprfb.c | lapacke_dtprfb.c | ||||
| lapacke_dtprfb_work.c | lapacke_dtprfb_work.c | ||||
| lapacke_dtprfs.c | lapacke_dtprfs.c | ||||
| @@ -1163,15 +1171,21 @@ lapacke_dtrttp.c | |||||
| lapacke_dtrttp_work.c | lapacke_dtrttp_work.c | ||||
| lapacke_dtzrzf.c | lapacke_dtzrzf.c | ||||
| lapacke_dtzrzf_work.c | lapacke_dtzrzf_work.c | ||||
| ) | |||||
| set(SOURCES | |||||
| lapacke_nancheck.c | lapacke_nancheck.c | ||||
| lapacke_ilaver.c | |||||
| ) | |||||
| set(SOURCES_SINGLE | |||||
| lapacke_sbbcsd.c | lapacke_sbbcsd.c | ||||
| lapacke_sbbcsd_work.c | lapacke_sbbcsd_work.c | ||||
| lapacke_sbdsdc.c | lapacke_sbdsdc.c | ||||
| lapacke_sbdsdc_work.c | lapacke_sbdsdc_work.c | ||||
| lapacke_sbdsvdx.c | |||||
| lapacke_sbdsvdx_work.c | |||||
| lapacke_sbdsqr.c | lapacke_sbdsqr.c | ||||
| lapacke_sbdsqr_work.c | lapacke_sbdsqr_work.c | ||||
| lapacke_sbdsvdx.c | |||||
| lapacke_sbdsvdx_work.c | |||||
| lapacke_sdisna.c | lapacke_sdisna.c | ||||
| lapacke_sdisna_work.c | lapacke_sdisna_work.c | ||||
| lapacke_sgbbrd.c | lapacke_sgbbrd.c | ||||
| @@ -1249,11 +1263,11 @@ lapacke_sgeqrf_work.c | |||||
| lapacke_sgeqrfp.c | lapacke_sgeqrfp.c | ||||
| lapacke_sgeqrfp_work.c | lapacke_sgeqrfp_work.c | ||||
| lapacke_sgeqrt.c | lapacke_sgeqrt.c | ||||
| lapacke_sgeqrt_work.c | |||||
| lapacke_sgeqrt2.c | lapacke_sgeqrt2.c | ||||
| lapacke_sgeqrt2_work.c | lapacke_sgeqrt2_work.c | ||||
| lapacke_sgeqrt3.c | lapacke_sgeqrt3.c | ||||
| lapacke_sgeqrt3_work.c | lapacke_sgeqrt3_work.c | ||||
| lapacke_sgeqrt_work.c | |||||
| lapacke_sgerfs.c | lapacke_sgerfs.c | ||||
| lapacke_sgerfs_work.c | lapacke_sgerfs_work.c | ||||
| lapacke_sgerqf.c | lapacke_sgerqf.c | ||||
| @@ -1264,6 +1278,8 @@ lapacke_sgesv.c | |||||
| lapacke_sgesv_work.c | lapacke_sgesv_work.c | ||||
| lapacke_sgesvd.c | lapacke_sgesvd.c | ||||
| lapacke_sgesvd_work.c | lapacke_sgesvd_work.c | ||||
| lapacke_sgesvdq.c | |||||
| lapacke_sgesvdq_work.c | |||||
| lapacke_sgesvdx.c | lapacke_sgesvdx.c | ||||
| lapacke_sgesvdx_work.c | lapacke_sgesvdx_work.c | ||||
| lapacke_sgesvj.c | lapacke_sgesvj.c | ||||
| @@ -1300,10 +1316,10 @@ lapacke_sggevx.c | |||||
| lapacke_sggevx_work.c | lapacke_sggevx_work.c | ||||
| lapacke_sggglm.c | lapacke_sggglm.c | ||||
| lapacke_sggglm_work.c | lapacke_sggglm_work.c | ||||
| lapacke_sgghrd.c | |||||
| lapacke_sgghrd_work.c | |||||
| lapacke_sgghd3.c | lapacke_sgghd3.c | ||||
| lapacke_sgghd3_work.c | lapacke_sgghd3_work.c | ||||
| lapacke_sgghrd.c | |||||
| lapacke_sgghrd_work.c | |||||
| lapacke_sgglse.c | lapacke_sgglse.c | ||||
| lapacke_sgglse_work.c | lapacke_sgglse_work.c | ||||
| lapacke_sggqrf.c | lapacke_sggqrf.c | ||||
| @@ -1496,14 +1512,14 @@ lapacke_spttrs.c | |||||
| lapacke_spttrs_work.c | lapacke_spttrs_work.c | ||||
| lapacke_ssbev.c | lapacke_ssbev.c | ||||
| lapacke_ssbev_work.c | lapacke_ssbev_work.c | ||||
| lapacke_ssbevd.c | |||||
| lapacke_ssbevd_work.c | |||||
| lapacke_ssbevx.c | |||||
| lapacke_ssbevx_work.c | |||||
| lapacke_ssbev_2stage.c | lapacke_ssbev_2stage.c | ||||
| lapacke_ssbev_2stage_work.c | lapacke_ssbev_2stage_work.c | ||||
| lapacke_ssbevd.c | |||||
| lapacke_ssbevd_work.c | |||||
| lapacke_ssbevd_2stage.c | lapacke_ssbevd_2stage.c | ||||
| lapacke_ssbevd_2stage_work.c | lapacke_ssbevd_2stage_work.c | ||||
| lapacke_ssbevx.c | |||||
| lapacke_ssbevx_work.c | |||||
| lapacke_ssbevx_2stage.c | lapacke_ssbevx_2stage.c | ||||
| lapacke_ssbevx_2stage_work.c | lapacke_ssbevx_2stage_work.c | ||||
| lapacke_ssbgst.c | lapacke_ssbgst.c | ||||
| @@ -1580,18 +1596,18 @@ lapacke_ssyequb.c | |||||
| lapacke_ssyequb_work.c | lapacke_ssyequb_work.c | ||||
| lapacke_ssyev.c | lapacke_ssyev.c | ||||
| lapacke_ssyev_work.c | lapacke_ssyev_work.c | ||||
| lapacke_ssyevd.c | |||||
| lapacke_ssyevd_work.c | |||||
| lapacke_ssyevr.c | |||||
| lapacke_ssyevr_work.c | |||||
| lapacke_ssyevx.c | |||||
| lapacke_ssyevx_work.c | |||||
| lapacke_ssyev_2stage.c | lapacke_ssyev_2stage.c | ||||
| lapacke_ssyev_2stage_work.c | lapacke_ssyev_2stage_work.c | ||||
| lapacke_ssyevd.c | |||||
| lapacke_ssyevd_work.c | |||||
| lapacke_ssyevd_2stage.c | lapacke_ssyevd_2stage.c | ||||
| lapacke_ssyevd_2stage_work.c | lapacke_ssyevd_2stage_work.c | ||||
| lapacke_ssyevr.c | |||||
| lapacke_ssyevr_work.c | |||||
| lapacke_ssyevr_2stage.c | lapacke_ssyevr_2stage.c | ||||
| lapacke_ssyevr_2stage_work.c | lapacke_ssyevr_2stage_work.c | ||||
| lapacke_ssyevx.c | |||||
| lapacke_ssyevx_work.c | |||||
| lapacke_ssyevx_2stage.c | lapacke_ssyevx_2stage.c | ||||
| lapacke_ssyevx_2stage_work.c | lapacke_ssyevx_2stage_work.c | ||||
| lapacke_ssygst.c | lapacke_ssygst.c | ||||
| @@ -1607,8 +1623,6 @@ lapacke_ssygvx_work.c | |||||
| lapacke_ssyrfs.c | lapacke_ssyrfs.c | ||||
| lapacke_ssyrfs_work.c | lapacke_ssyrfs_work.c | ||||
| lapacke_ssysv.c | lapacke_ssysv.c | ||||
| lapacke_ssysv_rook.c | |||||
| lapacke_ssysv_rook_work.c | |||||
| lapacke_ssysv_work.c | lapacke_ssysv_work.c | ||||
| lapacke_ssysv_aa.c | lapacke_ssysv_aa.c | ||||
| lapacke_ssysv_aa_work.c | lapacke_ssysv_aa_work.c | ||||
| @@ -1616,6 +1630,8 @@ lapacke_ssysv_aa_2stage.c | |||||
| lapacke_ssysv_aa_2stage_work.c | lapacke_ssysv_aa_2stage_work.c | ||||
| lapacke_ssysv_rk.c | lapacke_ssysv_rk.c | ||||
| lapacke_ssysv_rk_work.c | lapacke_ssysv_rk_work.c | ||||
| lapacke_ssysv_rook.c | |||||
| lapacke_ssysv_rook_work.c | |||||
| lapacke_ssysvx.c | lapacke_ssysvx.c | ||||
| lapacke_ssysvx_work.c | lapacke_ssysvx_work.c | ||||
| lapacke_ssyswapr.c | lapacke_ssyswapr.c | ||||
| @@ -1624,33 +1640,33 @@ lapacke_ssytrd.c | |||||
| lapacke_ssytrd_work.c | lapacke_ssytrd_work.c | ||||
| lapacke_ssytrf.c | lapacke_ssytrf.c | ||||
| lapacke_ssytrf_work.c | lapacke_ssytrf_work.c | ||||
| lapacke_ssytrf_rook.c | |||||
| lapacke_ssytrf_rook_work.c | |||||
| lapacke_ssytrf_aa.c | lapacke_ssytrf_aa.c | ||||
| lapacke_ssytrf_aa_2stage.c | |||||
| lapacke_ssytrf_aa_work.c | lapacke_ssytrf_aa_work.c | ||||
| lapacke_ssytrf_aa_2stage.c | |||||
| lapacke_ssytrf_aa_2stage_work.c | lapacke_ssytrf_aa_2stage_work.c | ||||
| lapacke_ssytrf_rk.c | lapacke_ssytrf_rk.c | ||||
| lapacke_ssytrf_rk_work.c | lapacke_ssytrf_rk_work.c | ||||
| lapacke_ssytrf_rook.c | |||||
| lapacke_ssytrf_rook_work.c | |||||
| lapacke_ssytri.c | lapacke_ssytri.c | ||||
| lapacke_ssytri_work.c | |||||
| lapacke_ssytri2.c | lapacke_ssytri2.c | ||||
| lapacke_ssytri2_work.c | lapacke_ssytri2_work.c | ||||
| lapacke_ssytri_3.c | |||||
| lapacke_ssytri_3_work.c | |||||
| lapacke_ssytri2x.c | lapacke_ssytri2x.c | ||||
| lapacke_ssytri2x_work.c | lapacke_ssytri2x_work.c | ||||
| lapacke_ssytri_work.c | |||||
| lapacke_ssytri_3.c | |||||
| lapacke_ssytri_3_work.c | |||||
| lapacke_ssytrs.c | lapacke_ssytrs.c | ||||
| lapacke_ssytrs_rook.c | |||||
| lapacke_ssytrs_work.c | |||||
| lapacke_ssytrs2.c | lapacke_ssytrs2.c | ||||
| lapacke_ssytrs2_work.c | lapacke_ssytrs2_work.c | ||||
| lapacke_ssytrs_3.c | |||||
| lapacke_ssytrs_3_work.c | |||||
| lapacke_ssytrs_aa.c | lapacke_ssytrs_aa.c | ||||
| lapacke_ssytrs_aa_2stage.c | |||||
| lapacke_ssytrs_aa_work.c | lapacke_ssytrs_aa_work.c | ||||
| lapacke_ssytrs_aa_2stage.c | |||||
| lapacke_ssytrs_aa_2stage_work.c | lapacke_ssytrs_aa_2stage_work.c | ||||
| lapacke_ssytrs_3.c | |||||
| lapacke_ssytrs_3_work.c | |||||
| lapacke_ssytrs_work.c | |||||
| lapacke_ssytrs_rook.c | |||||
| lapacke_ssytrs_rook_work.c | lapacke_ssytrs_rook_work.c | ||||
| lapacke_stbcon.c | lapacke_stbcon.c | ||||
| lapacke_stbcon_work.c | lapacke_stbcon_work.c | ||||
| @@ -1722,6 +1738,8 @@ lapacke_strttp.c | |||||
| lapacke_strttp_work.c | lapacke_strttp_work.c | ||||
| lapacke_stzrzf.c | lapacke_stzrzf.c | ||||
| lapacke_stzrzf_work.c | lapacke_stzrzf_work.c | ||||
| ) | |||||
| set(SOURCES_COMPLEX16 | |||||
| lapacke_zbbcsd.c | lapacke_zbbcsd.c | ||||
| lapacke_zbbcsd_work.c | lapacke_zbbcsd_work.c | ||||
| lapacke_zbdsqr.c | lapacke_zbdsqr.c | ||||
| @@ -1805,11 +1823,11 @@ lapacke_zgeqrf_work.c | |||||
| lapacke_zgeqrfp.c | lapacke_zgeqrfp.c | ||||
| lapacke_zgeqrfp_work.c | lapacke_zgeqrfp_work.c | ||||
| lapacke_zgeqrt.c | lapacke_zgeqrt.c | ||||
| lapacke_zgeqrt_work.c | |||||
| lapacke_zgeqrt2.c | lapacke_zgeqrt2.c | ||||
| lapacke_zgeqrt2_work.c | lapacke_zgeqrt2_work.c | ||||
| lapacke_zgeqrt3.c | lapacke_zgeqrt3.c | ||||
| lapacke_zgeqrt3_work.c | lapacke_zgeqrt3_work.c | ||||
| lapacke_zgeqrt_work.c | |||||
| lapacke_zgerfs.c | lapacke_zgerfs.c | ||||
| lapacke_zgerfs_work.c | lapacke_zgerfs_work.c | ||||
| lapacke_zgerqf.c | lapacke_zgerqf.c | ||||
| @@ -1820,6 +1838,8 @@ lapacke_zgesv.c | |||||
| lapacke_zgesv_work.c | lapacke_zgesv_work.c | ||||
| lapacke_zgesvd.c | lapacke_zgesvd.c | ||||
| lapacke_zgesvd_work.c | lapacke_zgesvd_work.c | ||||
| lapacke_zgesvdq.c | |||||
| lapacke_zgesvdq_work.c | |||||
| lapacke_zgesvdx.c | lapacke_zgesvdx.c | ||||
| lapacke_zgesvdx_work.c | lapacke_zgesvdx_work.c | ||||
| lapacke_zgesvj.c | lapacke_zgesvj.c | ||||
| @@ -1856,10 +1876,10 @@ lapacke_zggevx.c | |||||
| lapacke_zggevx_work.c | lapacke_zggevx_work.c | ||||
| lapacke_zggglm.c | lapacke_zggglm.c | ||||
| lapacke_zggglm_work.c | lapacke_zggglm_work.c | ||||
| lapacke_zgghrd.c | |||||
| lapacke_zgghrd_work.c | |||||
| lapacke_zgghd3.c | lapacke_zgghd3.c | ||||
| lapacke_zgghd3_work.c | lapacke_zgghd3_work.c | ||||
| lapacke_zgghrd.c | |||||
| lapacke_zgghrd_work.c | |||||
| lapacke_zgglse.c | lapacke_zgglse.c | ||||
| lapacke_zgglse_work.c | lapacke_zgglse_work.c | ||||
| lapacke_zggqrf.c | lapacke_zggqrf.c | ||||
| @@ -1884,14 +1904,14 @@ lapacke_zgttrs.c | |||||
| lapacke_zgttrs_work.c | lapacke_zgttrs_work.c | ||||
| lapacke_zhbev.c | lapacke_zhbev.c | ||||
| lapacke_zhbev_work.c | lapacke_zhbev_work.c | ||||
| lapacke_zhbevd.c | |||||
| lapacke_zhbevd_work.c | |||||
| lapacke_zhbevx.c | |||||
| lapacke_zhbevx_work.c | |||||
| lapacke_zhbev_2stage.c | lapacke_zhbev_2stage.c | ||||
| lapacke_zhbev_2stage_work.c | lapacke_zhbev_2stage_work.c | ||||
| lapacke_zhbevd.c | |||||
| lapacke_zhbevd_work.c | |||||
| lapacke_zhbevd_2stage.c | lapacke_zhbevd_2stage.c | ||||
| lapacke_zhbevd_2stage_work.c | lapacke_zhbevd_2stage_work.c | ||||
| lapacke_zhbevx.c | |||||
| lapacke_zhbevx_work.c | |||||
| lapacke_zhbevx_2stage.c | lapacke_zhbevx_2stage.c | ||||
| lapacke_zhbevx_2stage_work.c | lapacke_zhbevx_2stage_work.c | ||||
| lapacke_zhbgst.c | lapacke_zhbgst.c | ||||
| @@ -1912,18 +1932,18 @@ lapacke_zheequb.c | |||||
| lapacke_zheequb_work.c | lapacke_zheequb_work.c | ||||
| lapacke_zheev.c | lapacke_zheev.c | ||||
| lapacke_zheev_work.c | lapacke_zheev_work.c | ||||
| lapacke_zheevd.c | |||||
| lapacke_zheevd_work.c | |||||
| lapacke_zheevr.c | |||||
| lapacke_zheevr_work.c | |||||
| lapacke_zheevx.c | |||||
| lapacke_zheevx_work.c | |||||
| lapacke_zheev_2stage.c | lapacke_zheev_2stage.c | ||||
| lapacke_zheev_2stage_work.c | lapacke_zheev_2stage_work.c | ||||
| lapacke_zheevd.c | |||||
| lapacke_zheevd_work.c | |||||
| lapacke_zheevd_2stage.c | lapacke_zheevd_2stage.c | ||||
| lapacke_zheevd_2stage_work.c | lapacke_zheevd_2stage_work.c | ||||
| lapacke_zheevr.c | |||||
| lapacke_zheevr_work.c | |||||
| lapacke_zheevr_2stage.c | lapacke_zheevr_2stage.c | ||||
| lapacke_zheevr_2stage_work.c | lapacke_zheevr_2stage_work.c | ||||
| lapacke_zheevx.c | |||||
| lapacke_zheevx_work.c | |||||
| lapacke_zheevx_2stage.c | lapacke_zheevx_2stage.c | ||||
| lapacke_zheevx_2stage_work.c | lapacke_zheevx_2stage_work.c | ||||
| lapacke_zhegst.c | lapacke_zhegst.c | ||||
| @@ -1941,8 +1961,8 @@ lapacke_zherfs_work.c | |||||
| lapacke_zhesv.c | lapacke_zhesv.c | ||||
| lapacke_zhesv_work.c | lapacke_zhesv_work.c | ||||
| lapacke_zhesv_aa.c | lapacke_zhesv_aa.c | ||||
| lapacke_zhesv_aa_2stage.c | |||||
| lapacke_zhesv_aa_work.c | lapacke_zhesv_aa_work.c | ||||
| lapacke_zhesv_aa_2stage.c | |||||
| lapacke_zhesv_aa_2stage_work.c | lapacke_zhesv_aa_2stage_work.c | ||||
| lapacke_zhesv_rk.c | lapacke_zhesv_rk.c | ||||
| lapacke_zhesv_rk_work.c | lapacke_zhesv_rk_work.c | ||||
| @@ -1953,34 +1973,34 @@ lapacke_zheswapr_work.c | |||||
| lapacke_zhetrd.c | lapacke_zhetrd.c | ||||
| lapacke_zhetrd_work.c | lapacke_zhetrd_work.c | ||||
| lapacke_zhetrf.c | lapacke_zhetrf.c | ||||
| lapacke_zhetrf_rook.c | |||||
| lapacke_zhetrf_work.c | lapacke_zhetrf_work.c | ||||
| lapacke_zhetrf_rook_work.c | |||||
| lapacke_zhetrf_aa.c | lapacke_zhetrf_aa.c | ||||
| lapacke_zhetrf_aa_2stage.c | |||||
| lapacke_zhetrf_aa_work.c | lapacke_zhetrf_aa_work.c | ||||
| lapacke_zhetrf_aa_2stage.c | |||||
| lapacke_zhetrf_aa_2stage_work.c | lapacke_zhetrf_aa_2stage_work.c | ||||
| lapacke_zhetrf_rk.c | lapacke_zhetrf_rk.c | ||||
| lapacke_zhetrf_rk_work.c | lapacke_zhetrf_rk_work.c | ||||
| lapacke_zhetrf_rook.c | |||||
| lapacke_zhetrf_rook_work.c | |||||
| lapacke_zhetri.c | lapacke_zhetri.c | ||||
| lapacke_zhetri_work.c | |||||
| lapacke_zhetri2.c | lapacke_zhetri2.c | ||||
| lapacke_zhetri2_work.c | lapacke_zhetri2_work.c | ||||
| lapacke_zhetri_3.c | |||||
| lapacke_zhetri_3_work.c | |||||
| lapacke_zhetri2x.c | lapacke_zhetri2x.c | ||||
| lapacke_zhetri2x_work.c | lapacke_zhetri2x_work.c | ||||
| lapacke_zhetri_work.c | |||||
| lapacke_zhetri_3.c | |||||
| lapacke_zhetri_3_work.c | |||||
| lapacke_zhetrs.c | lapacke_zhetrs.c | ||||
| lapacke_zhetrs_rook.c | |||||
| lapacke_zhetrs_work.c | |||||
| lapacke_zhetrs2.c | lapacke_zhetrs2.c | ||||
| lapacke_zhetrs2_work.c | lapacke_zhetrs2_work.c | ||||
| lapacke_zhetrs_work.c | |||||
| lapacke_zhetrs_3.c | |||||
| lapacke_zhetrs_3_work.c | |||||
| lapacke_zhetrs_aa.c | lapacke_zhetrs_aa.c | ||||
| lapacke_zhetrs_aa_2stage.c | |||||
| lapacke_zhetrs_aa_work.c | lapacke_zhetrs_aa_work.c | ||||
| lapacke_zhetrs_aa_2stage.c | |||||
| lapacke_zhetrs_aa_2stage_work.c | lapacke_zhetrs_aa_2stage_work.c | ||||
| lapacke_zhetrs_3.c | |||||
| lapacke_zhetrs_3_work.c | |||||
| lapacke_zhetrs_rook.c | |||||
| lapacke_zhetrs_rook_work.c | lapacke_zhetrs_rook_work.c | ||||
| lapacke_zhfrk.c | lapacke_zhfrk.c | ||||
| lapacke_zhfrk_work.c | lapacke_zhfrk_work.c | ||||
| @@ -2172,52 +2192,54 @@ lapacke_zsyconv.c | |||||
| lapacke_zsyconv_work.c | lapacke_zsyconv_work.c | ||||
| lapacke_zsyequb.c | lapacke_zsyequb.c | ||||
| lapacke_zsyequb_work.c | lapacke_zsyequb_work.c | ||||
| lapacke_zsyr.c | |||||
| lapacke_zsyr_work.c | |||||
| lapacke_zsyrfs.c | lapacke_zsyrfs.c | ||||
| lapacke_zsyrfs_work.c | lapacke_zsyrfs_work.c | ||||
| lapacke_zsysv.c | lapacke_zsysv.c | ||||
| lapacke_zsysv_rook.c | |||||
| lapacke_zsysv_rook_work.c | |||||
| lapacke_zsysv_work.c | lapacke_zsysv_work.c | ||||
| lapacke_zsysv_aa.c | lapacke_zsysv_aa.c | ||||
| lapacke_zsysv_aa_2stage.c | |||||
| lapacke_zsysv_aa_work.c | lapacke_zsysv_aa_work.c | ||||
| lapacke_zsysv_aa_2stage.c | |||||
| lapacke_zsysv_aa_2stage_work.c | lapacke_zsysv_aa_2stage_work.c | ||||
| lapacke_zsysv_rk.c | lapacke_zsysv_rk.c | ||||
| lapacke_zsysv_rk_work.c | lapacke_zsysv_rk_work.c | ||||
| lapacke_zsysv_rook.c | |||||
| lapacke_zsysv_rook_work.c | |||||
| lapacke_zsysvx.c | lapacke_zsysvx.c | ||||
| lapacke_zsysvx_work.c | lapacke_zsysvx_work.c | ||||
| lapacke_zsyswapr.c | lapacke_zsyswapr.c | ||||
| lapacke_zsyswapr_work.c | lapacke_zsyswapr_work.c | ||||
| lapacke_zsytrf.c | lapacke_zsytrf.c | ||||
| lapacke_zsytrf_work.c | lapacke_zsytrf_work.c | ||||
| lapacke_zsytrf_rook.c | |||||
| lapacke_zsytrf_rook_work.c | |||||
| lapacke_zsytrf_aa.c | lapacke_zsytrf_aa.c | ||||
| lapacke_zsytrf_aa_2stage.c | |||||
| lapacke_zsytrf_aa_work.c | lapacke_zsytrf_aa_work.c | ||||
| lapacke_zsytrf_aa_2stage.c | |||||
| lapacke_zsytrf_aa_2stage_work.c | lapacke_zsytrf_aa_2stage_work.c | ||||
| lapacke_zsytrf_rk.c | lapacke_zsytrf_rk.c | ||||
| lapacke_zsytrf_rk_work.c | lapacke_zsytrf_rk_work.c | ||||
| lapacke_zsytrf_rook.c | |||||
| lapacke_zsytrf_rook_work.c | |||||
| lapacke_zsytri.c | lapacke_zsytri.c | ||||
| lapacke_zsytri_work.c | |||||
| lapacke_zsytri2.c | lapacke_zsytri2.c | ||||
| lapacke_zsytri2_work.c | lapacke_zsytri2_work.c | ||||
| lapacke_zsytri_3.c | |||||
| lapacke_zsytri_3_work.c | |||||
| lapacke_zsytri2x.c | lapacke_zsytri2x.c | ||||
| lapacke_zsytri2x_work.c | lapacke_zsytri2x_work.c | ||||
| lapacke_zsytri_work.c | |||||
| lapacke_zsytri_3.c | |||||
| lapacke_zsytri_3_work.c | |||||
| lapacke_zsytrs.c | lapacke_zsytrs.c | ||||
| lapacke_zsytrs_rook.c | |||||
| lapacke_zsytrs_work.c | |||||
| lapacke_zsytrs2.c | lapacke_zsytrs2.c | ||||
| lapacke_zsytrs2_work.c | lapacke_zsytrs2_work.c | ||||
| lapacke_zsytrs_work.c | |||||
| lapacke_zsytrs_rook_work.c | |||||
| lapacke_zsytrs_3.c | |||||
| lapacke_zsytrs_3_work.c | |||||
| lapacke_zsytrs_aa.c | lapacke_zsytrs_aa.c | ||||
| lapacke_zsytrs_aa_2stage.c | |||||
| lapacke_zsytrs_aa_work.c | lapacke_zsytrs_aa_work.c | ||||
| lapacke_zsytrs_aa_2stage.c | |||||
| lapacke_zsytrs_aa_2stage_work.c | lapacke_zsytrs_aa_2stage_work.c | ||||
| lapacke_zsytrs_3.c | |||||
| lapacke_zsytrs_3_work.c | |||||
| lapacke_zsytrs_rook.c | |||||
| lapacke_zsytrs_rook_work.c | |||||
| lapacke_ztbcon.c | lapacke_ztbcon.c | ||||
| lapacke_ztbcon_work.c | lapacke_ztbcon_work.c | ||||
| lapacke_ztbrfs.c | lapacke_ztbrfs.c | ||||
| @@ -2249,9 +2271,9 @@ lapacke_ztpcon_work.c | |||||
| lapacke_ztpmqrt.c | lapacke_ztpmqrt.c | ||||
| lapacke_ztpmqrt_work.c | lapacke_ztpmqrt_work.c | ||||
| lapacke_ztpqrt.c | lapacke_ztpqrt.c | ||||
| lapacke_ztpqrt_work.c | |||||
| lapacke_ztpqrt2.c | lapacke_ztpqrt2.c | ||||
| lapacke_ztpqrt2_work.c | lapacke_ztpqrt2_work.c | ||||
| lapacke_ztpqrt_work.c | |||||
| lapacke_ztprfb.c | lapacke_ztprfb.c | ||||
| lapacke_ztprfb_work.c | lapacke_ztprfb_work.c | ||||
| lapacke_ztprfs.c | lapacke_ztprfs.c | ||||
| @@ -2328,11 +2350,6 @@ lapacke_zupgtr.c | |||||
| lapacke_zupgtr_work.c | lapacke_zupgtr_work.c | ||||
| lapacke_zupmtr.c | lapacke_zupmtr.c | ||||
| lapacke_zupmtr_work.c | lapacke_zupmtr_work.c | ||||
| lapacke_zsyr.c | |||||
| lapacke_csyr.c | |||||
| lapacke_zsyr_work.c | |||||
| lapacke_csyr_work.c | |||||
| lapacke_ilaver.c | |||||
| ) | ) | ||||
| set(DEPRECATED | set(DEPRECATED | ||||
| @@ -32,12 +32,21 @@ | |||||
| ############################################################################## | ############################################################################## | ||||
| # makefile for LAPACKE, used to build lapacke binary. | # makefile for LAPACKE, used to build lapacke binary. | ||||
| # | # | ||||
| # Note: we use multiple OBJ_A, OBJ_B, etc, instead of a single OBJ | |||||
| # Note: we use multiple OBJ_S, OBJ_C, etc, instead of a single OBJ | |||||
| # to allow build with mingw (argument list too long for the msys ar) | # to allow build with mingw (argument list too long for the msys ar) | ||||
| # | # | ||||
| include ../../make.inc | |||||
| TOPSRCDIR = ../.. | |||||
| include $(TOPSRCDIR)/make.inc | |||||
| OBJ_A = \ | |||||
| .SUFFIXES: .c .o | |||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| OBJ = \ | |||||
| lapacke_ilaver.o \ | |||||
| lapacke_nancheck.o | |||||
| OBJ_C = \ | |||||
| lapacke_cbbcsd.o \ | lapacke_cbbcsd.o \ | ||||
| lapacke_cbbcsd_work.o \ | lapacke_cbbcsd_work.o \ | ||||
| lapacke_cbdsqr.o \ | lapacke_cbdsqr.o \ | ||||
| @@ -82,12 +91,12 @@ lapacke_cgeevx.o \ | |||||
| lapacke_cgeevx_work.o \ | lapacke_cgeevx_work.o \ | ||||
| lapacke_cgehrd.o \ | lapacke_cgehrd.o \ | ||||
| lapacke_cgehrd_work.o \ | lapacke_cgehrd_work.o \ | ||||
| lapacke_cgejsv.o \ | |||||
| lapacke_cgejsv_work.o \ | |||||
| lapacke_cgelq.o \ | lapacke_cgelq.o \ | ||||
| lapacke_cgelq_work.o \ | lapacke_cgelq_work.o \ | ||||
| lapacke_cgelq2.o \ | lapacke_cgelq2.o \ | ||||
| lapacke_cgelq2_work.o \ | lapacke_cgelq2_work.o \ | ||||
| lapacke_cgejsv.o \ | |||||
| lapacke_cgejsv_work.o \ | |||||
| lapacke_cgelqf.o \ | lapacke_cgelqf.o \ | ||||
| lapacke_cgelqf_work.o \ | lapacke_cgelqf_work.o \ | ||||
| lapacke_cgels.o \ | lapacke_cgels.o \ | ||||
| @@ -117,11 +126,11 @@ lapacke_cgeqrf_work.o \ | |||||
| lapacke_cgeqrfp.o \ | lapacke_cgeqrfp.o \ | ||||
| lapacke_cgeqrfp_work.o \ | lapacke_cgeqrfp_work.o \ | ||||
| lapacke_cgeqrt.o \ | lapacke_cgeqrt.o \ | ||||
| lapacke_cgeqrt_work.o \ | |||||
| lapacke_cgeqrt2.o \ | lapacke_cgeqrt2.o \ | ||||
| lapacke_cgeqrt2_work.o \ | lapacke_cgeqrt2_work.o \ | ||||
| lapacke_cgeqrt3.o \ | lapacke_cgeqrt3.o \ | ||||
| lapacke_cgeqrt3_work.o \ | lapacke_cgeqrt3_work.o \ | ||||
| lapacke_cgeqrt_work.o \ | |||||
| lapacke_cgerfs.o \ | lapacke_cgerfs.o \ | ||||
| lapacke_cgerfs_work.o \ | lapacke_cgerfs_work.o \ | ||||
| lapacke_cgerqf.o \ | lapacke_cgerqf.o \ | ||||
| @@ -132,6 +141,8 @@ lapacke_cgesv.o \ | |||||
| lapacke_cgesv_work.o \ | lapacke_cgesv_work.o \ | ||||
| lapacke_cgesvd.o \ | lapacke_cgesvd.o \ | ||||
| lapacke_cgesvd_work.o \ | lapacke_cgesvd_work.o \ | ||||
| lapacke_cgesvdq.o \ | |||||
| lapacke_cgesvdq_work.o \ | |||||
| lapacke_cgesvdx.o \ | lapacke_cgesvdx.o \ | ||||
| lapacke_cgesvdx_work.o \ | lapacke_cgesvdx_work.o \ | ||||
| lapacke_cgesvj.o \ | lapacke_cgesvj.o \ | ||||
| @@ -168,10 +179,10 @@ lapacke_cggevx.o \ | |||||
| lapacke_cggevx_work.o \ | lapacke_cggevx_work.o \ | ||||
| lapacke_cggglm.o \ | lapacke_cggglm.o \ | ||||
| lapacke_cggglm_work.o \ | lapacke_cggglm_work.o \ | ||||
| lapacke_cgghrd.o \ | |||||
| lapacke_cgghrd_work.o \ | |||||
| lapacke_cgghd3.o \ | lapacke_cgghd3.o \ | ||||
| lapacke_cgghd3_work.o \ | lapacke_cgghd3_work.o \ | ||||
| lapacke_cgghrd.o \ | |||||
| lapacke_cgghrd_work.o \ | |||||
| lapacke_cgglse.o \ | lapacke_cgglse.o \ | ||||
| lapacke_cgglse_work.o \ | lapacke_cgglse_work.o \ | ||||
| lapacke_cggqrf.o \ | lapacke_cggqrf.o \ | ||||
| @@ -196,14 +207,14 @@ lapacke_cgttrs.o \ | |||||
| lapacke_cgttrs_work.o \ | lapacke_cgttrs_work.o \ | ||||
| lapacke_chbev.o \ | lapacke_chbev.o \ | ||||
| lapacke_chbev_work.o \ | lapacke_chbev_work.o \ | ||||
| lapacke_chbevd.o \ | |||||
| lapacke_chbevd_work.o \ | |||||
| lapacke_chbevx.o \ | |||||
| lapacke_chbevx_work.o \ | |||||
| lapacke_chbev_2stage.o \ | lapacke_chbev_2stage.o \ | ||||
| lapacke_chbev_2stage_work.o \ | lapacke_chbev_2stage_work.o \ | ||||
| lapacke_chbevd.o \ | |||||
| lapacke_chbevd_work.o \ | |||||
| lapacke_chbevd_2stage.o \ | lapacke_chbevd_2stage.o \ | ||||
| lapacke_chbevd_2stage_work.o \ | lapacke_chbevd_2stage_work.o \ | ||||
| lapacke_chbevx.o \ | |||||
| lapacke_chbevx_work.o \ | |||||
| lapacke_chbevx_2stage.o \ | lapacke_chbevx_2stage.o \ | ||||
| lapacke_chbevx_2stage_work.o \ | lapacke_chbevx_2stage_work.o \ | ||||
| lapacke_chbgst.o \ | lapacke_chbgst.o \ | ||||
| @@ -224,18 +235,18 @@ lapacke_cheequb.o \ | |||||
| lapacke_cheequb_work.o \ | lapacke_cheequb_work.o \ | ||||
| lapacke_cheev.o \ | lapacke_cheev.o \ | ||||
| lapacke_cheev_work.o \ | lapacke_cheev_work.o \ | ||||
| lapacke_cheevd.o \ | |||||
| lapacke_cheevd_work.o \ | |||||
| lapacke_cheevr.o \ | |||||
| lapacke_cheevr_work.o \ | |||||
| lapacke_cheevx.o \ | |||||
| lapacke_cheevx_work.o \ | |||||
| lapacke_cheev_2stage.o \ | lapacke_cheev_2stage.o \ | ||||
| lapacke_cheev_2stage_work.o \ | lapacke_cheev_2stage_work.o \ | ||||
| lapacke_cheevd.o \ | |||||
| lapacke_cheevd_work.o \ | |||||
| lapacke_cheevd_2stage.o \ | lapacke_cheevd_2stage.o \ | ||||
| lapacke_cheevd_2stage_work.o \ | lapacke_cheevd_2stage_work.o \ | ||||
| lapacke_cheevr.o \ | |||||
| lapacke_cheevr_work.o \ | |||||
| lapacke_cheevr_2stage.o \ | lapacke_cheevr_2stage.o \ | ||||
| lapacke_cheevr_2stage_work.o \ | lapacke_cheevr_2stage_work.o \ | ||||
| lapacke_cheevx.o \ | |||||
| lapacke_cheevx_work.o \ | |||||
| lapacke_cheevx_2stage.o \ | lapacke_cheevx_2stage.o \ | ||||
| lapacke_cheevx_2stage_work.o \ | lapacke_cheevx_2stage_work.o \ | ||||
| lapacke_chegst.o \ | lapacke_chegst.o \ | ||||
| @@ -265,35 +276,35 @@ lapacke_cheswapr_work.o \ | |||||
| lapacke_chetrd.o \ | lapacke_chetrd.o \ | ||||
| lapacke_chetrd_work.o \ | lapacke_chetrd_work.o \ | ||||
| lapacke_chetrf.o \ | lapacke_chetrf.o \ | ||||
| lapacke_chetrf_rook.o \ | |||||
| lapacke_chetrf_work.o \ | lapacke_chetrf_work.o \ | ||||
| lapacke_chetrf_rook_work.o \ | |||||
| lapacke_chetrf_aa.o \ | lapacke_chetrf_aa.o \ | ||||
| lapacke_chetrf_aa_2stage.o \ | |||||
| lapacke_chetrf_aa_work.o \ | lapacke_chetrf_aa_work.o \ | ||||
| lapacke_chetrf_aa_2stage.o \ | |||||
| lapacke_chetrf_aa_2stage_work.o \ | lapacke_chetrf_aa_2stage_work.o \ | ||||
| lapacke_chetrf_rk.o \ | lapacke_chetrf_rk.o \ | ||||
| lapacke_chetrf_rk_work.o \ | lapacke_chetrf_rk_work.o \ | ||||
| lapacke_chetrf_rook.o \ | |||||
| lapacke_chetrf_rook_work.o \ | |||||
| lapacke_chetri.o \ | lapacke_chetri.o \ | ||||
| lapacke_chetri_work.o \ | |||||
| lapacke_chetri2.o \ | lapacke_chetri2.o \ | ||||
| lapacke_chetri2_work.o \ | lapacke_chetri2_work.o \ | ||||
| lapacke_chetri_3.o \ | |||||
| lapacke_chetri_3_work.o \ | |||||
| lapacke_chetri2x.o \ | lapacke_chetri2x.o \ | ||||
| lapacke_chetri2x_work.o \ | lapacke_chetri2x_work.o \ | ||||
| lapacke_chetri_work.o \ | |||||
| lapacke_chetri_3.o \ | |||||
| lapacke_chetri_3_work.o \ | |||||
| lapacke_chetrs.o \ | lapacke_chetrs.o \ | ||||
| lapacke_chetrs_rook.o \ | |||||
| lapacke_chetrs_work.o \ | |||||
| lapacke_chetrs2.o \ | lapacke_chetrs2.o \ | ||||
| lapacke_chetrs2_work.o \ | lapacke_chetrs2_work.o \ | ||||
| lapacke_chetrs_work.o \ | |||||
| lapacke_chetrs_rook_work.o \ | |||||
| lapacke_chetrs_3.o \ | |||||
| lapacke_chetrs_3_work.o \ | |||||
| lapacke_chetrs_aa.o \ | lapacke_chetrs_aa.o \ | ||||
| lapacke_chetrs_aa_2stage.o \ | |||||
| lapacke_chetrs_aa_work.o \ | lapacke_chetrs_aa_work.o \ | ||||
| lapacke_chetrs_aa_2stage.o \ | |||||
| lapacke_chetrs_aa_2stage_work.o \ | lapacke_chetrs_aa_2stage_work.o \ | ||||
| lapacke_chetrs_3.o \ | |||||
| lapacke_chetrs_3_work.o \ | |||||
| lapacke_chetrs_rook.o \ | |||||
| lapacke_chetrs_rook_work.o \ | |||||
| lapacke_chfrk.o \ | lapacke_chfrk.o \ | ||||
| lapacke_chfrk_work.o \ | lapacke_chfrk_work.o \ | ||||
| lapacke_chgeqz.o \ | lapacke_chgeqz.o \ | ||||
| @@ -484,11 +495,11 @@ lapacke_csyconv.o \ | |||||
| lapacke_csyconv_work.o \ | lapacke_csyconv_work.o \ | ||||
| lapacke_csyequb.o \ | lapacke_csyequb.o \ | ||||
| lapacke_csyequb_work.o \ | lapacke_csyequb_work.o \ | ||||
| lapacke_csyr.o \ | |||||
| lapacke_csyr_work.o \ | |||||
| lapacke_csyrfs.o \ | lapacke_csyrfs.o \ | ||||
| lapacke_csyrfs_work.o \ | lapacke_csyrfs_work.o \ | ||||
| lapacke_csysv.o \ | lapacke_csysv.o \ | ||||
| lapacke_csysv_rook.o \ | |||||
| lapacke_csysv_rook_work.o \ | |||||
| lapacke_csysv_work.o \ | lapacke_csysv_work.o \ | ||||
| lapacke_csysv_aa.o \ | lapacke_csysv_aa.o \ | ||||
| lapacke_csysv_aa_work.o \ | lapacke_csysv_aa_work.o \ | ||||
| @@ -496,40 +507,42 @@ lapacke_csysv_aa_2stage.o \ | |||||
| lapacke_csysv_aa_2stage_work.o \ | lapacke_csysv_aa_2stage_work.o \ | ||||
| lapacke_csysv_rk.o \ | lapacke_csysv_rk.o \ | ||||
| lapacke_csysv_rk_work.o \ | lapacke_csysv_rk_work.o \ | ||||
| lapacke_csysv_rook.o \ | |||||
| lapacke_csysv_rook_work.o \ | |||||
| lapacke_csysvx.o \ | lapacke_csysvx.o \ | ||||
| lapacke_csysvx_work.o \ | lapacke_csysvx_work.o \ | ||||
| lapacke_csyswapr.o \ | lapacke_csyswapr.o \ | ||||
| lapacke_csyswapr_work.o \ | lapacke_csyswapr_work.o \ | ||||
| lapacke_csytrf.o \ | lapacke_csytrf.o \ | ||||
| lapacke_csytrf_work.o \ | lapacke_csytrf_work.o \ | ||||
| lapacke_csytrf_rook.o \ | |||||
| lapacke_csytrf_rook_work.o \ | |||||
| lapacke_csytrf_aa.o \ | lapacke_csytrf_aa.o \ | ||||
| lapacke_csytrf_aa_2stage.o \ | |||||
| lapacke_csytrf_aa_work.o \ | lapacke_csytrf_aa_work.o \ | ||||
| lapacke_csytrf_aa_2stage.o \ | |||||
| lapacke_csytrf_aa_2stage_work.o \ | lapacke_csytrf_aa_2stage_work.o \ | ||||
| lapacke_csytrf_rk.o \ | lapacke_csytrf_rk.o \ | ||||
| lapacke_csytrf_rk_work.o \ | lapacke_csytrf_rk_work.o \ | ||||
| lapacke_csytrf_rook.o \ | |||||
| lapacke_csytrf_rook_work.o \ | |||||
| lapacke_csytri.o \ | lapacke_csytri.o \ | ||||
| lapacke_csytri_work.o \ | |||||
| lapacke_csytri2.o \ | lapacke_csytri2.o \ | ||||
| lapacke_csytri2_work.o \ | lapacke_csytri2_work.o \ | ||||
| lapacke_csytri_3.o \ | |||||
| lapacke_csytri_3_work.o \ | |||||
| lapacke_csytri2x.o \ | lapacke_csytri2x.o \ | ||||
| lapacke_csytri2x_work.o \ | lapacke_csytri2x_work.o \ | ||||
| lapacke_csytri_work.o \ | |||||
| lapacke_csytri_3.o \ | |||||
| lapacke_csytri_3_work.o \ | |||||
| lapacke_csytrs.o \ | lapacke_csytrs.o \ | ||||
| lapacke_csytrs_rook.o \ | |||||
| lapacke_csytrs_work.o \ | |||||
| lapacke_csytrs2.o \ | lapacke_csytrs2.o \ | ||||
| lapacke_csytrs2_work.o \ | lapacke_csytrs2_work.o \ | ||||
| lapacke_csytrs_work.o \ | |||||
| lapacke_csytrs_rook_work.o \ | |||||
| lapacke_csytrs_3.o \ | |||||
| lapacke_csytrs_3_work.o \ | |||||
| lapacke_csytrs_aa.o \ | lapacke_csytrs_aa.o \ | ||||
| lapacke_csytrs_aa_2stage.o \ | |||||
| lapacke_csytrs_aa_work.o \ | lapacke_csytrs_aa_work.o \ | ||||
| lapacke_csytrs_aa_2stage.o \ | |||||
| lapacke_csytrs_aa_2stage_work.o \ | lapacke_csytrs_aa_2stage_work.o \ | ||||
| lapacke_csytrs_3.o \ | |||||
| lapacke_csytrs_3_work.o \ | |||||
| lapacke_csytrs_rook.o \ | |||||
| lapacke_csytrs_rook_work.o \ | |||||
| lapacke_ctbcon.o \ | lapacke_ctbcon.o \ | ||||
| lapacke_ctbcon_work.o \ | lapacke_ctbcon_work.o \ | ||||
| lapacke_ctbrfs.o \ | lapacke_ctbrfs.o \ | ||||
| @@ -561,9 +574,9 @@ lapacke_ctpcon_work.o \ | |||||
| lapacke_ctpmqrt.o \ | lapacke_ctpmqrt.o \ | ||||
| lapacke_ctpmqrt_work.o \ | lapacke_ctpmqrt_work.o \ | ||||
| lapacke_ctpqrt.o \ | lapacke_ctpqrt.o \ | ||||
| lapacke_ctpqrt_work.o \ | |||||
| lapacke_ctpqrt2.o \ | lapacke_ctpqrt2.o \ | ||||
| lapacke_ctpqrt2_work.o \ | lapacke_ctpqrt2_work.o \ | ||||
| lapacke_ctpqrt_work.o \ | |||||
| lapacke_ctprfb.o \ | lapacke_ctprfb.o \ | ||||
| lapacke_ctprfb_work.o \ | lapacke_ctprfb_work.o \ | ||||
| lapacke_ctprfs.o \ | lapacke_ctprfs.o \ | ||||
| @@ -639,15 +652,17 @@ lapacke_cunmtr_work.o \ | |||||
| lapacke_cupgtr.o \ | lapacke_cupgtr.o \ | ||||
| lapacke_cupgtr_work.o \ | lapacke_cupgtr_work.o \ | ||||
| lapacke_cupmtr.o \ | lapacke_cupmtr.o \ | ||||
| lapacke_cupmtr_work.o \ | |||||
| lapacke_cupmtr_work.o | |||||
| OBJ_D = \ | |||||
| lapacke_dbbcsd.o \ | lapacke_dbbcsd.o \ | ||||
| lapacke_dbbcsd_work.o \ | lapacke_dbbcsd_work.o \ | ||||
| lapacke_dbdsdc.o \ | lapacke_dbdsdc.o \ | ||||
| lapacke_dbdsdc_work.o \ | lapacke_dbdsdc_work.o \ | ||||
| lapacke_dbdsvdx.o \ | |||||
| lapacke_dbdsvdx_work.o \ | |||||
| lapacke_dbdsqr.o \ | lapacke_dbdsqr.o \ | ||||
| lapacke_dbdsqr_work.o \ | lapacke_dbdsqr_work.o \ | ||||
| lapacke_dbdsvdx.o \ | |||||
| lapacke_dbdsvdx_work.o \ | |||||
| lapacke_ddisna.o \ | lapacke_ddisna.o \ | ||||
| lapacke_ddisna_work.o \ | lapacke_ddisna_work.o \ | ||||
| lapacke_dgbbrd.o \ | lapacke_dgbbrd.o \ | ||||
| @@ -725,11 +740,11 @@ lapacke_dgeqrf_work.o \ | |||||
| lapacke_dgeqrfp.o \ | lapacke_dgeqrfp.o \ | ||||
| lapacke_dgeqrfp_work.o \ | lapacke_dgeqrfp_work.o \ | ||||
| lapacke_dgeqrt.o \ | lapacke_dgeqrt.o \ | ||||
| lapacke_dgeqrt_work.o \ | |||||
| lapacke_dgeqrt2.o \ | lapacke_dgeqrt2.o \ | ||||
| lapacke_dgeqrt2_work.o \ | lapacke_dgeqrt2_work.o \ | ||||
| lapacke_dgeqrt3.o \ | lapacke_dgeqrt3.o \ | ||||
| lapacke_dgeqrt3_work.o \ | lapacke_dgeqrt3_work.o \ | ||||
| lapacke_dgeqrt_work.o \ | |||||
| lapacke_dgerfs.o \ | lapacke_dgerfs.o \ | ||||
| lapacke_dgerfs_work.o \ | lapacke_dgerfs_work.o \ | ||||
| lapacke_dgerqf.o \ | lapacke_dgerqf.o \ | ||||
| @@ -740,6 +755,8 @@ lapacke_dgesv.o \ | |||||
| lapacke_dgesv_work.o \ | lapacke_dgesv_work.o \ | ||||
| lapacke_dgesvd.o \ | lapacke_dgesvd.o \ | ||||
| lapacke_dgesvd_work.o \ | lapacke_dgesvd_work.o \ | ||||
| lapacke_dgesvdq.o \ | |||||
| lapacke_dgesvdq_work.o \ | |||||
| lapacke_dgesvdx.o \ | lapacke_dgesvdx.o \ | ||||
| lapacke_dgesvdx_work.o \ | lapacke_dgesvdx_work.o \ | ||||
| lapacke_dgesvj.o \ | lapacke_dgesvj.o \ | ||||
| @@ -776,10 +793,10 @@ lapacke_dggevx.o \ | |||||
| lapacke_dggevx_work.o \ | lapacke_dggevx_work.o \ | ||||
| lapacke_dggglm.o \ | lapacke_dggglm.o \ | ||||
| lapacke_dggglm_work.o \ | lapacke_dggglm_work.o \ | ||||
| lapacke_dgghrd.o \ | |||||
| lapacke_dgghrd_work.o \ | |||||
| lapacke_dgghd3.o \ | lapacke_dgghd3.o \ | ||||
| lapacke_dgghd3_work.o \ | lapacke_dgghd3_work.o \ | ||||
| lapacke_dgghrd.o \ | |||||
| lapacke_dgghrd_work.o \ | |||||
| lapacke_dgglse.o \ | lapacke_dgglse.o \ | ||||
| lapacke_dgglse_work.o \ | lapacke_dgglse_work.o \ | ||||
| lapacke_dggqrf.o \ | lapacke_dggqrf.o \ | ||||
| @@ -972,14 +989,14 @@ lapacke_dpttrs.o \ | |||||
| lapacke_dpttrs_work.o \ | lapacke_dpttrs_work.o \ | ||||
| lapacke_dsbev.o \ | lapacke_dsbev.o \ | ||||
| lapacke_dsbev_work.o \ | lapacke_dsbev_work.o \ | ||||
| lapacke_dsbevd.o \ | |||||
| lapacke_dsbevd_work.o \ | |||||
| lapacke_dsbevx.o \ | |||||
| lapacke_dsbevx_work.o \ | |||||
| lapacke_dsbev_2stage.o \ | lapacke_dsbev_2stage.o \ | ||||
| lapacke_dsbev_2stage_work.o \ | lapacke_dsbev_2stage_work.o \ | ||||
| lapacke_dsbevd.o \ | |||||
| lapacke_dsbevd_work.o \ | |||||
| lapacke_dsbevd_2stage.o \ | lapacke_dsbevd_2stage.o \ | ||||
| lapacke_dsbevd_2stage_work.o \ | lapacke_dsbevd_2stage_work.o \ | ||||
| lapacke_dsbevx.o \ | |||||
| lapacke_dsbevx_work.o \ | |||||
| lapacke_dsbevx_2stage.o \ | lapacke_dsbevx_2stage.o \ | ||||
| lapacke_dsbevx_2stage_work.o \ | lapacke_dsbevx_2stage_work.o \ | ||||
| lapacke_dsbgst.o \ | lapacke_dsbgst.o \ | ||||
| @@ -1060,18 +1077,18 @@ lapacke_dsyequb.o \ | |||||
| lapacke_dsyequb_work.o \ | lapacke_dsyequb_work.o \ | ||||
| lapacke_dsyev.o \ | lapacke_dsyev.o \ | ||||
| lapacke_dsyev_work.o \ | lapacke_dsyev_work.o \ | ||||
| lapacke_dsyevd.o \ | |||||
| lapacke_dsyevd_work.o \ | |||||
| lapacke_dsyevr.o \ | |||||
| lapacke_dsyevr_work.o \ | |||||
| lapacke_dsyevx.o \ | |||||
| lapacke_dsyevx_work.o \ | |||||
| lapacke_dsyev_2stage.o \ | lapacke_dsyev_2stage.o \ | ||||
| lapacke_dsyev_2stage_work.o \ | lapacke_dsyev_2stage_work.o \ | ||||
| lapacke_dsyevd.o \ | |||||
| lapacke_dsyevd_work.o \ | |||||
| lapacke_dsyevd_2stage.o \ | lapacke_dsyevd_2stage.o \ | ||||
| lapacke_dsyevd_2stage_work.o \ | lapacke_dsyevd_2stage_work.o \ | ||||
| lapacke_dsyevr.o \ | |||||
| lapacke_dsyevr_work.o \ | |||||
| lapacke_dsyevr_2stage.o \ | lapacke_dsyevr_2stage.o \ | ||||
| lapacke_dsyevr_2stage_work.o \ | lapacke_dsyevr_2stage_work.o \ | ||||
| lapacke_dsyevx.o \ | |||||
| lapacke_dsyevx_work.o \ | |||||
| lapacke_dsyevx_2stage.o \ | lapacke_dsyevx_2stage.o \ | ||||
| lapacke_dsyevx_2stage_work.o \ | lapacke_dsyevx_2stage_work.o \ | ||||
| lapacke_dsygst.o \ | lapacke_dsygst.o \ | ||||
| @@ -1087,8 +1104,6 @@ lapacke_dsygvx_work.o \ | |||||
| lapacke_dsyrfs.o \ | lapacke_dsyrfs.o \ | ||||
| lapacke_dsyrfs_work.o \ | lapacke_dsyrfs_work.o \ | ||||
| lapacke_dsysv.o \ | lapacke_dsysv.o \ | ||||
| lapacke_dsysv_rook.o \ | |||||
| lapacke_dsysv_rook_work.o \ | |||||
| lapacke_dsysv_work.o \ | lapacke_dsysv_work.o \ | ||||
| lapacke_dsysv_aa.o \ | lapacke_dsysv_aa.o \ | ||||
| lapacke_dsysv_aa_work.o \ | lapacke_dsysv_aa_work.o \ | ||||
| @@ -1096,6 +1111,8 @@ lapacke_dsysv_aa_2stage.o \ | |||||
| lapacke_dsysv_aa_2stage_work.o \ | lapacke_dsysv_aa_2stage_work.o \ | ||||
| lapacke_dsysv_rk.o \ | lapacke_dsysv_rk.o \ | ||||
| lapacke_dsysv_rk_work.o \ | lapacke_dsysv_rk_work.o \ | ||||
| lapacke_dsysv_rook.o \ | |||||
| lapacke_dsysv_rook_work.o \ | |||||
| lapacke_dsysvx.o \ | lapacke_dsysvx.o \ | ||||
| lapacke_dsysvx_work.o \ | lapacke_dsysvx_work.o \ | ||||
| lapacke_dsyswapr.o \ | lapacke_dsyswapr.o \ | ||||
| @@ -1104,36 +1121,34 @@ lapacke_dsytrd.o \ | |||||
| lapacke_dsytrd_work.o \ | lapacke_dsytrd_work.o \ | ||||
| lapacke_dsytrf.o \ | lapacke_dsytrf.o \ | ||||
| lapacke_dsytrf_work.o \ | lapacke_dsytrf_work.o \ | ||||
| lapacke_dsytrf_rook.o \ | |||||
| lapacke_dsytrf_rook_work.o \ | |||||
| lapacke_dsytrf_aa.o \ | lapacke_dsytrf_aa.o \ | ||||
| lapacke_dsytrf_aa_work.o \ | lapacke_dsytrf_aa_work.o \ | ||||
| lapacke_dsytrf_aa_2stage.o \ | lapacke_dsytrf_aa_2stage.o \ | ||||
| lapacke_dsytrf_aa_2stage_work.o \ | lapacke_dsytrf_aa_2stage_work.o \ | ||||
| lapacke_dsytrf_rk.o \ | lapacke_dsytrf_rk.o \ | ||||
| lapacke_dsytrf_rk_work.o \ | lapacke_dsytrf_rk_work.o \ | ||||
| lapacke_dsytrf_rook.o \ | |||||
| lapacke_dsytrf_rook_work.o \ | |||||
| lapacke_dsytri.o \ | lapacke_dsytri.o \ | ||||
| lapacke_dsytri_work.o \ | |||||
| lapacke_dsytri2.o \ | lapacke_dsytri2.o \ | ||||
| lapacke_dsytri2_work.o \ | lapacke_dsytri2_work.o \ | ||||
| lapacke_dsytri_3.o \ | |||||
| lapacke_dsytri_3_work.o \ | |||||
| lapacke_dsytri2x.o \ | lapacke_dsytri2x.o \ | ||||
| lapacke_dsytri2x_work.o \ | lapacke_dsytri2x_work.o \ | ||||
| lapacke_dsytri_work.o | |||||
| OBJ_B = \ | |||||
| lapacke_dsytri_3.o \ | |||||
| lapacke_dsytri_3_work.o \ | |||||
| lapacke_dsytrs.o \ | lapacke_dsytrs.o \ | ||||
| lapacke_dsytrs_rook.o \ | |||||
| lapacke_dsytrs_work.o \ | |||||
| lapacke_dsytrs2.o \ | lapacke_dsytrs2.o \ | ||||
| lapacke_dsytrs2_work.o \ | lapacke_dsytrs2_work.o \ | ||||
| lapacke_dsytrs_work.o \ | |||||
| lapacke_dsytrs_rook_work.o \ | |||||
| lapacke_dsytrs_3.o \ | |||||
| lapacke_dsytrs_3_work.o \ | |||||
| lapacke_dsytrs_aa.o \ | lapacke_dsytrs_aa.o \ | ||||
| lapacke_dsytrs_aa_2stage.o \ | |||||
| lapacke_dsytrs_aa_work.o \ | lapacke_dsytrs_aa_work.o \ | ||||
| lapacke_dsytrs_aa_2stage.o \ | |||||
| lapacke_dsytrs_aa_2stage_work.o \ | lapacke_dsytrs_aa_2stage_work.o \ | ||||
| lapacke_dsytrs_3.o \ | |||||
| lapacke_dsytrs_3_work.o \ | |||||
| lapacke_dsytrs_rook.o \ | |||||
| lapacke_dsytrs_rook_work.o \ | |||||
| lapacke_dtbcon.o \ | lapacke_dtbcon.o \ | ||||
| lapacke_dtbcon_work.o \ | lapacke_dtbcon_work.o \ | ||||
| lapacke_dtbrfs.o \ | lapacke_dtbrfs.o \ | ||||
| @@ -1165,9 +1180,9 @@ lapacke_dtpcon_work.o \ | |||||
| lapacke_dtpmqrt.o \ | lapacke_dtpmqrt.o \ | ||||
| lapacke_dtpmqrt_work.o \ | lapacke_dtpmqrt_work.o \ | ||||
| lapacke_dtpqrt.o \ | lapacke_dtpqrt.o \ | ||||
| lapacke_dtpqrt_work.o \ | |||||
| lapacke_dtpqrt2.o \ | lapacke_dtpqrt2.o \ | ||||
| lapacke_dtpqrt2_work.o \ | lapacke_dtpqrt2_work.o \ | ||||
| lapacke_dtpqrt_work.o \ | |||||
| lapacke_dtprfb.o \ | lapacke_dtprfb.o \ | ||||
| lapacke_dtprfb_work.o \ | lapacke_dtprfb_work.o \ | ||||
| lapacke_dtprfs.o \ | lapacke_dtprfs.o \ | ||||
| @@ -1203,16 +1218,17 @@ lapacke_dtrttf_work.o \ | |||||
| lapacke_dtrttp.o \ | lapacke_dtrttp.o \ | ||||
| lapacke_dtrttp_work.o \ | lapacke_dtrttp_work.o \ | ||||
| lapacke_dtzrzf.o \ | lapacke_dtzrzf.o \ | ||||
| lapacke_dtzrzf_work.o \ | |||||
| lapacke_nancheck.o \ | |||||
| lapacke_dtzrzf_work.o | |||||
| OBJ_S = \ | |||||
| lapacke_sbbcsd.o \ | lapacke_sbbcsd.o \ | ||||
| lapacke_sbbcsd_work.o \ | lapacke_sbbcsd_work.o \ | ||||
| lapacke_sbdsdc.o \ | lapacke_sbdsdc.o \ | ||||
| lapacke_sbdsdc_work.o \ | lapacke_sbdsdc_work.o \ | ||||
| lapacke_sbdsvdx.o \ | |||||
| lapacke_sbdsvdx_work.o \ | |||||
| lapacke_sbdsqr.o \ | lapacke_sbdsqr.o \ | ||||
| lapacke_sbdsqr_work.o \ | lapacke_sbdsqr_work.o \ | ||||
| lapacke_sbdsvdx.o \ | |||||
| lapacke_sbdsvdx_work.o \ | |||||
| lapacke_sdisna.o \ | lapacke_sdisna.o \ | ||||
| lapacke_sdisna_work.o \ | lapacke_sdisna_work.o \ | ||||
| lapacke_sgbbrd.o \ | lapacke_sgbbrd.o \ | ||||
| @@ -1290,11 +1306,11 @@ lapacke_sgeqrf_work.o \ | |||||
| lapacke_sgeqrfp.o \ | lapacke_sgeqrfp.o \ | ||||
| lapacke_sgeqrfp_work.o \ | lapacke_sgeqrfp_work.o \ | ||||
| lapacke_sgeqrt.o \ | lapacke_sgeqrt.o \ | ||||
| lapacke_sgeqrt_work.o \ | |||||
| lapacke_sgeqrt2.o \ | lapacke_sgeqrt2.o \ | ||||
| lapacke_sgeqrt2_work.o \ | lapacke_sgeqrt2_work.o \ | ||||
| lapacke_sgeqrt3.o \ | lapacke_sgeqrt3.o \ | ||||
| lapacke_sgeqrt3_work.o \ | lapacke_sgeqrt3_work.o \ | ||||
| lapacke_sgeqrt_work.o \ | |||||
| lapacke_sgerfs.o \ | lapacke_sgerfs.o \ | ||||
| lapacke_sgerfs_work.o \ | lapacke_sgerfs_work.o \ | ||||
| lapacke_sgerqf.o \ | lapacke_sgerqf.o \ | ||||
| @@ -1305,6 +1321,8 @@ lapacke_sgesv.o \ | |||||
| lapacke_sgesv_work.o \ | lapacke_sgesv_work.o \ | ||||
| lapacke_sgesvd.o \ | lapacke_sgesvd.o \ | ||||
| lapacke_sgesvd_work.o \ | lapacke_sgesvd_work.o \ | ||||
| lapacke_sgesvdq.o \ | |||||
| lapacke_sgesvdq_work.o \ | |||||
| lapacke_sgesvdx.o \ | lapacke_sgesvdx.o \ | ||||
| lapacke_sgesvdx_work.o \ | lapacke_sgesvdx_work.o \ | ||||
| lapacke_sgesvj.o \ | lapacke_sgesvj.o \ | ||||
| @@ -1341,10 +1359,10 @@ lapacke_sggevx.o \ | |||||
| lapacke_sggevx_work.o \ | lapacke_sggevx_work.o \ | ||||
| lapacke_sggglm.o \ | lapacke_sggglm.o \ | ||||
| lapacke_sggglm_work.o \ | lapacke_sggglm_work.o \ | ||||
| lapacke_sgghrd.o \ | |||||
| lapacke_sgghrd_work.o \ | |||||
| lapacke_sgghd3.o \ | lapacke_sgghd3.o \ | ||||
| lapacke_sgghd3_work.o \ | lapacke_sgghd3_work.o \ | ||||
| lapacke_sgghrd.o \ | |||||
| lapacke_sgghrd_work.o \ | |||||
| lapacke_sgglse.o \ | lapacke_sgglse.o \ | ||||
| lapacke_sgglse_work.o \ | lapacke_sgglse_work.o \ | ||||
| lapacke_sggqrf.o \ | lapacke_sggqrf.o \ | ||||
| @@ -1537,14 +1555,14 @@ lapacke_spttrs.o \ | |||||
| lapacke_spttrs_work.o \ | lapacke_spttrs_work.o \ | ||||
| lapacke_ssbev.o \ | lapacke_ssbev.o \ | ||||
| lapacke_ssbev_work.o \ | lapacke_ssbev_work.o \ | ||||
| lapacke_ssbevd.o \ | |||||
| lapacke_ssbevd_work.o \ | |||||
| lapacke_ssbevx.o \ | |||||
| lapacke_ssbevx_work.o \ | |||||
| lapacke_ssbev_2stage.o \ | lapacke_ssbev_2stage.o \ | ||||
| lapacke_ssbev_2stage_work.o \ | lapacke_ssbev_2stage_work.o \ | ||||
| lapacke_ssbevd.o \ | |||||
| lapacke_ssbevd_work.o \ | |||||
| lapacke_ssbevd_2stage.o \ | lapacke_ssbevd_2stage.o \ | ||||
| lapacke_ssbevd_2stage_work.o \ | lapacke_ssbevd_2stage_work.o \ | ||||
| lapacke_ssbevx.o \ | |||||
| lapacke_ssbevx_work.o \ | |||||
| lapacke_ssbevx_2stage.o \ | lapacke_ssbevx_2stage.o \ | ||||
| lapacke_ssbevx_2stage_work.o \ | lapacke_ssbevx_2stage_work.o \ | ||||
| lapacke_ssbgst.o \ | lapacke_ssbgst.o \ | ||||
| @@ -1621,18 +1639,18 @@ lapacke_ssyequb.o \ | |||||
| lapacke_ssyequb_work.o \ | lapacke_ssyequb_work.o \ | ||||
| lapacke_ssyev.o \ | lapacke_ssyev.o \ | ||||
| lapacke_ssyev_work.o \ | lapacke_ssyev_work.o \ | ||||
| lapacke_ssyevd.o \ | |||||
| lapacke_ssyevd_work.o \ | |||||
| lapacke_ssyevr.o \ | |||||
| lapacke_ssyevr_work.o \ | |||||
| lapacke_ssyevx.o \ | |||||
| lapacke_ssyevx_work.o \ | |||||
| lapacke_ssyev_2stage.o \ | lapacke_ssyev_2stage.o \ | ||||
| lapacke_ssyev_2stage_work.o \ | lapacke_ssyev_2stage_work.o \ | ||||
| lapacke_ssyevd.o \ | |||||
| lapacke_ssyevd_work.o \ | |||||
| lapacke_ssyevd_2stage.o \ | lapacke_ssyevd_2stage.o \ | ||||
| lapacke_ssyevd_2stage_work.o \ | lapacke_ssyevd_2stage_work.o \ | ||||
| lapacke_ssyevr.o \ | |||||
| lapacke_ssyevr_work.o \ | |||||
| lapacke_ssyevr_2stage.o \ | lapacke_ssyevr_2stage.o \ | ||||
| lapacke_ssyevr_2stage_work.o \ | lapacke_ssyevr_2stage_work.o \ | ||||
| lapacke_ssyevx.o \ | |||||
| lapacke_ssyevx_work.o \ | |||||
| lapacke_ssyevx_2stage.o \ | lapacke_ssyevx_2stage.o \ | ||||
| lapacke_ssyevx_2stage_work.o \ | lapacke_ssyevx_2stage_work.o \ | ||||
| lapacke_ssygst.o \ | lapacke_ssygst.o \ | ||||
| @@ -1648,8 +1666,6 @@ lapacke_ssygvx_work.o \ | |||||
| lapacke_ssyrfs.o \ | lapacke_ssyrfs.o \ | ||||
| lapacke_ssyrfs_work.o \ | lapacke_ssyrfs_work.o \ | ||||
| lapacke_ssysv.o \ | lapacke_ssysv.o \ | ||||
| lapacke_ssysv_rook.o \ | |||||
| lapacke_ssysv_rook_work.o \ | |||||
| lapacke_ssysv_work.o \ | lapacke_ssysv_work.o \ | ||||
| lapacke_ssysv_aa.o \ | lapacke_ssysv_aa.o \ | ||||
| lapacke_ssysv_aa_work.o \ | lapacke_ssysv_aa_work.o \ | ||||
| @@ -1657,6 +1673,8 @@ lapacke_ssysv_aa_2stage.o \ | |||||
| lapacke_ssysv_aa_2stage_work.o \ | lapacke_ssysv_aa_2stage_work.o \ | ||||
| lapacke_ssysv_rk.o \ | lapacke_ssysv_rk.o \ | ||||
| lapacke_ssysv_rk_work.o \ | lapacke_ssysv_rk_work.o \ | ||||
| lapacke_ssysv_rook.o \ | |||||
| lapacke_ssysv_rook_work.o \ | |||||
| lapacke_ssysvx.o \ | lapacke_ssysvx.o \ | ||||
| lapacke_ssysvx_work.o \ | lapacke_ssysvx_work.o \ | ||||
| lapacke_ssyswapr.o \ | lapacke_ssyswapr.o \ | ||||
| @@ -1665,34 +1683,34 @@ lapacke_ssytrd.o \ | |||||
| lapacke_ssytrd_work.o \ | lapacke_ssytrd_work.o \ | ||||
| lapacke_ssytrf.o \ | lapacke_ssytrf.o \ | ||||
| lapacke_ssytrf_work.o \ | lapacke_ssytrf_work.o \ | ||||
| lapacke_ssytrf_rook.o \ | |||||
| lapacke_ssytrf_rook_work.o \ | |||||
| lapacke_ssytrf_aa.o \ | lapacke_ssytrf_aa.o \ | ||||
| lapacke_ssytrf_aa_work.o \ | lapacke_ssytrf_aa_work.o \ | ||||
| lapacke_ssytrf_aa_2stage.o \ | lapacke_ssytrf_aa_2stage.o \ | ||||
| lapacke_ssytrf_aa_2stage_work.o \ | lapacke_ssytrf_aa_2stage_work.o \ | ||||
| lapacke_ssytrf_rk.o \ | lapacke_ssytrf_rk.o \ | ||||
| lapacke_ssytrf_rk_work.o \ | lapacke_ssytrf_rk_work.o \ | ||||
| lapacke_ssytrf_rook.o \ | |||||
| lapacke_ssytrf_rook_work.o \ | |||||
| lapacke_ssytri.o \ | lapacke_ssytri.o \ | ||||
| lapacke_ssytri_work.o \ | |||||
| lapacke_ssytri2.o \ | lapacke_ssytri2.o \ | ||||
| lapacke_ssytri2_work.o \ | lapacke_ssytri2_work.o \ | ||||
| lapacke_ssytri_3.o \ | |||||
| lapacke_ssytri_3_work.o \ | |||||
| lapacke_ssytri2x.o \ | lapacke_ssytri2x.o \ | ||||
| lapacke_ssytri2x_work.o \ | lapacke_ssytri2x_work.o \ | ||||
| lapacke_ssytri_work.o \ | |||||
| lapacke_ssytri_3.o \ | |||||
| lapacke_ssytri_3_work.o \ | |||||
| lapacke_ssytrs.o \ | lapacke_ssytrs.o \ | ||||
| lapacke_ssytrs_rook.o \ | |||||
| lapacke_ssytrs_work.o \ | |||||
| lapacke_ssytrs2.o \ | lapacke_ssytrs2.o \ | ||||
| lapacke_ssytrs2_work.o \ | lapacke_ssytrs2_work.o \ | ||||
| lapacke_ssytrs_work.o \ | |||||
| lapacke_ssytrs_rook_work.o \ | |||||
| lapacke_ssytrs_3.o \ | |||||
| lapacke_ssytrs_3_work.o \ | |||||
| lapacke_ssytrs_aa.o \ | lapacke_ssytrs_aa.o \ | ||||
| lapacke_ssytrs_aa_2stage.o \ | |||||
| lapacke_ssytrs_aa_work.o \ | lapacke_ssytrs_aa_work.o \ | ||||
| lapacke_ssytrs_aa_2stage.o \ | |||||
| lapacke_ssytrs_aa_2stage_work.o \ | lapacke_ssytrs_aa_2stage_work.o \ | ||||
| lapacke_ssytrs_3.o \ | |||||
| lapacke_ssytrs_3_work.o \ | |||||
| lapacke_ssytrs_rook.o \ | |||||
| lapacke_ssytrs_rook_work.o \ | |||||
| lapacke_stbcon.o \ | lapacke_stbcon.o \ | ||||
| lapacke_stbcon_work.o \ | lapacke_stbcon_work.o \ | ||||
| lapacke_stbrfs.o \ | lapacke_stbrfs.o \ | ||||
| @@ -1762,7 +1780,9 @@ lapacke_strttf_work.o \ | |||||
| lapacke_strttp.o \ | lapacke_strttp.o \ | ||||
| lapacke_strttp_work.o \ | lapacke_strttp_work.o \ | ||||
| lapacke_stzrzf.o \ | lapacke_stzrzf.o \ | ||||
| lapacke_stzrzf_work.o \ | |||||
| lapacke_stzrzf_work.o | |||||
| OBJ_Z = \ | |||||
| lapacke_zbbcsd.o \ | lapacke_zbbcsd.o \ | ||||
| lapacke_zbbcsd_work.o \ | lapacke_zbbcsd_work.o \ | ||||
| lapacke_zbdsqr.o \ | lapacke_zbdsqr.o \ | ||||
| @@ -1846,11 +1866,11 @@ lapacke_zgeqrf_work.o \ | |||||
| lapacke_zgeqrfp.o \ | lapacke_zgeqrfp.o \ | ||||
| lapacke_zgeqrfp_work.o \ | lapacke_zgeqrfp_work.o \ | ||||
| lapacke_zgeqrt.o \ | lapacke_zgeqrt.o \ | ||||
| lapacke_zgeqrt_work.o \ | |||||
| lapacke_zgeqrt2.o \ | lapacke_zgeqrt2.o \ | ||||
| lapacke_zgeqrt2_work.o \ | lapacke_zgeqrt2_work.o \ | ||||
| lapacke_zgeqrt3.o \ | lapacke_zgeqrt3.o \ | ||||
| lapacke_zgeqrt3_work.o \ | lapacke_zgeqrt3_work.o \ | ||||
| lapacke_zgeqrt_work.o \ | |||||
| lapacke_zgerfs.o \ | lapacke_zgerfs.o \ | ||||
| lapacke_zgerfs_work.o \ | lapacke_zgerfs_work.o \ | ||||
| lapacke_zgerqf.o \ | lapacke_zgerqf.o \ | ||||
| @@ -1861,6 +1881,8 @@ lapacke_zgesv.o \ | |||||
| lapacke_zgesv_work.o \ | lapacke_zgesv_work.o \ | ||||
| lapacke_zgesvd.o \ | lapacke_zgesvd.o \ | ||||
| lapacke_zgesvd_work.o \ | lapacke_zgesvd_work.o \ | ||||
| lapacke_zgesvdq.o \ | |||||
| lapacke_zgesvdq_work.o \ | |||||
| lapacke_zgesvdx.o \ | lapacke_zgesvdx.o \ | ||||
| lapacke_zgesvdx_work.o \ | lapacke_zgesvdx_work.o \ | ||||
| lapacke_zgesvj.o \ | lapacke_zgesvj.o \ | ||||
| @@ -1897,10 +1919,10 @@ lapacke_zggevx.o \ | |||||
| lapacke_zggevx_work.o \ | lapacke_zggevx_work.o \ | ||||
| lapacke_zggglm.o \ | lapacke_zggglm.o \ | ||||
| lapacke_zggglm_work.o \ | lapacke_zggglm_work.o \ | ||||
| lapacke_zgghrd.o \ | |||||
| lapacke_zgghrd_work.o \ | |||||
| lapacke_zgghd3.o \ | lapacke_zgghd3.o \ | ||||
| lapacke_zgghd3_work.o \ | lapacke_zgghd3_work.o \ | ||||
| lapacke_zgghrd.o \ | |||||
| lapacke_zgghrd_work.o \ | |||||
| lapacke_zgglse.o \ | lapacke_zgglse.o \ | ||||
| lapacke_zgglse_work.o \ | lapacke_zgglse_work.o \ | ||||
| lapacke_zggqrf.o \ | lapacke_zggqrf.o \ | ||||
| @@ -1925,14 +1947,14 @@ lapacke_zgttrs.o \ | |||||
| lapacke_zgttrs_work.o \ | lapacke_zgttrs_work.o \ | ||||
| lapacke_zhbev.o \ | lapacke_zhbev.o \ | ||||
| lapacke_zhbev_work.o \ | lapacke_zhbev_work.o \ | ||||
| lapacke_zhbevd.o \ | |||||
| lapacke_zhbevd_work.o \ | |||||
| lapacke_zhbevx.o \ | |||||
| lapacke_zhbevx_work.o \ | |||||
| lapacke_zhbev_2stage.o \ | lapacke_zhbev_2stage.o \ | ||||
| lapacke_zhbev_2stage_work.o \ | lapacke_zhbev_2stage_work.o \ | ||||
| lapacke_zhbevd.o \ | |||||
| lapacke_zhbevd_work.o \ | |||||
| lapacke_zhbevd_2stage.o \ | lapacke_zhbevd_2stage.o \ | ||||
| lapacke_zhbevd_2stage_work.o \ | lapacke_zhbevd_2stage_work.o \ | ||||
| lapacke_zhbevx.o \ | |||||
| lapacke_zhbevx_work.o \ | |||||
| lapacke_zhbevx_2stage.o \ | lapacke_zhbevx_2stage.o \ | ||||
| lapacke_zhbevx_2stage_work.o \ | lapacke_zhbevx_2stage_work.o \ | ||||
| lapacke_zhbgst.o \ | lapacke_zhbgst.o \ | ||||
| @@ -1953,18 +1975,18 @@ lapacke_zheequb.o \ | |||||
| lapacke_zheequb_work.o \ | lapacke_zheequb_work.o \ | ||||
| lapacke_zheev.o \ | lapacke_zheev.o \ | ||||
| lapacke_zheev_work.o \ | lapacke_zheev_work.o \ | ||||
| lapacke_zheevd.o \ | |||||
| lapacke_zheevd_work.o \ | |||||
| lapacke_zheevr.o \ | |||||
| lapacke_zheevr_work.o \ | |||||
| lapacke_zheevx.o \ | |||||
| lapacke_zheevx_work.o \ | |||||
| lapacke_zheev_2stage.o \ | lapacke_zheev_2stage.o \ | ||||
| lapacke_zheev_2stage_work.o \ | lapacke_zheev_2stage_work.o \ | ||||
| lapacke_zheevd.o \ | |||||
| lapacke_zheevd_work.o \ | |||||
| lapacke_zheevd_2stage.o \ | lapacke_zheevd_2stage.o \ | ||||
| lapacke_zheevd_2stage_work.o \ | lapacke_zheevd_2stage_work.o \ | ||||
| lapacke_zheevr.o \ | |||||
| lapacke_zheevr_work.o \ | |||||
| lapacke_zheevr_2stage.o \ | lapacke_zheevr_2stage.o \ | ||||
| lapacke_zheevr_2stage_work.o \ | lapacke_zheevr_2stage_work.o \ | ||||
| lapacke_zheevx.o \ | |||||
| lapacke_zheevx_work.o \ | |||||
| lapacke_zheevx_2stage.o \ | lapacke_zheevx_2stage.o \ | ||||
| lapacke_zheevx_2stage_work.o \ | lapacke_zheevx_2stage_work.o \ | ||||
| lapacke_zhegst.o \ | lapacke_zhegst.o \ | ||||
| @@ -1994,35 +2016,35 @@ lapacke_zheswapr_work.o \ | |||||
| lapacke_zhetrd.o \ | lapacke_zhetrd.o \ | ||||
| lapacke_zhetrd_work.o \ | lapacke_zhetrd_work.o \ | ||||
| lapacke_zhetrf.o \ | lapacke_zhetrf.o \ | ||||
| lapacke_zhetrf_rook.o \ | |||||
| lapacke_zhetrf_work.o \ | lapacke_zhetrf_work.o \ | ||||
| lapacke_zhetrf_rook_work.o \ | |||||
| lapacke_zhetrf_aa.o \ | lapacke_zhetrf_aa.o \ | ||||
| lapacke_zhetrf_aa_2stage.o \ | |||||
| lapacke_zhetrf_aa_work.o \ | lapacke_zhetrf_aa_work.o \ | ||||
| lapacke_zhetrf_aa_2stage.o \ | |||||
| lapacke_zhetrf_aa_2stage_work.o \ | lapacke_zhetrf_aa_2stage_work.o \ | ||||
| lapacke_zhetrf_rk.o \ | lapacke_zhetrf_rk.o \ | ||||
| lapacke_zhetrf_rk_work.o \ | lapacke_zhetrf_rk_work.o \ | ||||
| lapacke_zhetrf_rook.o \ | |||||
| lapacke_zhetrf_rook_work.o \ | |||||
| lapacke_zhetri.o \ | lapacke_zhetri.o \ | ||||
| lapacke_zhetri_work.o \ | |||||
| lapacke_zhetri2.o \ | lapacke_zhetri2.o \ | ||||
| lapacke_zhetri2_work.o \ | lapacke_zhetri2_work.o \ | ||||
| lapacke_zhetri_3.o \ | |||||
| lapacke_zhetri_3_work.o \ | |||||
| lapacke_zhetri2x.o \ | lapacke_zhetri2x.o \ | ||||
| lapacke_zhetri2x_work.o \ | lapacke_zhetri2x_work.o \ | ||||
| lapacke_zhetri_work.o \ | |||||
| lapacke_zhetri_3.o \ | |||||
| lapacke_zhetri_3_work.o \ | |||||
| lapacke_zhetrs.o \ | lapacke_zhetrs.o \ | ||||
| lapacke_zhetrs_rook.o \ | |||||
| lapacke_zhetrs_work.o \ | |||||
| lapacke_zhetrs2.o \ | lapacke_zhetrs2.o \ | ||||
| lapacke_zhetrs2_work.o \ | lapacke_zhetrs2_work.o \ | ||||
| lapacke_zhetrs_work.o \ | |||||
| lapacke_zhetrs_rook_work.o \ | |||||
| lapacke_zhetrs_3.o \ | |||||
| lapacke_zhetrs_3_work.o \ | |||||
| lapacke_zhetrs_aa.o \ | lapacke_zhetrs_aa.o \ | ||||
| lapacke_zhetrs_aa_2stage.o \ | |||||
| lapacke_zhetrs_aa_work.o \ | lapacke_zhetrs_aa_work.o \ | ||||
| lapacke_zhetrs_aa_2stage.o \ | |||||
| lapacke_zhetrs_aa_2stage_work.o \ | lapacke_zhetrs_aa_2stage_work.o \ | ||||
| lapacke_zhetrs_3.o \ | |||||
| lapacke_zhetrs_3_work.o \ | |||||
| lapacke_zhetrs_rook.o \ | |||||
| lapacke_zhetrs_rook_work.o \ | |||||
| lapacke_zhfrk.o \ | lapacke_zhfrk.o \ | ||||
| lapacke_zhfrk_work.o \ | lapacke_zhfrk_work.o \ | ||||
| lapacke_zhgeqz.o \ | lapacke_zhgeqz.o \ | ||||
| @@ -2213,11 +2235,11 @@ lapacke_zsyconv.o \ | |||||
| lapacke_zsyconv_work.o \ | lapacke_zsyconv_work.o \ | ||||
| lapacke_zsyequb.o \ | lapacke_zsyequb.o \ | ||||
| lapacke_zsyequb_work.o \ | lapacke_zsyequb_work.o \ | ||||
| lapacke_zsyr.o \ | |||||
| lapacke_zsyr_work.o \ | |||||
| lapacke_zsyrfs.o \ | lapacke_zsyrfs.o \ | ||||
| lapacke_zsyrfs_work.o \ | lapacke_zsyrfs_work.o \ | ||||
| lapacke_zsysv.o \ | lapacke_zsysv.o \ | ||||
| lapacke_zsysv_rook.o \ | |||||
| lapacke_zsysv_rook_work.o \ | |||||
| lapacke_zsysv_work.o \ | lapacke_zsysv_work.o \ | ||||
| lapacke_zsysv_aa.o \ | lapacke_zsysv_aa.o \ | ||||
| lapacke_zsysv_aa_work.o \ | lapacke_zsysv_aa_work.o \ | ||||
| @@ -2225,40 +2247,42 @@ lapacke_zsysv_aa_2stage.o \ | |||||
| lapacke_zsysv_aa_2stage_work.o \ | lapacke_zsysv_aa_2stage_work.o \ | ||||
| lapacke_zsysv_rk.o \ | lapacke_zsysv_rk.o \ | ||||
| lapacke_zsysv_rk_work.o \ | lapacke_zsysv_rk_work.o \ | ||||
| lapacke_zsysv_rook.o \ | |||||
| lapacke_zsysv_rook_work.o \ | |||||
| lapacke_zsysvx.o \ | lapacke_zsysvx.o \ | ||||
| lapacke_zsysvx_work.o \ | lapacke_zsysvx_work.o \ | ||||
| lapacke_zsyswapr.o \ | lapacke_zsyswapr.o \ | ||||
| lapacke_zsyswapr_work.o \ | lapacke_zsyswapr_work.o \ | ||||
| lapacke_zsytrf.o \ | lapacke_zsytrf.o \ | ||||
| lapacke_zsytrf_work.o \ | lapacke_zsytrf_work.o \ | ||||
| lapacke_zsytrf_rook.o \ | |||||
| lapacke_zsytrf_rook_work.o \ | |||||
| lapacke_zsytrf_aa.o \ | lapacke_zsytrf_aa.o \ | ||||
| lapacke_zsytrf_aa_2stage.o \ | |||||
| lapacke_zsytrf_aa_work.o \ | lapacke_zsytrf_aa_work.o \ | ||||
| lapacke_zsytrf_aa_2stage.o \ | |||||
| lapacke_zsytrf_aa_2stage_work.o \ | lapacke_zsytrf_aa_2stage_work.o \ | ||||
| lapacke_zsytrf_rk.o \ | lapacke_zsytrf_rk.o \ | ||||
| lapacke_zsytrf_rk_work.o \ | lapacke_zsytrf_rk_work.o \ | ||||
| lapacke_zsytrf_rook.o \ | |||||
| lapacke_zsytrf_rook_work.o \ | |||||
| lapacke_zsytri.o \ | lapacke_zsytri.o \ | ||||
| lapacke_zsytri_work.o \ | |||||
| lapacke_zsytri2.o \ | lapacke_zsytri2.o \ | ||||
| lapacke_zsytri2_work.o \ | lapacke_zsytri2_work.o \ | ||||
| lapacke_zsytri_3.o \ | |||||
| lapacke_zsytri_3_work.o \ | |||||
| lapacke_zsytri2x.o \ | lapacke_zsytri2x.o \ | ||||
| lapacke_zsytri2x_work.o \ | lapacke_zsytri2x_work.o \ | ||||
| lapacke_zsytri_work.o \ | |||||
| lapacke_zsytri_3.o \ | |||||
| lapacke_zsytri_3_work.o \ | |||||
| lapacke_zsytrs.o \ | lapacke_zsytrs.o \ | ||||
| lapacke_zsytrs_rook.o \ | |||||
| lapacke_zsytrs_work.o \ | |||||
| lapacke_zsytrs2.o \ | lapacke_zsytrs2.o \ | ||||
| lapacke_zsytrs2_work.o \ | lapacke_zsytrs2_work.o \ | ||||
| lapacke_zsytrs_work.o \ | |||||
| lapacke_zsytrs_rook_work.o \ | |||||
| lapacke_zsytrs_3.o \ | |||||
| lapacke_zsytrs_3_work.o \ | |||||
| lapacke_zsytrs_aa.o \ | lapacke_zsytrs_aa.o \ | ||||
| lapacke_zsytrs_aa_2stage.o \ | |||||
| lapacke_zsytrs_aa_work.o \ | lapacke_zsytrs_aa_work.o \ | ||||
| lapacke_zsytrs_aa_2stage.o \ | |||||
| lapacke_zsytrs_aa_2stage_work.o \ | lapacke_zsytrs_aa_2stage_work.o \ | ||||
| lapacke_zsytrs_3.o \ | |||||
| lapacke_zsytrs_3_work.o \ | |||||
| lapacke_zsytrs_rook.o \ | |||||
| lapacke_zsytrs_rook_work.o \ | |||||
| lapacke_ztbcon.o \ | lapacke_ztbcon.o \ | ||||
| lapacke_ztbcon_work.o \ | lapacke_ztbcon_work.o \ | ||||
| lapacke_ztbrfs.o \ | lapacke_ztbrfs.o \ | ||||
| @@ -2290,9 +2314,9 @@ lapacke_ztpcon_work.o \ | |||||
| lapacke_ztpmqrt.o \ | lapacke_ztpmqrt.o \ | ||||
| lapacke_ztpmqrt_work.o \ | lapacke_ztpmqrt_work.o \ | ||||
| lapacke_ztpqrt.o \ | lapacke_ztpqrt.o \ | ||||
| lapacke_ztpqrt_work.o \ | |||||
| lapacke_ztpqrt2.o \ | lapacke_ztpqrt2.o \ | ||||
| lapacke_ztpqrt2_work.o \ | lapacke_ztpqrt2_work.o \ | ||||
| lapacke_ztpqrt_work.o \ | |||||
| lapacke_ztprfb.o \ | lapacke_ztprfb.o \ | ||||
| lapacke_ztprfb_work.o \ | lapacke_ztprfb_work.o \ | ||||
| lapacke_ztprfs.o \ | lapacke_ztprfs.o \ | ||||
| @@ -2368,12 +2392,7 @@ lapacke_zunmtr_work.o \ | |||||
| lapacke_zupgtr.o \ | lapacke_zupgtr.o \ | ||||
| lapacke_zupgtr_work.o \ | lapacke_zupgtr_work.o \ | ||||
| lapacke_zupmtr.o \ | lapacke_zupmtr.o \ | ||||
| lapacke_zupmtr_work.o \ | |||||
| lapacke_zsyr.o \ | |||||
| lapacke_csyr.o \ | |||||
| lapacke_zsyr_work.o \ | |||||
| lapacke_csyr_work.o \ | |||||
| lapacke_ilaver.o | |||||
| lapacke_zupmtr_work.o | |||||
| ifdef BUILD_DEPRECATED | ifdef BUILD_DEPRECATED | ||||
| DEPRECATED = \ | DEPRECATED = \ | ||||
| @@ -2452,27 +2471,29 @@ lapacke_zlagsy.o \ | |||||
| lapacke_zlagsy_work.o | lapacke_zlagsy_work.o | ||||
| endif | endif | ||||
| all: ../../$(LAPACKELIB) | |||||
| .PHONY: all | |||||
| all: $(LAPACKELIB) | |||||
| .PHONY: ../../$(LAPACKELIB) | |||||
| ../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) | |||||
| $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) | |||||
| $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) | |||||
| $(LAPACKELIB): $(OBJ) $(OBJ_S) $(OBJ_C) $(OBJ_D) $(OBJ_Z) $(DEPRECATED) $(EXTENDED) $(MATGEN) | |||||
| $(AR) $(ARFLAGS) $@ $(OBJ) | |||||
| $(AR) $(ARFLAGS) $@ $(OBJ_S) | |||||
| $(AR) $(ARFLAGS) $@ $(OBJ_C) | |||||
| $(AR) $(ARFLAGS) $@ $(OBJ_D) | |||||
| $(AR) $(ARFLAGS) $@ $(OBJ_Z) | |||||
| ifdef BUILD_DEPRECATED | ifdef BUILD_DEPRECATED | ||||
| $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) | |||||
| $(AR) $(ARFLAGS) $@ $(DEPRECATED) | |||||
| endif | endif | ||||
| ifdef (USEXBLAS) | ifdef (USEXBLAS) | ||||
| $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) | |||||
| $(AR) $(ARFLAGS) $@ $(EXTENDED) | |||||
| endif | endif | ||||
| ifdef LAPACKE_WITH_TMG | ifdef LAPACKE_WITH_TMG | ||||
| $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) | |||||
| $(AR) $(ARFLAGS) $@ $(MATGEN) | |||||
| endif | endif | ||||
| $(RANLIB) $@ | $(RANLIB) $@ | ||||
| clean: cleanobj | |||||
| .PHONY: clean cleanobj cleanlib | |||||
| clean: cleanobj cleanlib | |||||
| cleanobj: | cleanobj: | ||||
| rm -f *.o | rm -f *.o | ||||
| .c.o: | |||||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||||
| cleanlib: | |||||
| rm -f $(LAPACKELIB) | |||||
| @@ -124,7 +124,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, | |||||
| float* rwork = NULL; | float* rwork = NULL; | ||||
| lapack_complex_float* cwork = NULL; | lapack_complex_float* cwork = NULL; | ||||
| lapack_int i; | lapack_int i; | ||||
| lapack_int nu, nv; | |||||
| if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { | if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { | ||||
| LAPACKE_xerbla( "LAPACKE_cgejsv", -1 ); | LAPACKE_xerbla( "LAPACKE_cgejsv", -1 ); | ||||
| return -1; | return -1; | ||||
| @@ -132,8 +131,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, | |||||
| #ifndef LAPACK_DISABLE_NAN_CHECK | #ifndef LAPACK_DISABLE_NAN_CHECK | ||||
| if( LAPACKE_get_nancheck() ) { | if( LAPACKE_get_nancheck() ) { | ||||
| /* Optionally check input matrices for NaNs */ | /* Optionally check input matrices for NaNs */ | ||||
| nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; | |||||
| nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; | |||||
| if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { | if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { | ||||
| return -10; | return -10; | ||||
| } | } | ||||
| @@ -75,7 +75,7 @@ lapack_int LAPACKE_cgelsd( int matrix_layout, lapack_int m, lapack_int n, | |||||
| if( info != 0 ) { | if( info != 0 ) { | ||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| liwork = (lapack_int)iwork_query; | |||||
| liwork = iwork_query; | |||||
| lrwork = (lapack_int)rwork_query; | lrwork = (lapack_int)rwork_query; | ||||
| lwork = LAPACK_C2INT( work_query ); | lwork = LAPACK_C2INT( work_query ); | ||||
| /* Allocate memory for work arrays */ | /* Allocate memory for work arrays */ | ||||
| @@ -0,0 +1,106 @@ | |||||
| /***************************************************************************** | |||||
| Copyright (c) 2014, Intel Corp. | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are met: | |||||
| * Redistributions of source code must retain the above copyright notice, | |||||
| this list of conditions and the following disclaimer. | |||||
| * Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in the | |||||
| documentation and/or other materials provided with the distribution. | |||||
| * Neither the name of Intel Corporation nor the names of its contributors | |||||
| may be used to endorse or promote products derived from this software | |||||
| without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||||
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||||
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||||
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||||
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||||
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||||
| THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ***************************************************************************** | |||||
| * Contents: Native high-level C interface to LAPACK function cgesvdq | |||||
| * Author: Intel Corporation | |||||
| * Generated November 2018 | |||||
| *****************************************************************************/ | |||||
| #include "lapacke_utils.h" | |||||
| lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp, | |||||
| char jobr, char jobu, char jobv, | |||||
| lapack_int m, lapack_int n, lapack_complex_float* a, | |||||
| lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, | |||||
| lapack_complex_float* v, lapack_int ldv, lapack_int* numrank) | |||||
| { | |||||
| lapack_int info = 0; | |||||
| lapack_int liwork = -1; | |||||
| lapack_int* iwork = NULL; | |||||
| lapack_int iwork_query; | |||||
| lapack_int lcwork = -1; | |||||
| lapack_complex_float* cwork = NULL; | |||||
| lapack_complex_float cwork_query; | |||||
| lapack_int lrwork = -1; | |||||
| double* rwork = NULL; | |||||
| double rwork_query; | |||||
| lapack_int i; | |||||
| if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { | |||||
| LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 ); | |||||
| return -1; | |||||
| } | |||||
| #ifndef LAPACK_DISABLE_NAN_CHECK | |||||
| if( LAPACKE_get_nancheck() ) { | |||||
| /* Optionally check input matrices for NaNs */ | |||||
| if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { | |||||
| return -6; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| /* Query optimal working array(s) size */ | |||||
| info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv, | |||||
| m, n, a, lda, s, u, ldu, v, ldv, numrank, | |||||
| &iwork_query, liwork, &cwork_query, lcwork, | |||||
| &rwork_query, lrwork ); | |||||
| if( info != 0 ) { | |||||
| goto exit_level_0; | |||||
| } | |||||
| liwork = iwork_query; | |||||
| lcwork = LAPACK_C2INT(cwork_query); | |||||
| lrwork = (lapack_int)rwork_query; | |||||
| /* Allocate memory for work arrays */ | |||||
| iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); | |||||
| if( iwork == NULL ) { | |||||
| info = LAPACK_WORK_MEMORY_ERROR; | |||||
| goto exit_level_0; | |||||
| } | |||||
| cwork = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lcwork ); | |||||
| if( cwork == NULL ) { | |||||
| info = LAPACK_WORK_MEMORY_ERROR; | |||||
| goto exit_level_0; | |||||
| } | |||||
| rwork = (double*)LAPACKE_malloc( sizeof(double) * lrwork ); | |||||
| if( rwork == NULL ) { | |||||
| info = LAPACK_WORK_MEMORY_ERROR; | |||||
| goto exit_level_0; | |||||
| } | |||||
| /* Call middle-level interface */ | |||||
| info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv, | |||||
| m, n, a, lda, s, u, ldu, v, ldv, numrank, | |||||
| iwork, liwork, cwork, lcwork, rwork, lrwork ); | |||||
| /* Release memory and exit */ | |||||
| LAPACKE_free( iwork ); | |||||
| LAPACKE_free( cwork ); | |||||
| LAPACKE_free( rwork ); | |||||
| exit_level_0: | |||||
| if( info == LAPACK_WORK_MEMORY_ERROR ) { | |||||
| LAPACKE_xerbla( "LAPACKE_cgesvdq", info ); | |||||
| } | |||||
| return info; | |||||
| } | |||||
| @@ -0,0 +1,149 @@ | |||||
| /***************************************************************************** | |||||
| Copyright (c) 2014, Intel Corp. | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are met: | |||||
| * Redistributions of source code must retain the above copyright notice, | |||||
| this list of conditions and the following disclaimer. | |||||
| * Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in the | |||||
| documentation and/or other materials provided with the distribution. | |||||
| * Neither the name of Intel Corporation nor the names of its contributors | |||||
| may be used to endorse or promote products derived from this software | |||||
| without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||||
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||||
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||||
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||||
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||||
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||||
| THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ***************************************************************************** | |||||
| * Contents: Native middle-level C interface to LAPACK function cgesvdq | |||||
| * Author: Intel Corporation | |||||
| * Generated November 2015 | |||||
| *****************************************************************************/ | |||||
| #include "lapacke_utils.h" | |||||
| lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp, | |||||
| char jobr, char jobu, char jobv, | |||||
| lapack_int m, lapack_int n, lapack_complex_float* a, | |||||
| lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, | |||||
| lapack_complex_float* v, lapack_int ldv, lapack_int* numrank, | |||||
| lapack_int* iwork, lapack_int liwork, | |||||
| lapack_complex_float* cwork, lapack_int lcwork, | |||||
| float* rwork, lapack_int lrwork ) | |||||
| { | |||||
| lapack_int info = 0; | |||||
| if( matrix_layout == LAPACK_COL_MAJOR ) { | |||||
| /* Call LAPACK function and adjust info */ | |||||
| LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv, | |||||
| numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info ); | |||||
| if( info < 0 ) { | |||||
| info = info - 1; | |||||
| } | |||||
| } else if( matrix_layout == LAPACK_ROW_MAJOR ) { | |||||
| lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || | |||||
| LAPACKE_lsame( jobu, 's' ) ) ? m : 1; | |||||
| lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : | |||||
| (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1); | |||||
| lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n : | |||||
| ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1); | |||||
| lapack_int lda_t = MAX(1,m); | |||||
| lapack_int ldu_t = MAX(1,nrows_u); | |||||
| lapack_int ldv_t = MAX(1,nrows_v); | |||||
| lapack_complex_float* a_t = NULL; | |||||
| lapack_complex_float* u_t = NULL; | |||||
| lapack_complex_float* v_t = NULL; | |||||
| /* Check leading dimension(s) */ | |||||
| if( lda < n ) { | |||||
| info = -9; | |||||
| LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||||
| return info; | |||||
| } | |||||
| if( ldu < ncols_u ) { | |||||
| info = -12; | |||||
| LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||||
| return info; | |||||
| } | |||||
| if( ldv < n ) { | |||||
| info = -14; | |||||
| LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||||
| return info; | |||||
| } | |||||
| /* Query optimal working array(s) size if requested */ | |||||
| if( lcwork == -1 ) { | |||||
| LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, | |||||
| s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, | |||||
| cwork, &lcwork, rwork, &lrwork, &info ); | |||||
| return (info < 0) ? (info - 1) : info; | |||||
| } | |||||
| /* Allocate memory for temporary array(s) */ | |||||
| a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); | |||||
| if( a_t == NULL ) { | |||||
| info = LAPACK_TRANSPOSE_MEMORY_ERROR; | |||||
| goto exit_level_0; | |||||
| } | |||||
| if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { | |||||
| u_t = (lapack_complex_float*) | |||||
| LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) ); | |||||
| if( u_t == NULL ) { | |||||
| info = LAPACK_TRANSPOSE_MEMORY_ERROR; | |||||
| goto exit_level_1; | |||||
| } | |||||
| } | |||||
| if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { | |||||
| v_t = (lapack_complex_float*) | |||||
| LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); | |||||
| if( v_t == NULL ) { | |||||
| info = LAPACK_TRANSPOSE_MEMORY_ERROR; | |||||
| goto exit_level_2; | |||||
| } | |||||
| } | |||||
| /* Transpose input matrices */ | |||||
| LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); | |||||
| /* Call LAPACK function and adjust info */ | |||||
| LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, | |||||
| s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, | |||||
| cwork, &lcwork, rwork, &lrwork, &info ); | |||||
| if( info < 0 ) { | |||||
| info = info - 1; | |||||
| } | |||||
| /* Transpose output matrices */ | |||||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); | |||||
| if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { | |||||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, | |||||
| u, ldu ); | |||||
| } | |||||
| if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { | |||||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, | |||||
| ldv ); | |||||
| } | |||||
| /* Release memory and exit */ | |||||
| if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { | |||||
| LAPACKE_free( v_t ); | |||||
| } | |||||
| exit_level_2: | |||||
| if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { | |||||
| LAPACKE_free( u_t ); | |||||
| } | |||||
| exit_level_1: | |||||
| LAPACKE_free( a_t ); | |||||
| exit_level_0: | |||||
| if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { | |||||
| LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||||
| } | |||||
| } else { | |||||
| info = -1; | |||||
| LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||||
| } | |||||
| return info; | |||||
| } | |||||
| @@ -91,7 +91,7 @@ lapack_int LAPACKE_cggesx( int matrix_layout, char jobvsl, char jobvsr, | |||||
| if( info != 0 ) { | if( info != 0 ) { | ||||
| goto exit_level_2; | goto exit_level_2; | ||||
| } | } | ||||
| liwork = (lapack_int)iwork_query; | |||||
| liwork = iwork_query; | |||||
| lwork = LAPACK_C2INT( work_query ); | lwork = LAPACK_C2INT( work_query ); | ||||
| /* Allocate memory for work arrays */ | /* Allocate memory for work arrays */ | ||||
| iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); | iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); | ||||
| @@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||||
| if( info != 0 ) { | if( info != 0 ) { | ||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| liwork = (lapack_int)iwork_query; | |||||
| liwork = iwork_query; | |||||
| lrwork = (lapack_int)rwork_query; | lrwork = (lapack_int)rwork_query; | ||||
| lwork = LAPACK_C2INT( work_query ); | lwork = LAPACK_C2INT( work_query ); | ||||
| /* Allocate memory for work arrays */ | /* Allocate memory for work arrays */ | ||||
| @@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||||
| if( info != 0 ) { | if( info != 0 ) { | ||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| liwork = (lapack_int)iwork_query; | |||||
| liwork = iwork_query; | |||||
| lrwork = (lapack_int)rwork_query; | lrwork = (lapack_int)rwork_query; | ||||
| lwork = LAPACK_C2INT( work_query ); | lwork = LAPACK_C2INT( work_query ); | ||||
| /* Allocate memory for work arrays */ | /* Allocate memory for work arrays */ | ||||
| @@ -71,7 +71,7 @@ lapack_int LAPACKE_chbgvd( int matrix_layout, char jobz, char uplo, lapack_int n | |||||
| if( info != 0 ) { | if( info != 0 ) { | ||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| liwork = (lapack_int)iwork_query; | |||||
| liwork = iwork_query; | |||||
| lrwork = (lapack_int)rwork_query; | lrwork = (lapack_int)rwork_query; | ||||
| lwork = LAPACK_C2INT( work_query ); | lwork = LAPACK_C2INT( work_query ); | ||||
| /* Allocate memory for work arrays */ | /* Allocate memory for work arrays */ | ||||
| @@ -70,7 +70,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo, | |||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| /* Transpose input matrices */ | /* Transpose input matrices */ | ||||
| LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||||
| LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_cheev( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | LAPACK_cheev( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | ||||
| &info ); | &info ); | ||||
| @@ -78,7 +78,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo, | |||||
| info = info - 1; | info = info - 1; | ||||
| } | } | ||||
| /* Transpose output matrices */ | /* Transpose output matrices */ | ||||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||||
| LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||||
| /* Release memory and exit */ | /* Release memory and exit */ | ||||
| LAPACKE_free( a_t ); | LAPACKE_free( a_t ); | ||||
| exit_level_0: | exit_level_0: | ||||
| @@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||||
| #ifndef LAPACK_DISABLE_NAN_CHECK | #ifndef LAPACK_DISABLE_NAN_CHECK | ||||
| if( LAPACKE_get_nancheck() ) { | if( LAPACKE_get_nancheck() ) { | ||||
| /* Optionally check input matrices for NaNs */ | /* Optionally check input matrices for NaNs */ | ||||
| if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { | |||||
| if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { | |||||
| return -5; | return -5; | ||||
| } | } | ||||
| } | } | ||||
| @@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||||
| if( info != 0 ) { | if( info != 0 ) { | ||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| liwork = (lapack_int)iwork_query; | |||||
| liwork = iwork_query; | |||||
| lrwork = (lapack_int)rwork_query; | lrwork = (lapack_int)rwork_query; | ||||
| lwork = LAPACK_C2INT( work_query ); | lwork = LAPACK_C2INT( work_query ); | ||||
| /* Allocate memory for work arrays */ | /* Allocate memory for work arrays */ | ||||
| @@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||||
| #ifndef LAPACK_DISABLE_NAN_CHECK | #ifndef LAPACK_DISABLE_NAN_CHECK | ||||
| if( LAPACKE_get_nancheck() ) { | if( LAPACKE_get_nancheck() ) { | ||||
| /* Optionally check input matrices for NaNs */ | /* Optionally check input matrices for NaNs */ | ||||
| if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { | |||||
| if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { | |||||
| return -5; | return -5; | ||||
| } | } | ||||
| } | } | ||||
| @@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||||
| if( info != 0 ) { | if( info != 0 ) { | ||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| liwork = (lapack_int)iwork_query; | |||||
| liwork = iwork_query; | |||||
| lrwork = (lapack_int)rwork_query; | lrwork = (lapack_int)rwork_query; | ||||
| lwork = LAPACK_C2INT( work_query ); | lwork = LAPACK_C2INT( work_query ); | ||||
| /* Allocate memory for work arrays */ | /* Allocate memory for work arrays */ | ||||
| @@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo, | |||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| /* Transpose input matrices */ | /* Transpose input matrices */ | ||||
| LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||||
| LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_cheevd_2stage( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | LAPACK_cheevd_2stage( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | ||||
| &lrwork, iwork, &liwork, &info ); | &lrwork, iwork, &liwork, &info ); | ||||
| @@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo, | |||||
| info = info - 1; | info = info - 1; | ||||
| } | } | ||||
| /* Transpose output matrices */ | /* Transpose output matrices */ | ||||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||||
| LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||||
| /* Release memory and exit */ | /* Release memory and exit */ | ||||
| LAPACKE_free( a_t ); | LAPACKE_free( a_t ); | ||||
| exit_level_0: | exit_level_0: | ||||
| @@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo, | |||||
| goto exit_level_0; | goto exit_level_0; | ||||
| } | } | ||||
| /* Transpose input matrices */ | /* Transpose input matrices */ | ||||
| LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||||
| LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||||
| /* Call LAPACK function and adjust info */ | /* Call LAPACK function and adjust info */ | ||||
| LAPACK_cheevd( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | LAPACK_cheevd( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | ||||
| &lrwork, iwork, &liwork, &info ); | &lrwork, iwork, &liwork, &info ); | ||||
| @@ -79,7 +79,8 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo, | |||||
| info = info - 1; | info = info - 1; | ||||
| } | } | ||||
| /* Transpose output matrices */ | /* Transpose output matrices */ | ||||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||||
| LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||||
| /* Release memory and exit */ | /* Release memory and exit */ | ||||
| LAPACKE_free( a_t ); | LAPACKE_free( a_t ); | ||||
| exit_level_0: | exit_level_0: | ||||