| @@ -178,4 +178,4 @@ In chronological order: | |||
| * [2019-11-06] optimize AVX512 SGEMM | |||
| * [2019-11-12] AVX512 CGEMM & ZGEMM kernels | |||
| * [2019-12-23] optimize AVX2 CGEMM and ZGEMM | |||
| * [2019-12-27] AVX2 CGEMM3M kernel | |||
| * [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels | |||
| @@ -247,21 +247,21 @@ prof_lapack : lapack_prebuild | |||
| lapack_prebuild : | |||
| ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
| -@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| @@ -319,7 +319,7 @@ lapack-test : | |||
| ifneq ($(CROSS), 1) | |||
| ( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \ | |||
| ./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING) | |||
| endif | |||
| lapack-runtest: | |||
| @@ -25,6 +25,8 @@ else ifeq ($(ARCH), i386) | |||
| override ARCH=x86 | |||
| else ifeq ($(ARCH), aarch64) | |||
| override ARCH=arm64 | |||
| else ifeq ($(ARCH), zarch) | |||
| override ARCH=zarch | |||
| endif | |||
| NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib | |||
| @@ -558,6 +560,11 @@ DYNAMIC_CORE += THUNDERX2T99 | |||
| DYNAMIC_CORE += TSV110 | |||
| endif | |||
| ifeq ($(ARCH), zarch) | |||
| DYNAMIC_CORE = Z13 | |||
| DYNAMIC_CORE += Z14 | |||
| endif | |||
| ifeq ($(ARCH), power) | |||
| DYNAMIC_CORE = POWER6 | |||
| DYNAMIC_CORE += POWER8 | |||
| @@ -115,7 +115,9 @@ set(SLASRC | |||
| stplqt.f stplqt2.f stpmlqt.f | |||
| ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f | |||
| ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f | |||
| ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f) | |||
| ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f | |||
| scombssq.f sgesvdq.f slaorhr_col_getrfnp.f | |||
| slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f ) | |||
| set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f | |||
| sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f | |||
| @@ -210,7 +212,9 @@ set(CLASRC | |||
| ctplqt.f ctplqt2.f ctpmlqt.f | |||
| chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f | |||
| cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f | |||
| chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f) | |||
| chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f | |||
| cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f | |||
| cungtsqr.f cunhr_col.f ) | |||
| set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f | |||
| cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f | |||
| @@ -299,7 +303,9 @@ set(DLASRC | |||
| dtplqt.f dtplqt2.f dtpmlqt.f | |||
| dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f | |||
| dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f | |||
| dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f) | |||
| dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f | |||
| dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f | |||
| dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f ) | |||
| set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f | |||
| dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f | |||
| @@ -398,7 +404,9 @@ set(ZLASRC | |||
| zgelq.f zlaswlq.f zlamswlq.f zgemlq.f | |||
| zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f | |||
| zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f | |||
| zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f) | |||
| zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f | |||
| zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f | |||
| zungtsqr.f zunhr_col.f) | |||
| set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f | |||
| zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f | |||
| @@ -715,6 +715,8 @@ set(DSRC | |||
| lapacke_dgesv_work.c | |||
| lapacke_dgesvd.c | |||
| lapacke_dgesvd_work.c | |||
| lapacke_dgesvdq.c | |||
| lapacke_dgesvdq_work.c | |||
| lapacke_dgesvdx.c | |||
| lapacke_dgesvdx_work.c | |||
| lapacke_dgesvj.c | |||
| @@ -1287,6 +1289,8 @@ set(SSRC | |||
| lapacke_sgesv_work.c | |||
| lapacke_sgesvd.c | |||
| lapacke_sgesvd_work.c | |||
| lapacke_sgesvdq.c | |||
| lapacke_sgesvdq_work.c | |||
| lapacke_sgesvdx.c | |||
| lapacke_sgesvdx_work.c | |||
| lapacke_sgesvj.c | |||
| @@ -1853,6 +1857,8 @@ set(ZSRC | |||
| lapacke_zgesv_work.c | |||
| lapacke_zgesvd.c | |||
| lapacke_zgesvd_work.c | |||
| lapacke_zgesvdq.c | |||
| lapacke_zgesvdq_work.c | |||
| lapacke_zgesvdx.c | |||
| lapacke_zgesvdx_work.c | |||
| lapacke_zgesvj.c | |||
| @@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES. | |||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | |||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | |||
| 16.0 THRESHOLD VALUE OF TEST RATIO | |||
| 7 NUMBER OF VALUES OF N | |||
| 6 NUMBER OF VALUES OF N | |||
| 1 2 3 5 7 9 35 VALUES OF N | |||
| 3 NUMBER OF VALUES OF ALPHA | |||
| 0.0 1.0 0.7 VALUES OF ALPHA | |||
| @@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES. | |||
| T LOGICAL FLAG, T TO TEST ERROR EXITS. | |||
| 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | |||
| 16.0 THRESHOLD VALUE OF TEST RATIO | |||
| 7 NUMBER OF VALUES OF N | |||
| 6 NUMBER OF VALUES OF N | |||
| 0 1 2 3 5 9 35 VALUES OF N | |||
| 3 NUMBER OF VALUES OF ALPHA | |||
| 0.0 1.0 0.7 VALUES OF ALPHA | |||
| @@ -21,9 +21,13 @@ else | |||
| ifeq ($(ARCH),power) | |||
| COMMONOBJS += dynamic_power.$(SUFFIX) | |||
| else | |||
| ifeq ($(ARCH),zarch) | |||
| COMMONOBJS += dynamic_zarch.$(SUFFIX) | |||
| else | |||
| COMMONOBJS += dynamic.$(SUFFIX) | |||
| endif | |||
| endif | |||
| endif | |||
| else | |||
| COMMONOBJS += parameter.$(SUFFIX) | |||
| endif | |||
| @@ -85,9 +89,13 @@ else | |||
| ifeq ($(ARCH),power) | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX) | |||
| else | |||
| ifeq ($(ARCH),zarch) | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX) | |||
| else | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||
| endif | |||
| endif | |||
| endif | |||
| else | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||
| endif | |||
| @@ -0,0 +1,131 @@ | |||
| #include "common.h" | |||
| extern gotoblas_t gotoblas_Z13; | |||
| extern gotoblas_t gotoblas_Z14; | |||
| extern gotoblas_t gotoblas_Z15; | |||
| //#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
| //extern gotoblas_t gotoblas_Z14; | |||
| //#endif | |||
| #define NUM_CORETYPES 5 | |||
| extern void openblas_warning(int verbose, const char* msg); | |||
| static char* corename[] = { | |||
| "unknown", | |||
| "Z13", | |||
| "Z14", | |||
| "Z15", | |||
| "ZARCH_GENERIC", | |||
| }; | |||
| char* gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_Z13) return corename[1]; | |||
| if (gotoblas == &gotoblas_Z14) return corename[2]; | |||
| if (gotoblas == &gotoblas_Z15) return corename[3]; | |||
| //#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
| // if (gotoblas == &gotoblas_POWER9) return corename[3]; | |||
| //#endif | |||
| return corename[0]; // try generic? | |||
| } | |||
| // __builtin_cpu_is is not supported by zarch | |||
| static gotolabs_t* get_coretype(void) { | |||
| FILE* infile; | |||
| char buffer[512], * p; | |||
| p = (char*)NULL; | |||
| infile = fopen("/proc/sysinfo", "r"); | |||
| while (fgets(buffer, sizeof(buffer), infile)) { | |||
| if (!strncmp("Type", buffer, 4)) { | |||
| p = strchr(buffer, ':') + 2; | |||
| #if 0 | |||
| fprintf(stderr, "%s\n", p); | |||
| #endif | |||
| break; | |||
| } | |||
| } | |||
| fclose(infile); | |||
| if (strstr(p, "2964")) return &gotoblas_Z13; | |||
| if (strstr(p, "2965")) return &gotoblas_Z13; | |||
| if (strstr(p, "3906")) return &gotoblas_Z14; | |||
| if (strstr(p, "3907")) return &gotoblas_Z14; | |||
| if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14 | |||
| if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14 | |||
| return NULL; // should be ZARCH_GENERIC | |||
| } | |||
| static gotoblas_t* force_coretype(char* coretype) { | |||
| int i; | |||
| int found = -1; | |||
| char message[128]; | |||
| for (i = 0; i < NUM_CORETYPES; i++) | |||
| { | |||
| if (!strncasecmp(coretype, corename[i], 20)) | |||
| { | |||
| found = i; | |||
| break; | |||
| } | |||
| } | |||
| switch (found) | |||
| { | |||
| case 1: return (&gotoblas_Z13); | |||
| case 2: return (&gotoblas_Z14); | |||
| case 3: return (&gotoblas_Z15); | |||
| //#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
| // case 3: return (&gotoblas_POWER9); | |||
| //#endif | |||
| default: return NULL; | |||
| } | |||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||
| openblas_warning(1, message); | |||
| } | |||
| void gotoblas_dynamic_init(void) { | |||
| char coremsg[128]; | |||
| char coren[22]; | |||
| char* p; | |||
| if (gotoblas) return; | |||
| p = getenv("OPENBLAS_CORETYPE"); | |||
| if (p) | |||
| { | |||
| gotoblas = force_coretype(p); | |||
| } | |||
| else | |||
| { | |||
| gotoblas = get_coretype(); | |||
| } | |||
| if (gotoblas == NULL) | |||
| { | |||
| snprintf(coremsg, 128, "Falling back to Z14 core\n"); | |||
| openblas_warning(1, coremsg); | |||
| gotoblas = &gotoblas_Z14; | |||
| } | |||
| if (gotoblas && gotoblas->init) { | |||
| strncpy(coren, gotoblas_corename(), 20); | |||
| sprintf(coremsg, "Core: %s\n", coren); | |||
| openblas_warning(2, coremsg); | |||
| gotoblas->init(); | |||
| } | |||
| else { | |||
| openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||
| exit(1); | |||
| } | |||
| } | |||
| void gotoblas_dynamic_quit(void) { | |||
| gotoblas = NULL; | |||
| } | |||
| @@ -694,7 +694,19 @@ | |||
| # functions added for lapack-3.8.0 | |||
| ilaenv2stage | |||
| ilaenv2stage, | |||
| # functions added for lapack-3.9.0 | |||
| cgesvdq, | |||
| cungtsqr, | |||
| dcombssq, | |||
| dgesvdq, | |||
| dorgtsqr, | |||
| scombssq, | |||
| sgesvdq, | |||
| sorgtsqr, | |||
| zgesvdq, | |||
| zungtsqr | |||
| ); | |||
| @lapack_extendedprecision_objs = ( | |||
| @@ -3347,6 +3359,15 @@ | |||
| LAPACKE_zsytrf_aa_2stage_work, | |||
| LAPACKE_zsytrs_aa_2stage, | |||
| LAPACKE_zsytrs_aa_2stage_work, | |||
| # new functions from 3.9.0 | |||
| LAPACKE_dgesvdq, | |||
| LAPACKE_dgesvdq_work, | |||
| LAPACKE_sgesvdq, | |||
| LAPACKE_sgesvdq_work, | |||
| LAPACKE_zgesvdq, | |||
| LAPACKE_zgesvdq_work | |||
| ); | |||
| # These functions may need 2 underscores. | |||
| @@ -3419,7 +3440,13 @@ | |||
| dsytrf_aa_2stage, dsytrs_aa_2stage, | |||
| zhesv_aa_2stage, zhetrf_aa_2stage, | |||
| zhetrs_aa_2stage, zsysv_aa_2stage, | |||
| zsytrf_aa_2stage, zsytrs_aa_2stage | |||
| zsytrf_aa_2stage, zsytrs_aa_2stage, | |||
| # 3.9.0 | |||
| claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col, | |||
| dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col, | |||
| slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col, | |||
| zlaunhr_col_getrfnp, zlaunhr_col_getrfnp2, zunhr_col | |||
| ); | |||
| @@ -103,26 +103,34 @@ ZDOTKERNEL = zdot.S | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| SGEMM_BETA = sgemm_beta.S | |||
| SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
| ifeq ($(SGEMM_UNROLL_N), 4) | |||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
| ifeq ($(SGEMM_UNROLL_M), 16) | |||
| SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S | |||
| else | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| endif | |||
| ifeq ($(SGEMM_UNROLL_M), 4) | |||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S | |||
| else | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
| endif | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(SGEMM_UNROLL_N), 16) | |||
| SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||
| else | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| endif | |||
| ifeq ($(SGEMM_UNROLL_N), 4) | |||
| SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
| else | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
| endif | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| @@ -109,22 +109,29 @@ ZGEMVTKERNEL = zgemv_t.S | |||
| SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
| ifeq ($(SGEMM_UNROLL_N), 4) | |||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
| ifeq ($(SGEMM_UNROLL_M), 16) | |||
| SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S | |||
| else | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| endif | |||
| ifeq ($(SGEMM_UNROLL_M), 4) | |||
| SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S | |||
| else | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
| endif | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(SGEMM_UNROLL_N), 16) | |||
| SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||
| else | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| endif | |||
| ifeq ($(SGEMM_UNROLL_N), 4) | |||
| SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
| else | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
| endif | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| @@ -43,7 +43,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define betaV0 v11.d[0] | |||
| #define I x16 | |||
| #define size 128 | |||
| #define prfm_size 640 | |||
| #define calc_size 128 | |||
| /************************************************************************************** | |||
| * Macro definitions | |||
| @@ -119,27 +120,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ldp q2, q3, [A02] | |||
| ldp q4, q5, [A03] | |||
| ldp q6, q7, [A04] | |||
| fmul v0.2d, v0.2d, betaV0 | |||
| fmul v1.2d, v1.2d, betaV0 | |||
| fmul v2.2d, v2.2d, betaV0 | |||
| fmul v3.2d, v3.2d, betaV0 | |||
| prfm PLDL1KEEP, [A01, prfm_size] | |||
| fmul v4.2d, v4.2d, betaV0 | |||
| fmul v5.2d, v5.2d, betaV0 | |||
| prfm PLDL1KEEP, [A03, prfm_size] | |||
| fmul v6.2d, v6.2d, betaV0 | |||
| fmul v7.2d, v7.2d, betaV0 | |||
| st1 {v0.2d, v1.2d}, [A01] | |||
| add A01, A01, size | |||
| add A01, A01, calc_size | |||
| st1 {v2.2d, v3.2d}, [A02] | |||
| add A02, A02, size | |||
| add A02, A02, calc_size | |||
| st1 {v4.2d, v5.2d}, [A03] | |||
| add A03, A03, size | |||
| add A03, A03, calc_size | |||
| st1 {v6.2d, v7.2d}, [A04] | |||
| add A04, A04, size | |||
| add A04, A04, calc_size | |||
| subs I , I , #1 | |||
| bne .Lgemm_beta_03 | |||
| @@ -0,0 +1,259 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #define M x0 | |||
| #define N x1 | |||
| #define BETA s0 | |||
| #define LDC x6 | |||
| #define C00 x7 | |||
| #define A01 x8 | |||
| #define A02 x9 | |||
| #define A03 x10 | |||
| #define A04 x11 | |||
| #define I x12 | |||
| #define beta0 s11 | |||
| #define betaV0 v11.s[0] | |||
| #define prfm_size 640 | |||
| #define calc_size 128 | |||
| /************************************************************************************** | |||
| * Macro definitions | |||
| **************************************************************************************/ | |||
// Push d8-d17 and x18-x28 into a freshly reserved 176-byte (11 * 16) stack
// frame.  RESTORE_REGS reads the same slots back.
.macro SAVE_REGS
	sub	sp, sp, #176
	stp	d8, d9, [sp, #0]
	stp	d10, d11, [sp, #16]
	stp	d12, d13, [sp, #32]
	stp	d14, d15, [sp, #48]
	stp	d16, d17, [sp, #64]
	stp	x18, x19, [sp, #80]
	stp	x20, x21, [sp, #96]
	stp	x22, x23, [sp, #112]
	stp	x24, x25, [sp, #128]
	stp	x26, x27, [sp, #144]
	str	x28, [sp, #160]
.endm
// Reload d8-d17 and x18-x28 from the 176-byte frame written by SAVE_REGS,
// then release the frame.
.macro RESTORE_REGS
	ldp	d8, d9, [sp, #0]
	ldp	d10, d11, [sp, #16]
	ldp	d12, d13, [sp, #32]
	ldp	d14, d15, [sp, #48]
	ldp	d16, d17, [sp, #64]
	ldp	x18, x19, [sp, #80]
	ldp	x20, x21, [sp, #96]
	ldp	x22, x23, [sp, #112]
	ldp	x24, x25, [sp, #128]
	ldp	x26, x27, [sp, #144]
	ldr	x28, [sp, #160]
	add	sp, sp, #176
.endm
// Set v0-v7 to all-zero so the beta == 0 path can store them straight into C.
//
// The registers are cleared with movi rather than multiplied by beta: nothing
// in this file initialises v1-v7 (or the upper lanes of v0) before the macro
// runs, and a lane that happens to hold NaN or Inf would stay NaN after a
// multiply by zero.  movi also yields +0.0 even when beta is -0.0.
.macro INIT_ZERO
	movi	v0.4s, #0
	movi	v1.4s, #0
	movi	v2.4s, #0
	movi	v3.4s, #0
	movi	v4.4s, #0
	movi	v5.4s, #0
	movi	v6.4s, #0
	movi	v7.4s, #0
.endm
| /************************************************************************************** | |||
| * End of macro definitions | |||
| **************************************************************************************/ | |||
// Single-precision "C := beta * C" kernel.
//
// Register roles (see the #defines above): M = x0 and N = x1 are the element
// counts of the inner and outer loops, BETA = s0, C00 = x7 is the start of the
// current column, and LDC is loaded from the first stack slot.
// For each of the N columns, M contiguous floats are scaled by beta; when
// beta compares equal to 0.0 they are overwritten with zero instead.
// Always returns 0 in x0.

	PROLOGUE

	.align 5

	// LDC must be fetched before SAVE_REGS moves sp.
	ldr	LDC, [sp]
	SAVE_REGS

.Lgemm_beta_BEGIN:

	fmov	beta0, BETA		// keep beta in s11 / v11.s[0]
	cmp	N, #0
	ble	.Lgemm_beta_L999	// nothing to do for N <= 0

	fcmp	BETA, #0.0
	beq	.Lgemm_beta_zero_01	// beta == 0: store zeros, do not read C

.Lgemm_beta_01:

	lsl	LDC, LDC, #2		// column stride in bytes (4 bytes per float)

	.align 5
.Lgemm_beta_02:

	mov	A01, C00		// A01 walks the current column
	add	C00, C00, LDC		// C00 now points at the next column

	asr	I, M, #5		// I = M / 32: blocks of 32 floats (128 bytes)
	cmp	I, #0
	ble	.Lgemm_beta_04

	// Four pointers, 32 bytes apart, each covering 8 floats of a block.
	add	A02, A01, #32
	add	A03, A02, #32
	add	A04, A03, #32

	.align 5
.Lgemm_beta_03:

	prfm	PLDL1KEEP, [A01, prfm_size]

	ldp	q0, q1, [A01]
	ldp	q2, q3, [A02]
	ldp	q4, q5, [A03]
	ldp	q6, q7, [A04]

	fmul	v0.4s, v0.4s, betaV0
	fmul	v1.4s, v1.4s, betaV0
	fmul	v2.4s, v2.4s, betaV0
	fmul	v3.4s, v3.4s, betaV0
	fmul	v4.4s, v4.4s, betaV0
	fmul	v5.4s, v5.4s, betaV0
	fmul	v6.4s, v6.4s, betaV0
	fmul	v7.4s, v7.4s, betaV0

	prfm	PLDL1KEEP, [A01, prfm_size + 64]

	st1	{v0.4s, v1.4s}, [A01]
	add	A01, A01, calc_size
	st1	{v2.4s, v3.4s}, [A02]
	add	A02, A02, calc_size
	st1	{v4.4s, v5.4s}, [A03]
	add	A03, A03, calc_size
	st1	{v6.4s, v7.4s}, [A04]
	add	A04, A04, calc_size

	subs	I , I , #1
	bne	.Lgemm_beta_03

	.align 5
.Lgemm_beta_04:

	and	I, M , #31		// remaining M % 32 floats, one at a time
	cmp	I, #0
	ble	.Lgemm_beta_06

	.align 5
.Lgemm_beta_05:

	ldr	s12, [A01]
	fmul	s12, s12, beta0
	str	s12, [A01]
	add	A01, A01, #4

	subs	I , I , #1
	bne	.Lgemm_beta_05

	.align 5
.Lgemm_beta_06:

	subs	N , N, #1		// N--
	bne	.Lgemm_beta_02

	.align 5
.Lgemm_beta_L999:

	mov	x0, #0
	RESTORE_REGS
	ret

	.align 5
.Lgemm_beta_zero_01:

	INIT_ZERO			// prepare v0-v7 for the block stores below
	lsl	LDC, LDC, #2		// column stride in bytes

	.align 5
.Lgemm_beta_zero_02:

	mov	A01, C00
	add	C00, C00, LDC

	asr	I, M, #5		// blocks of 32 floats
	cmp	I, #0
	ble	.Lgemm_beta_zero_04

	add	A02, A01, #32
	add	A03, A02, #32
	add	A04, A03, #32

	.align 5
.Lgemm_beta_zero_03:

	st1	{v0.4s, v1.4s}, [A01]
	add	A01, A01, calc_size
	st1	{v2.4s, v3.4s}, [A02]
	add	A02, A02, calc_size
	st1	{v4.4s, v5.4s}, [A03]
	add	A03, A03, calc_size
	st1	{v6.4s, v7.4s}, [A04]
	add	A04, A04, calc_size

	subs	I, I, #1
	bne	.Lgemm_beta_zero_03

	.align 5
.Lgemm_beta_zero_04:

	and	I, M, #31		// remaining M % 32 floats
	cmp	I, #0
	ble	.Lgemm_beta_zero_06

	.align 5
.Lgemm_beta_zero_05:

	// Store an exact +0.0; beta itself may be -0.0 on this path because
	// fcmp treats -0.0 as equal to 0.0.
	str	wzr, [A01]
	add	A01, A01, #4

	subs	I, I, #1
	bne	.Lgemm_beta_zero_05

	.align 5
.Lgemm_beta_zero_06:

	subs	N, N, #1
	bne	.Lgemm_beta_zero_02

	.align 5
.Lgemm_beta_zero_L999:

	mov	x0, #0
	RESTORE_REGS
	ret

	EPILOGUE
| @@ -0,0 +1,824 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2019, The OpenBLAS Project | |||
| All rights reserved. | |||
| *****************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #define M x0 | |||
| #define N x1 | |||
| #define A x2 | |||
| #define LDA x3 | |||
| #define B x4 | |||
| #define M8 x5 | |||
| #define A01 x6 | |||
| #define A02 x7 | |||
| #define A03 x8 | |||
| #define A04 x9 | |||
| #define A05 x10 | |||
| #define A06 x11 | |||
| #define A07 x12 | |||
| #define A08 x13 | |||
| #define B01 x14 | |||
| #define B02 x15 | |||
| #define B03 x16 | |||
| #define B04 x17 | |||
| #define B00 x22 | |||
| #define I x18 | |||
| #define J x19 | |||
| #define TEMP1 x20 | |||
| #define A_PREFETCH 256 | |||
| /************************************************************************************** | |||
| * Macro definitions | |||
| **************************************************************************************/ | |||
// Push d8-d17 and x18-x28 into a freshly reserved 176-byte (11 * 16) stack
// frame.  RESTORE_REGS reads the same slots back.
.macro SAVE_REGS
	sub	sp, sp, #176
	stp	d8, d9, [sp, #0]
	stp	d10, d11, [sp, #16]
	stp	d12, d13, [sp, #32]
	stp	d14, d15, [sp, #48]
	stp	d16, d17, [sp, #64]
	stp	x18, x19, [sp, #80]
	stp	x20, x21, [sp, #96]
	stp	x22, x23, [sp, #112]
	stp	x24, x25, [sp, #128]
	stp	x26, x27, [sp, #144]
	str	x28, [sp, #160]
.endm
// Reload d8-d17 and x18-x28 from the 176-byte frame written by SAVE_REGS,
// then release the frame.
.macro RESTORE_REGS
	ldp	d8, d9, [sp, #0]
	ldp	d10, d11, [sp, #16]
	ldp	d12, d13, [sp, #32]
	ldp	d14, d15, [sp, #48]
	ldp	d16, d17, [sp, #64]
	ldp	x18, x19, [sp, #80]
	ldp	x20, x21, [sp, #96]
	ldp	x22, x23, [sp, #112]
	ldp	x24, x25, [sp, #128]
	ldp	x26, x27, [sp, #144]
	ldr	x28, [sp, #160]
	add	sp, sp, #176
.endm
| /*************************************************************************************************************************/ | |||
// Copy 16 floats (64 bytes) from each of the eight source pointers A01..A08
// into 512 consecutive bytes starting at B00.  Every source pointer advances
// by 64, TEMP1 ends at the old B00 + 512 and B00 advances by M8.
.macro COPY16x8
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]
	prfm	PLDL1KEEP, [A03, #A_PREFETCH]
	prfm	PLDL1KEEP, [A04, #A_PREFETCH]
	prfm	PLDL1KEEP, [A05, #A_PREFETCH]
	prfm	PLDL1KEEP, [A06, #A_PREFETCH]
	prfm	PLDL1KEEP, [A07, #A_PREFETCH]
	prfm	PLDL1KEEP, [A08, #A_PREFETCH]

	mov	TEMP1, B00			// TEMP1 is the running store cursor

	ld1	{v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [TEMP1], #64

	ld1	{v4.4s, v5.4s, v6.4s, v7.4s}, [A02], #64
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1], #64

	ld1	{v8.4s, v9.4s, v10.4s, v11.4s}, [A03], #64
	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1], #64

	ld1	{v12.4s, v13.4s, v14.4s, v15.4s}, [A04], #64
	st1	{v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1], #64

	ld1	{v16.4s, v17.4s, v18.4s, v19.4s}, [A05], #64
	st1	{v16.4s, v17.4s, v18.4s, v19.4s}, [TEMP1], #64

	ld1	{v20.4s, v21.4s, v22.4s, v23.4s}, [A06], #64
	st1	{v20.4s, v21.4s, v22.4s, v23.4s}, [TEMP1], #64

	ld1	{v24.4s, v25.4s, v26.4s, v27.4s}, [A07], #64
	st1	{v24.4s, v25.4s, v26.4s, v27.4s}, [TEMP1], #64

	ld1	{v28.4s, v29.4s, v30.4s, v31.4s}, [A08], #64
	st1	{v28.4s, v29.4s, v30.4s, v31.4s}, [TEMP1], #64

	add	B00, B00, M8
.endm
// Copy 8 floats (32 bytes) from each of A01..A08 into 256 consecutive bytes
// at B01.  Every source pointer advances by 32 and B01 by 256.
.macro COPY8x8
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]
	prfm	PLDL1KEEP, [A03, #A_PREFETCH]
	prfm	PLDL1KEEP, [A04, #A_PREFETCH]
	prfm	PLDL1KEEP, [A05, #A_PREFETCH]
	prfm	PLDL1KEEP, [A06, #A_PREFETCH]
	prfm	PLDL1KEEP, [A07, #A_PREFETCH]
	prfm	PLDL1KEEP, [A08, #A_PREFETCH]

	ldp	q0, q1, [A01], #32
	ldp	q2, q3, [A02], #32
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B01], #64

	ldp	q4, q5, [A03], #32
	ldp	q6, q7, [A04], #32
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [B01], #64

	ldp	q8, q9, [A05], #32
	ldp	q10, q11, [A06], #32
	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [B01], #64

	ldp	q12, q13, [A07], #32
	ldp	q14, q15, [A08], #32
	st1	{v12.4s, v13.4s, v14.4s, v15.4s}, [B01], #64
.endm
// Copy 4 floats (16 bytes) from each of A01..A08 into 128 consecutive bytes
// at B02.  Every source pointer advances by 16 and B02 by 128.
// No prefetch is issued in this macro.
.macro COPY4x8
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16
	ldr	q2, [A03], #16
	ldr	q3, [A04], #16
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B02], #64

	ldr	q4, [A05], #16
	ldr	q5, [A06], #16
	ldr	q6, [A07], #16
	ldr	q7, [A08], #16
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [B02], #64
.endm
// Copy 2 floats (8 bytes) from each of A01..A08 into 64 consecutive bytes
// at B03.  Every source pointer advances by 8 and B03 by 64.
// No prefetch is issued in this macro.
.macro COPY2x8
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8
	stp	d0, d1, [B03], #16
	stp	d2, d3, [B03], #16

	ldr	d4, [A05], #8
	ldr	d5, [A06], #8
	ldr	d6, [A07], #8
	ldr	d7, [A08], #8
	stp	d4, d5, [B03], #16
	stp	d6, d7, [B03], #16
.endm
// Copy 1 float (4 bytes) from each of A01..A08 into 32 consecutive bytes
// at B04.  Every source pointer advances by 4 and B04 by 32.
//
// Rows 5-8 are loaded as single floats exactly like rows 1-4.  An 8-byte
// "ldr d, [..], #8" here would read 4 bytes past the one element being
// copied and advance the pointer by two elements instead of one.
.macro COPY1x8
	ldr	s0, [A01], #4
	ldr	s1, [A02], #4
	ldr	s2, [A03], #4
	ldr	s3, [A04], #4
	stp	s0, s1, [B04], #8
	stp	s2, s3, [B04], #8

	ldr	s4, [A05], #4
	ldr	s5, [A06], #4
	ldr	s6, [A07], #4
	ldr	s7, [A08], #4
	stp	s4, s5, [B04], #8
	stp	s6, s7, [B04], #8
.endm
| /*************************************************************************************************************************/ | |||
// COPY16x4: copy 64 bytes (sixteen 4-byte elements) from each of A01..A04
// into four consecutive 64-byte slots starting at B00. TEMP1 walks the
// slots after the first and ends at B00 + 192. Each source pointer advances
// by 64; B00 advances by M8, not by the 256 bytes written.
| .macro COPY16x4 | |||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
| ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64 | |||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||
| add TEMP1, B00, #64 | |||
| ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02], #64 | |||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1], #64 | |||
| ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [A03], #64 | |||
| st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1], #64 | |||
| ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [A04], #64 | |||
| st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1] | |||
| add B00, B00, M8 | |||
| .endm | |||
// COPY8x4: copy 32 bytes (eight 4-byte elements) from each of A01..A04 and
// append them, in pointer order, to the B01 stream. Each source pointer
// advances by 32 and B01 by 128.
| .macro COPY8x4 | |||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
| ldp q0, q1, [A01], #32 | |||
| ldp q2, q3, [A02], #32 | |||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01], #64 | |||
| ldp q4, q5, [A03], #32 | |||
| ldp q6, q7, [A04], #32 | |||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B01], #64 | |||
| .endm | |||
// COPY4x4: copy 16 bytes (four 4-byte elements) from each of A01..A04 and
// append the 64 bytes, in pointer order, to the B02 stream. Each source
// pointer advances by 16 and B02 by 64. No prefetch hints are issued.
| .macro COPY4x4 | |||
| ldr q0, [A01], #16 | |||
| ldr q1, [A02], #16 | |||
| ldr q2, [A03], #16 | |||
| ldr q3, [A04], #16 | |||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B02], #64 | |||
| .endm | |||
// COPY2x4: copy 8 bytes (two 4-byte elements) from each of A01..A04 and
// append them, in pointer order, to the B03 stream. Each source pointer
// advances by 8 and B03 by 32. No prefetch hints are issued.
| .macro COPY2x4 | |||
| ldr d0, [A01], #8 | |||
| ldr d1, [A02], #8 | |||
| ldr d2, [A03], #8 | |||
| ldr d3, [A04], #8 | |||
| stp d0, d1, [B03], #16 | |||
| stp d2, d3, [B03], #16 | |||
| .endm | |||
// COPY1x4: copy one 4-byte element from each of A01..A04 and append the
// four values, in pointer order, to the B04 stream. Each source pointer
// advances by 4 and B04 by 16. No prefetch hints are issued.
| .macro COPY1x4 | |||
| ldr s0, [A01], #4 | |||
| ldr s1, [A02], #4 | |||
| ldr s2, [A03], #4 | |||
| ldr s3, [A04], #4 | |||
| stp s0, s1, [B04], #8 | |||
| stp s2, s3, [B04], #8 | |||
| .endm | |||
| /*************************************************************************************************************************/ | |||
// COPY16x2: copy 64 bytes (sixteen 4-byte elements) from A01 to B00 and
// 64 bytes from A02 to B00 + 64 (addressed through TEMP1, which is left
// pointing there). Each source pointer advances by 64; B00 advances by M8.
| .macro COPY16x2 | |||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
| ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64 | |||
| ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02], #64 | |||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||
| add TEMP1, B00, #64 | |||
| st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1] | |||
| add B00, B00, M8 | |||
| .endm | |||
// COPY8x2: copy 32 bytes (eight 4-byte elements) from A01 and then from A02
// and append the 64 bytes to the B01 stream. Each source pointer advances
// by 32 and B01 by 64.
| .macro COPY8x2 | |||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
| prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
| ld1 {v0.4s, v1.4s}, [A01], #32 | |||
| ld1 {v2.4s, v3.4s}, [A02], #32 | |||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01], #64 | |||
| .endm | |||
// COPY4x2: copy 16 bytes (four 4-byte elements) from A01 and then from A02
// and append the 32 bytes to the B02 stream. Each source pointer advances
// by 16 and B02 by 32. No prefetch hints are issued.
| .macro COPY4x2 | |||
| ldr q0, [A01], #16 | |||
| ldr q1, [A02], #16 | |||
| stp q0, q1, [B02], #32 | |||
| .endm | |||
// COPY2x2: copy 8 bytes (two 4-byte elements) from A01 and then from A02
// and append the 16 bytes to the B03 stream. Each source pointer advances
// by 8 and B03 by 16. No prefetch hints are issued.
| .macro COPY2x2 | |||
| ldr d0, [A01], #8 | |||
| ldr d1, [A02], #8 | |||
| stp d0, d1, [B03], #16 | |||
| .endm | |||
// COPY1x2: copy one 4-byte element from A01 and one from A02 and append the
// pair to the B04 stream. Each source pointer advances by 4 and B04 by 8.
// No prefetch hints are issued.
| .macro COPY1x2 | |||
| ldr s0, [A01], #4 | |||
| ldr s1, [A02], #4 | |||
| stp s0, s1, [B04], #8 | |||
| .endm | |||
| /*************************************************************************************************************************/ | |||
// COPY16x1: copy 64 bytes (sixteen 4-byte elements) from A01 to B00.
// A01 advances by 64; B00 advances by M8, not by the bytes written.
| .macro COPY16x1 | |||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
| ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64 | |||
| st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||
| add B00, B00, M8 | |||
| .endm | |||
// COPY8x1: copy 32 bytes (eight 4-byte elements) from A01 to the B01
// stream; both pointers advance by 32.
| .macro COPY8x1 | |||
| prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
| ldp q0, q1, [A01], #32 | |||
| stp q0, q1, [B01], #32 | |||
| .endm | |||
// COPY4x1: copy 16 bytes (four 4-byte elements) from A01 to the B02
// stream; both pointers advance by 16. No prefetch hint is issued.
| .macro COPY4x1 | |||
| ldr q0, [A01], #16 | |||
| str q0, [B02], #16 | |||
| .endm | |||
// COPY2x1: copy 8 bytes (two 4-byte elements) from A01 to the B03 stream;
// both pointers advance by 8. No prefetch hint is issued.
| .macro COPY2x1 | |||
| ldr d0, [A01], #8 | |||
| str d0, [B03], #8 | |||
| .endm | |||
// COPY1x1: copy a single 4-byte element from A01 to the B04 stream;
// both pointers advance by 4. No prefetch hint is issued.
| .macro COPY1x1 | |||
| ldr s0, [A01], #4 | |||
| str s0, [B04], #4 | |||
| .endm | |||
| /************************************************************************************** | |||
| * End of macro definitions | |||
| **************************************************************************************/ | |||
// Driver for the copy macros above.
// Setup: LDA is converted to a byte stride; B01..B04 are pointed at
// B + (N rounded down to a multiple of 16 / 8 / 4 / 2) * M * SIZE, i.e. the
// destination regions used for the 8-, 4-, 2- and 1-wide leftovers of N;
// M8 = M * 16 * SIZE is the distance between successive 16-wide tiles in B.
// Main body: lines of A are consumed in groups of 8 (looped M / 8 times),
// then at most one group of 4, one of 2 and one single line, selected by the
// low bits of M. Inside every group N is swept in chunks of 16 (looped),
// then 8, 4, 2, 1 (each at most once, selected by the bits of N).
// Always returns 0 in x0.
| PROLOGUE | |||
| .align 5 | |||
| SAVE_REGS | |||
| lsl LDA, LDA, #2 // LDA = LDA * SIZE | |||
| lsl TEMP1, M, #2 // TEMP1 = M * SIZE | |||
// N with its low 4 / 3 / 2 / 1 bits cleared
| and B01 , N , #-16 | |||
| and B02 , N , #-8 | |||
| and B03 , N , #-4 | |||
| and B04 , N , #-2 | |||
// scale each rounded N by the byte size of M elements
| mul B01, B01, TEMP1 | |||
| mul B02, B02, TEMP1 | |||
| mul B03, B03, TEMP1 | |||
| mul B04, B04, TEMP1 | |||
// turn the offsets into absolute destination pointers
| add B01 , B01, B | |||
| add B02 , B02, B | |||
| add B03 , B03, B | |||
| add B04 , B04, B | |||
| lsl M8, M, #6 // M8 = M * 16 * SIZE | |||
// ---- groups of 8 lines ----
| .Lsgemm_tcopy_L8_BEGIN: | |||
| asr J, M, #3 // J = M / 8 | |||
| cmp J, #0 | |||
| ble .Lsgemm_tcopy_L4_BEGIN | |||
| .align 5 | |||
| .Lsgemm_tcopy_L8_M16_BEGIN: | |||
// A01..A08 = eight consecutive LDA-strided lines; A moves on to the ninth
| mov A01, A | |||
| add A02, A01, LDA | |||
| add A03, A02, LDA | |||
| add A04, A03, LDA | |||
| add A05, A04, LDA | |||
| add A06, A05, LDA | |||
| add A07, A06, LDA | |||
| add A08, A07, LDA | |||
| add A, A08, LDA | |||
| mov B00, B | |||
| add B, B00, #512 // B = B + 8 * 16 * SIZE | |||
| asr I, N, #4 // I = N / 16 | |||
| cmp I, #0 | |||
| ble .Lsgemm_tcopy_L8_M16_40 | |||
| .align 5 | |||
| .Lsgemm_tcopy_L8_M16_20: | |||
| COPY16x8 | |||
| subs I , I , #1 | |||
| bne .Lsgemm_tcopy_L8_M16_20 | |||
| .Lsgemm_tcopy_L8_M16_40: | |||
// tst sets Z when the tested bit of N is clear, and the ble below is then
// taken, skipping the copy. NOTE(review): ble also evaluates N != V; b.eq
// would express the intent directly - confirm before changing.
| tst N , #8 | |||
| ble .Lsgemm_tcopy_L8_M16_60 | |||
| COPY8x8 | |||
| .Lsgemm_tcopy_L8_M16_60: | |||
| tst N , #4 | |||
| ble .Lsgemm_tcopy_L8_M16_80 | |||
| COPY4x8 | |||
| .Lsgemm_tcopy_L8_M16_80: | |||
| tst N , #2 | |||
| ble .Lsgemm_tcopy_L8_M16_100 | |||
| COPY2x8 | |||
| .Lsgemm_tcopy_L8_M16_100: | |||
| tst N, #1 | |||
| ble .Lsgemm_tcopy_L8_M16_END | |||
| COPY1x8 | |||
| .Lsgemm_tcopy_L8_M16_END: | |||
| subs J , J, #1 // j-- | |||
| bne .Lsgemm_tcopy_L8_M16_BEGIN | |||
| /*********************************************************************************************/ | |||
// ---- one group of 4 lines, if bit 2 of M is set ----
| .Lsgemm_tcopy_L4_BEGIN: | |||
// nothing left at all when M is a multiple of 8
| tst M, #7 | |||
| ble .Lsgemm_tcopy_L999 | |||
| tst M, #4 | |||
| ble .Lsgemm_tcopy_L2_BEGIN | |||
| .Lsgemm_tcopy_L4_M16_BEGIN: | |||
| mov A01, A | |||
| add A02, A01, LDA | |||
| add A03, A02, LDA | |||
| add A04, A03, LDA | |||
| add A, A04, LDA | |||
| mov B00, B | |||
| add B, B00, #256 // B = B + 4 * 16 * SIZE | |||
| asr I, N, #4 // I = N / 16 | |||
| cmp I, #0 | |||
| ble .Lsgemm_tcopy_L4_M16_40 | |||
| .align 5 | |||
| .Lsgemm_tcopy_L4_M16_20: | |||
| COPY16x4 | |||
| subs I , I , #1 | |||
| bne .Lsgemm_tcopy_L4_M16_20 | |||
| .Lsgemm_tcopy_L4_M16_40: | |||
| tst N , #8 | |||
| ble .Lsgemm_tcopy_L4_M16_60 | |||
| COPY8x4 | |||
| .Lsgemm_tcopy_L4_M16_60: | |||
| tst N , #4 | |||
| ble .Lsgemm_tcopy_L4_M16_80 | |||
| COPY4x4 | |||
| .Lsgemm_tcopy_L4_M16_80: | |||
| tst N , #2 | |||
| ble .Lsgemm_tcopy_L4_M16_100 | |||
| COPY2x4 | |||
| .Lsgemm_tcopy_L4_M16_100: | |||
| tst N, #1 | |||
| ble .Lsgemm_tcopy_L4_M16_END | |||
| COPY1x4 | |||
| .Lsgemm_tcopy_L4_M16_END: | |||
| /*********************************************************************************************/ | |||
// ---- one group of 2 lines, if bit 1 of M is set ----
| .Lsgemm_tcopy_L2_BEGIN: | |||
| tst M, #3 | |||
| ble .Lsgemm_tcopy_L999 | |||
| tst M, #2 | |||
| ble .Lsgemm_tcopy_L1_BEGIN | |||
| .Lsgemm_tcopy_L2_M16_BEGIN: | |||
| mov A01, A | |||
| add A02, A01, LDA | |||
| add A, A02, LDA | |||
| mov B00, B | |||
| add B, B00, #128 // B = B + 2 * 16 * SIZE | |||
| asr I, N, #4 // I = N / 16 | |||
| cmp I, #0 | |||
| ble .Lsgemm_tcopy_L2_M16_40 | |||
| .align 5 | |||
| .Lsgemm_tcopy_L2_M16_20: | |||
| COPY16x2 | |||
| subs I , I , #1 | |||
| bne .Lsgemm_tcopy_L2_M16_20 | |||
| .Lsgemm_tcopy_L2_M16_40: | |||
| tst N , #8 | |||
| ble .Lsgemm_tcopy_L2_M16_60 | |||
| COPY8x2 | |||
| .Lsgemm_tcopy_L2_M16_60: | |||
| tst N , #4 | |||
| ble .Lsgemm_tcopy_L2_M16_80 | |||
| COPY4x2 | |||
| .Lsgemm_tcopy_L2_M16_80: | |||
| tst N , #2 | |||
| ble .Lsgemm_tcopy_L2_M16_100 | |||
| COPY2x2 | |||
| .Lsgemm_tcopy_L2_M16_100: | |||
| tst N , #1 | |||
| ble .Lsgemm_tcopy_L2_M16_END | |||
| COPY1x2 | |||
| .Lsgemm_tcopy_L2_M16_END: | |||
| /*********************************************************************************************/ | |||
// ---- one final line, if bit 0 of M is set ----
| .Lsgemm_tcopy_L1_BEGIN: | |||
| tst M, #1 | |||
| ble .Lsgemm_tcopy_L999 | |||
| .Lsgemm_tcopy_L1_M16_BEGIN: | |||
| mov A01, A // A01 = A | |||
| mov B00, B | |||
| asr I, N, #4 // I = N / 16 | |||
| cmp I, #0 | |||
| ble .Lsgemm_tcopy_L1_M16_40 | |||
| .align 5 | |||
| .Lsgemm_tcopy_L1_M16_20: | |||
| COPY16x1 | |||
| subs I , I , #1 | |||
| bne .Lsgemm_tcopy_L1_M16_20 | |||
| .Lsgemm_tcopy_L1_M16_40: | |||
| tst N , #8 | |||
| ble .Lsgemm_tcopy_L1_M16_60 | |||
| COPY8x1 | |||
| .Lsgemm_tcopy_L1_M16_60: | |||
| tst N , #4 | |||
| ble .Lsgemm_tcopy_L1_M16_80 | |||
| COPY4x1 | |||
| .Lsgemm_tcopy_L1_M16_80: | |||
| tst N , #2 | |||
| ble .Lsgemm_tcopy_L1_M16_100 | |||
| COPY2x1 | |||
| .Lsgemm_tcopy_L1_M16_100: | |||
| tst N , #1 | |||
| ble .Lsgemm_tcopy_L1_M16_END | |||
| COPY1x1 | |||
| .Lsgemm_tcopy_L1_M16_END: | |||
// common exit
| .Lsgemm_tcopy_L999: | |||
| mov x0, #0 // set return value | |||
| RESTORE_REGS | |||
| ret | |||
| EPILOGUE | |||
| @@ -739,6 +739,26 @@ static void init_parameter(void) { | |||
| } | |||
| #else //POWER | |||
| #if defined(ARCH_ZARCH) | |||
// ZARCH variant of init_parameter: stores the SGEMM/DGEMM/CGEMM/ZGEMM
// *_DEFAULT_P, *_DEFAULT_R and *_DEFAULT_Q values into the matching
// gemm_p / gemm_r / gemm_q fields of TABLE_NAME. Nothing is probed at run time.
// NOTE(review): the *_DEFAULT_* names are defined outside this hunk -
// presumably per-architecture macros; confirm they exist for ZARCH.
| static void init_parameter(void) { | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
| TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
| TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
| } | |||
| #else //ZARCH | |||
| #ifdef ARCH_X86 | |||
| static int get_l2_size_old(void){ | |||
| int i, eax, ebx, ecx, edx, cpuid_level; | |||
| @@ -1325,4 +1345,5 @@ static void init_parameter(void) { | |||
| } | |||
| #endif //POWER | |||
| #endif //ZARCH | |||
| #endif //defined(ARCH_ARM64) | |||
| @@ -98,5 +98,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c | |||
| @@ -95,5 +95,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c | |||
| @@ -0,0 +1,224 @@ | |||
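| /* %0 = "+r"(a_pointer), %1 = "+r"(b_pointer), %2 = "+r"(c_pointer), %3 = "+r"(ldc_in_bytes), %4 for k_count, %5 for c_store, %6 = "+r"(const_val) pointing at {alphar, alphai}, %7 = "+r"(M) used as the m counter, %8 = "+r"(next_b) used as a prefetch pointer */ | |||
| /* r11 = m(const, copied back into %7 on exit), r12 = k << 5(const), r13 = k(const), r14 = b_head_pos(const), r15 = tmp */ | |||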
| #include "common.h" | |||
| #include <stdint.h> | |||
| //recommended settings: GEMM_Q=256, GEMM_P=256 | |||
| /* m = 4 *//* ymm0 for alpha, ymm1-ymm3 for temporary use, ymm4-ymm15 for accumulators */ | |||
/* Multiply-accumulate steps for a 4-row strip of A.
 * Every k1 macro consumes 32 bytes (4 doubles) from %0 and combines them with
 * 1, 2, 4, 8 or 12 doubles of B read through %1; for n = 8 and n = 12 the
 * second and third group of four B values are addressed as (%1,%%r12,1) and
 * (%1,%%r12,2). Results accumulate into ymm4..ymm15.
 * KERNEL_h_* variants advance %0 only; the matching KERNEL_* variant appends
 * the addq that advances %1. KERNEL_k2* performs two k steps; the n = 12 form
 * is written out by hand, issues a prefetcht0 512 bytes ahead of %0 and
 * advances both %0 and %1 by 64. */
| #define KERNEL_k1m4n1 \ | |||
| "vmovupd (%0),%%ymm1; addq $32,%0;"\ | |||
| "vbroadcastsd (%1),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,%%ymm4;"\ | |||
| "addq $8,%1;" | |||
| #define KERNEL_h_k1m4n2 \ | |||
| "vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2; addq $32,%0;"\ | |||
| "vbroadcastf128 (%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm4; vfmadd231pd %%ymm2,%%ymm3,%%ymm5;" | |||
| #define KERNEL_k1m4n2 KERNEL_h_k1m4n2 "addq $16,%1;" | |||
| #define KERNEL_h_k1m4n4 \ | |||
| KERNEL_h_k1m4n2 "vbroadcastf128 16(%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm6; vfmadd231pd %%ymm2,%%ymm3,%%ymm7;" | |||
| #define KERNEL_k1m4n4 KERNEL_h_k1m4n4 "addq $32,%1;" | |||
| #define unit_kernel_k1m4n4(c1,c2,c3,c4,off1,off2,...) \ | |||
| "vbroadcastf128 "#off1"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";"\ | |||
| "vbroadcastf128 "#off2"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c3"; vfmadd231pd %%ymm2,%%ymm3,"#c4";" | |||
| #define KERNEL_h_k1m4n8 KERNEL_h_k1m4n4 unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1) | |||
| #define KERNEL_k1m4n8 KERNEL_h_k1m4n8 "addq $32,%1;" | |||
| #define KERNEL_h_k1m4n12 KERNEL_h_k1m4n8 unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2) | |||
| #define KERNEL_k1m4n12 KERNEL_h_k1m4n12 "addq $32,%1;" | |||
| #define KERNEL_k2m4n1 KERNEL_k1m4n1 KERNEL_k1m4n1 | |||
| #define KERNEL_k2m4n2 KERNEL_k1m4n2 KERNEL_k1m4n2 | |||
| #define KERNEL_k2m4n4 KERNEL_k1m4n4 KERNEL_k1m4n4 | |||
| #define KERNEL_k2m4n8 KERNEL_k1m4n8 KERNEL_k1m4n8 | |||
| #define KERNEL_k2m4n12 \ | |||
| "vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2;"\ | |||
| unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,0,16,%1)\ | |||
| unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1)\ | |||
| unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2)\ | |||
| "vmovddup 32(%0),%%ymm1; vmovddup 40(%0),%%ymm2; prefetcht0 512(%0); addq $64,%0;"\ | |||
| unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,32,48,%1)\ | |||
| unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,32,48,%1,%%r12,1)\ | |||
| unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,32,48,%1,%%r12,2) "addq $64,%1;" | |||
/* INIT_m4n<N>: zero the accumulators of the m = 4 kernels by xor-ing each
 * register with itself: ymm4 for n = 1, ymm4-ymm5 for n = 2, ymm4-ymm7 for
 * n = 4, ymm4-ymm11 for n = 8 and ymm4-ymm15 for n = 12. */
| #define INIT_m4n1 "vpxor %%ymm4,%%ymm4,%%ymm4;" | |||
| #define INIT_m4n2 INIT_m4n1 "vpxor %%ymm5,%%ymm5,%%ymm5;" | |||
| #define INIT_m4n4 INIT_m4n2 "vpxor %%ymm6,%%ymm6,%%ymm6;vpxor %%ymm7,%%ymm7,%%ymm7;" | |||
| #define unit_init_m4n4(c1,c2,c3,c4) \ | |||
| "vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";vpxor "#c3","#c3","#c3";vpxor "#c4","#c4","#c4";" | |||
| #define INIT_m4n8 INIT_m4n4 unit_init_m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11) | |||
| #define INIT_m4n12 INIT_m4n8 unit_init_m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15) | |||
/* Write-back for the m = 4 kernels.
 * Each accumulated double is duplicated into a pair (vunpcklpd / vunpckhpd)
 * and fused-multiply-added with ymm0 onto the existing contents of C, so one
 * accumulated value updates two adjacent doubles of C.
 * SAVE_h_m4n1 addresses C through %2 directly. The wider variants copy %2
 * into %5 first; each unit_save_m4n2 then updates the locations at %5 and
 * %5 + %3 (64 bytes each) and moves %5 forward by 2 * %3.
 * SAVE_m4(ndim) finishes by advancing %2 by 64 bytes. */
| #define SAVE_h_m4n1 \ | |||
| "vpermpd $216,%%ymm4,%%ymm3; vunpcklpd %%ymm3,%%ymm3,%%ymm1; vunpckhpd %%ymm3,%%ymm3,%%ymm2;"\ | |||
| "vfmadd213pd (%2),%%ymm0,%%ymm1; vfmadd213pd 32(%2),%%ymm0,%%ymm2; vmovupd %%ymm1,(%2); vmovupd %%ymm2,32(%2);" | |||
| #define unit_save_m4n2(c1,c2) \ | |||
| "vperm2f128 $2,"#c1","#c2",%%ymm2; vperm2f128 $19,"#c1","#c2","#c2"; vmovapd %%ymm2,"#c1";"\ | |||
| "vunpcklpd "#c1","#c1",%%ymm2; vunpcklpd "#c2","#c2",%%ymm3;"\ | |||
| "vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd 32(%5),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,32(%5);"\ | |||
| "vunpckhpd "#c1","#c1",%%ymm2; vunpckhpd "#c2","#c2",%%ymm3;"\ | |||
| "vfmadd213pd (%5,%3,1),%%ymm0,%%ymm2; vfmadd213pd 32(%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5,%3,1); vmovupd %%ymm3,32(%5,%3,1);"\ | |||
| "leaq (%5,%3,2),%5;" | |||
| #define SAVE_h_m4n2 "movq %2,%5;" unit_save_m4n2(%%ymm4,%%ymm5) | |||
| #define SAVE_h_m4n4 SAVE_h_m4n2 unit_save_m4n2(%%ymm6,%%ymm7) | |||
| #define SAVE_h_m4n8 SAVE_h_m4n4 unit_save_m4n2(%%ymm8,%%ymm9) unit_save_m4n2(%%ymm10,%%ymm11) | |||
| #define SAVE_h_m4n12 SAVE_h_m4n8 unit_save_m4n2(%%ymm12,%%ymm13) unit_save_m4n2(%%ymm14,%%ymm15) | |||
| #define SAVE_m4(ndim) SAVE_h_m4n##ndim "addq $64,%2;" | |||
/* COMPUTE_m4(ndim): complete k loop for one 4-row strip and ndim columns.
 *  1. Zero the accumulators, reload the k counter (%4 <- r13) and the start
 *     of the B panel (%1 <- r14), point %5 at C (%2) and clear r15.
 *  2. If k >= 24, run the unrolled loop at label <ndim>004041: four
 *     KERNEL_k2 blocks (8 k steps) per pass, with prefetcht1 on %5 and on %8
 *     (which moves forward 32 bytes per pass); r15 alternates between 126 and
 *     %3 and is added to %5 after a fixed subtraction of 63. The loop repeats
 *     while at least 16 steps remain, then %5 is reset to %2.
 *  3. Label <ndim>004042: remaining steps one KERNEL_k1 at a time, with
 *     prefetcht0 on %5, which moves forward by %3 per step.
 *  4. Label <ndim>004043: prefetch the head of the B panel (r14) and store
 *     the results with SAVE_m4.
 * The numeric labels embed ndim so that several expansions can coexist. */
| #define COMPUTE_m4(ndim) \ | |||
| INIT_m4n##ndim\ | |||
| "movq %%r13,%4; movq %%r14,%1; movq %2,%5; xorq %%r15,%%r15;"\ | |||
| "cmpq $24,%4; jb "#ndim"004042f;"\ | |||
| #ndim"004041:\n\t"\ | |||
| "cmpq $126,%%r15; movq $126,%%r15; cmoveq %3,%%r15;"\ | |||
| KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\ | |||
| "prefetcht1 (%5); subq $63,%5;"\ | |||
| KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\ | |||
| "addq %%r15,%5; prefetcht1 (%8); addq $32,%8;"\ | |||
| "subq $8,%4; cmpq $16,%4; jnb "#ndim"004041b;"\ | |||
| "movq %2,%5;"\ | |||
| #ndim"004042:\n\t"\ | |||
| "testq %4,%4; jz "#ndim"004043f;"\ | |||
| "prefetcht0 (%5); prefetcht0 63(%5);"\ | |||
| KERNEL_k1m4n##ndim\ | |||
| "prefetcht0 (%5,%3,4); prefetcht0 63(%5,%3,4); addq %3,%5;"\ | |||
| "decq %4; jmp "#ndim"004042b;"\ | |||
| #ndim"004043:\n\t"\ | |||
| "prefetcht0 (%%r14); prefetcht0 64(%%r14);"\ | |||
| SAVE_m4(ndim) | |||
| /* m = 2 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm9 for accumulators */ | |||
/* Multiply-accumulate steps for a 2-row strip of A.
 * Every macro consumes 16 bytes (2 doubles) from %0 and combines them with
 * 1, 2, 4, 8 or 12 doubles of B read through %1. n = 1 and n = 2 work in xmm
 * registers; n >= 4 broadcasts each A value across a ymm register and, for
 * the second and third group of four B values, addresses B as (%1,%%r12,1)
 * and (%1,%%r12,2). KERNEL_h_* variants advance %0 only; the matching
 * KERNEL_* variant appends the addq that advances %1. */
| #define KERNEL_k1m2n1 \ | |||
| "vmovupd (%0),%%xmm1; addq $16,%0;"\ | |||
| "vmovddup (%1),%%xmm2; vfmadd231pd %%xmm1,%%xmm2,%%xmm4;"\ | |||
| "addq $8,%1;" | |||
| #define KERNEL_h_k1m2n2 \ | |||
| "vmovddup (%0),%%xmm1; vmovddup 8(%0),%%xmm2; addq $16,%0;"\ | |||
| "vmovupd (%1),%%xmm3; vfmadd231pd %%xmm1,%%xmm3,%%xmm4; vfmadd231pd %%xmm2,%%xmm3,%%xmm5;" | |||
| #define KERNEL_k1m2n2 KERNEL_h_k1m2n2 "addq $16,%1;" | |||
| #define unit_kernel_k1m2n4(c1,c2,...) \ | |||
| "vmovupd ("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";" | |||
| #define KERNEL_h_k1m2n4 \ | |||
| "vbroadcastsd (%0),%%ymm1; vbroadcastsd 8(%0),%%ymm2; addq $16,%0;"\ | |||
| unit_kernel_k1m2n4(%%ymm4,%%ymm5,%1) | |||
| #define KERNEL_k1m2n4 KERNEL_h_k1m2n4 "addq $32,%1;" | |||
| #define KERNEL_h_k1m2n8 KERNEL_h_k1m2n4 \ | |||
| unit_kernel_k1m2n4(%%ymm6,%%ymm7,%1,%%r12,1) | |||
| #define KERNEL_k1m2n8 KERNEL_h_k1m2n8 "addq $32,%1;" | |||
| #define KERNEL_h_k1m2n12 KERNEL_h_k1m2n8 \ | |||
| unit_kernel_k1m2n4(%%ymm8,%%ymm9,%1,%%r12,2) | |||
| #define KERNEL_k1m2n12 KERNEL_h_k1m2n12 "addq $32,%1;" | |||
/* INIT_m2n<N>: zero the accumulators of the m = 2 kernels: xmm4 for n = 1,
 * xmm4-xmm5 for n = 2, ymm4-ymm5 for n = 4, ymm4-ymm7 for n = 8 and
 * ymm4-ymm9 for n = 12. INIT_m2n4 starts a fresh chain and does not build
 * on INIT_m2n2. */
| #define INIT_m2n1 "vpxor %%xmm4,%%xmm4,%%xmm4;" | |||
| #define INIT_m2n2 INIT_m2n1 "vpxor %%xmm5,%%xmm5,%%xmm5;" | |||
| #define unit_init_m2n4(c1,c2) "vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";" | |||
| #define INIT_m2n4 unit_init_m2n4(%%ymm4,%%ymm5) | |||
| #define INIT_m2n8 INIT_m2n4 unit_init_m2n4(%%ymm6,%%ymm7) | |||
| #define INIT_m2n12 INIT_m2n8 unit_init_m2n4(%%ymm8,%%ymm9) | |||
/* Write-back for the m = 2 kernels.
 * Accumulated doubles are duplicated into pairs, fused-multiply-added with
 * ymm0 onto the existing contents of C and stored back, 32 bytes per update.
 * SAVE_h_m2n1 and SAVE_h_m2n2 address C through %2 (and %2 + %3); the wider
 * variants copy %2 into %5 and each unit_save_m2n4 updates four locations
 * spaced %3 apart, leaving %5 advanced by 4 * %3.
 * SAVE_m2(ndim) finishes by advancing %2 by 32 bytes. */
| #define SAVE_h_m2n1 \ | |||
| "vinsertf128 $1,%%xmm4,%%ymm4,%%ymm4; vpermilpd $12,%%ymm4,%%ymm4; vfmadd213pd (%2),%%ymm0,%%ymm4; vmovupd %%ymm4,(%2);" | |||
| #define SAVE_h_m2n2 \ | |||
| "vinsertf128 $1,%%xmm5,%%ymm4,%%ymm4; vunpcklpd %%ymm4,%%ymm4,%%ymm1; vunpckhpd %%ymm4,%%ymm4,%%ymm2;"\ | |||
| "vfmadd213pd (%2),%%ymm0,%%ymm1; vmovupd %%ymm1,(%2);"\ | |||
| "vfmadd213pd (%2,%3,1),%%ymm0,%%ymm2; vmovupd %%ymm2,(%2,%3,1);" | |||
| #define unit_save_m2n4(c1,c2) \ | |||
| "vperm2f128 $2,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\ | |||
| "vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;"\ | |||
| "vperm2f128 $19,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\ | |||
| "vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;" | |||
| #define SAVE_h_m2n4 "movq %2,%5;" unit_save_m2n4(%%ymm4,%%ymm5) | |||
| #define SAVE_h_m2n8 SAVE_h_m2n4 unit_save_m2n4(%%ymm6,%%ymm7) | |||
| #define SAVE_h_m2n12 SAVE_h_m2n8 unit_save_m2n4(%%ymm8,%%ymm9) | |||
| #define SAVE_m2(ndim) SAVE_h_m2n##ndim "addq $32,%2;" | |||
/* COMPUTE_m2(ndim): complete k loop for one 2-row strip and ndim columns.
 * Zeroes the accumulators, reloads the k counter (%4 <- r13) and the start
 * of the B panel (%1 <- r14), runs one KERNEL_k1m2 step per iteration until
 * %4 reaches zero (label <ndim>002022), then stores the results with
 * SAVE_m2 (label <ndim>002023). There is no unrolling or prefetching. */
| #define COMPUTE_m2(ndim) \ | |||
| INIT_m2n##ndim\ | |||
| "movq %%r13,%4; movq %%r14,%1;"\ | |||
| #ndim"002022:\n\t"\ | |||
| "testq %4,%4; jz "#ndim"002023f;"\ | |||
| KERNEL_k1m2n##ndim\ | |||
| "decq %4; jmp "#ndim"002022b;"\ | |||
| #ndim"002023:\n\t"\ | |||
| SAVE_m2(ndim) | |||
| /* m = 1 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm6 for accumulators */ | |||
/* Multiply-accumulate steps for a single row of A.
 * Every macro consumes 8 bytes (1 double) from %0 and combines it with 1, 2,
 * 4, 8 or 12 doubles of B read through %1. n = 1 is a scalar FMA, n = 2 uses
 * xmm registers, and n >= 4 broadcasts the A value across ymm1 and addresses
 * the second and third group of four B values as (%1,%%r12,1) and
 * (%1,%%r12,2). KERNEL_h_* variants advance %0 only; the matching KERNEL_*
 * variant appends the addq that advances %1. */
| #define KERNEL_k1m1n1 \ | |||
| "vmovsd (%0),%%xmm1; addq $8,%0;"\ | |||
| "vfmadd231sd (%1),%%xmm1,%%xmm4; addq $8,%1;" | |||
| #define KERNEL_k1m1n2 \ | |||
| "vmovddup (%0),%%xmm1; addq $8,%0;"\ | |||
| "vfmadd231pd (%1),%%xmm1,%%xmm4; addq $16,%1;" | |||
| #define unit_kernel_k1m1n4(c1,...) \ | |||
| "vmovupd ("#__VA_ARGS__"),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,"#c1";" | |||
| #define KERNEL_h_k1m1n4 \ | |||
| "vbroadcastsd (%0),%%ymm1; addq $8,%0;"\ | |||
| unit_kernel_k1m1n4(%%ymm4,%1) | |||
| #define KERNEL_k1m1n4 KERNEL_h_k1m1n4 "addq $32,%1;" | |||
| #define KERNEL_h_k1m1n8 KERNEL_h_k1m1n4 unit_kernel_k1m1n4(%%ymm5,%1,%%r12,1) | |||
| #define KERNEL_k1m1n8 KERNEL_h_k1m1n8 "addq $32,%1;" | |||
| #define KERNEL_h_k1m1n12 KERNEL_h_k1m1n8 unit_kernel_k1m1n4(%%ymm6,%1,%%r12,2) | |||
| #define KERNEL_k1m1n12 KERNEL_h_k1m1n12 "addq $32,%1;" | |||
/* INIT_m1n<N>: zero the accumulators of the m = 1 kernels. n = 1 and n = 2
 * both reuse INIT_m2n1 (xmm4 only); n = 4, 8 and 12 clear ymm4, ymm4-ymm5
 * and ymm4-ymm6 respectively. */
| #define INIT_m1n1 INIT_m2n1 | |||
| #define INIT_m1n2 INIT_m2n1 | |||
| #define INIT_m1n4 "vpxor %%ymm4,%%ymm4,%%ymm4;" | |||
| #define INIT_m1n8 INIT_m1n4 "vpxor %%ymm5,%%ymm5,%%ymm5;" | |||
| #define INIT_m1n12 INIT_m1n8 "vpxor %%ymm6,%%ymm6,%%ymm6;" | |||
/* Write-back for the m = 1 kernels.
 * Each accumulated double is duplicated into a pair, fused-multiply-added
 * with the alpha register (xmm0 / ymm0) onto 16 bytes of C and stored back.
 * SAVE_h_m1n1 and SAVE_h_m1n2 address C through %2 (and %2 + %3).
 * unit_save_m1n4 works on %5: it gathers the 16-byte entries at %5 and
 * %5 + 2 * %3 into one ymm register, updates and scatters them, steps %5 by
 * %3 and repeats for the other two entries, leaving %5 advanced by 4 * %3.
 * SAVE_m1(ndim) finishes by advancing %2 by 16 bytes. */
| #define SAVE_h_m1n1 \ | |||
| "vmovddup %%xmm4,%%xmm4; vfmadd213pd (%2),%%xmm0,%%xmm4; vmovupd %%xmm4,(%2);" | |||
| #define SAVE_h_m1n2 \ | |||
| "vunpcklpd %%xmm4,%%xmm4,%%xmm1; vunpckhpd %%xmm4,%%xmm4,%%xmm2;"\ | |||
| "vfmadd213pd (%2),%%xmm0,%%xmm1; vmovupd %%xmm1,(%2);"\ | |||
| "vfmadd213pd (%2,%3,1),%%xmm0,%%xmm2; vmovupd %%xmm2,(%2,%3,1);" | |||
| #define unit_save_m1n4(c1) \ | |||
| "vunpcklpd "#c1","#c1",%%ymm1; vunpckhpd "#c1","#c1",%%ymm2;"\ | |||
| "vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\ | |||
| "vfmadd213pd %%ymm3,%%ymm0,%%ymm1; vmovupd %%xmm1,(%5); vextractf128 $1,%%ymm1,(%5,%3,2); addq %3,%5;"\ | |||
| "vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\ | |||
| "vfmadd213pd %%ymm3,%%ymm0,%%ymm2; vmovupd %%xmm2,(%5); vextractf128 $1,%%ymm2,(%5,%3,2); addq %3,%5; leaq (%5,%3,2),%5;" | |||
| #define SAVE_h_m1n4 "movq %2,%5;" unit_save_m1n4(%%ymm4) | |||
| #define SAVE_h_m1n8 SAVE_h_m1n4 unit_save_m1n4(%%ymm5) | |||
| #define SAVE_h_m1n12 SAVE_h_m1n8 unit_save_m1n4(%%ymm6) | |||
| #define SAVE_m1(ndim) SAVE_h_m1n##ndim "addq $16,%2;" | |||
/* COMPUTE_m1(ndim): complete k loop for a single row and ndim columns.
 * Zeroes the accumulators, reloads the k counter (%4 <- r13) and the start
 * of the B panel (%1 <- r14), runs one KERNEL_k1m1 step per iteration until
 * %4 reaches zero (label <ndim>001011), then stores the results with
 * SAVE_m1 (label <ndim>001012). */
| #define COMPUTE_m1(ndim) \ | |||
| INIT_m1n##ndim\ | |||
| "movq %%r13,%4; movq %%r14,%1;"\ | |||
| #ndim"001011:\n\t"\ | |||
| "testq %4,%4; jz "#ndim"001012f;"\ | |||
| KERNEL_k1m1n##ndim\ | |||
| "decq %4; jmp "#ndim"001011b;"\ | |||
| #ndim"001012:\n\t"\ | |||
| SAVE_m1(ndim) | |||
/* COMPUTE(ndim): process one panel of ndim columns for all M rows.
 * Expects these locals in the expanding scope: a_pointer, b_pointer,
 * c_pointer, ldc_in_bytes, K, ctemp, const_val, M, next_b and LDC.
 * C side, before the asm: next_b = b_pointer + ndim * K.
 * Asm: loads the 16 bytes at const_val into both halves of ymm0, caches
 * k in r13, k << 5 in r12, the B panel start in r14 and M in r11, then runs
 * COMPUTE_m4 while at least 4 rows remain, at most one COMPUTE_m2 and at most
 * one COMPUTE_m1. On exit %4, %1 and %7 are restored from r13, r14 and r11.
 * C side, after the asm: a_pointer is rewound by M * K, b_pointer moves on
 * by ndim * K and c_pointer by 2 * (LDC * ndim - M).
 * The clobber list names r11-r15, xmm0-xmm15, the flags and memory. */
| #define COMPUTE(ndim) {\ | |||
| next_b = b_pointer + ndim * K;\ | |||
| __asm__ __volatile__(\ | |||
| "vbroadcastf128 (%6),%%ymm0;"\ | |||
| "movq %4,%%r13; movq %4,%%r12; salq $5,%%r12; movq %1,%%r14; movq %7,%%r11;"\ | |||
| "cmpq $4,%7;jb 33101"#ndim"f;"\ | |||
| "33109"#ndim":\n\t"\ | |||
| COMPUTE_m4(ndim)\ | |||
| "subq $4,%7;cmpq $4,%7;jnb 33109"#ndim"b;"\ | |||
| "33101"#ndim":\n\t"\ | |||
| "cmpq $2,%7;jb 33104"#ndim"f;"\ | |||
| COMPUTE_m2(ndim)\ | |||
| "subq $2,%7;"\ | |||
| "33104"#ndim":\n\t"\ | |||
| "testq %7,%7;jz 33105"#ndim"f;"\ | |||
| COMPUTE_m1(ndim)\ | |||
| "33105"#ndim":\n\t"\ | |||
| "movq %%r13,%4; movq %%r14,%1; movq %%r11,%7;"\ | |||
| :"+r"(a_pointer),"+r"(b_pointer),"+r"(c_pointer),"+r"(ldc_in_bytes),"+r"(K),"+r"(ctemp),"+r"(const_val),"+r"(M),"+r"(next_b)\ | |||
| ::"r11","r12","r13","r14","r15","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14",\ | |||
| "xmm15","cc","memory");\ | |||
| a_pointer -= M * K; b_pointer += ndim * K; c_pointer += 2*(LDC * ndim - M);\ | |||
| } | |||
/* Kernel entry point. Returns 0 immediately when any of m, n, k is zero.
 * Otherwise the n columns are consumed in panels of 12, then 8, 4 and 2
 * columns, plus one final single column if n is odd; every panel is handled
 * by COMPUTE, which covers all m rows and updates the pointers below.
 * alphar and alphai are placed next to each other in constval so the asm can
 * load both through const_val. ldc_in_bytes is LDC scaled by the size of two
 * doubles. The local names are referenced by COMPUTE and must not change. */
| int __attribute__ ((noinline)) | |||
| CNAME(BLASLONG m, BLASLONG n, BLASLONG k, double alphar, double alphai, double * __restrict__ A, double * __restrict__ B, double * __restrict__ C, BLASLONG LDC) | |||
| { | |||
| if(!m || !n || !k) return 0; | |||
| double constval[2] = {alphar, alphai}; | |||
| double *const_val = constval; | |||
| int64_t M = (int64_t)m; | |||
| int64_t K = (int64_t)k; | |||
| int64_t ldc_in_bytes = (int64_t)LDC * sizeof(double) * 2; | |||
| double *a_pointer = A; | |||
| double *b_pointer = B; | |||
| double *next_b = B; | |||
| double *c_pointer = C; | |||
| double *ctemp = C; | |||
| BLASLONG n_count = n; | |||
| while(n_count >= 12) { COMPUTE(12) n_count -= 12; } | |||
| while(n_count >= 8) { COMPUTE(8) n_count -= 8; } | |||
| while(n_count >= 4) { COMPUTE(4) n_count -= 4; } | |||
| while(n_count >= 2) { COMPUTE(2) n_count -= 2; } | |||
| if(n_count >= 1) COMPUTE(1) | |||
| return 0; | |||
| } | |||
| @@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| @@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = ctrmm4x4V.S | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = ztrmm4x4V.S | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
| @@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| @@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = ctrmm4x4V.S | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = ztrmm4x4V.S | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
| @@ -94,26 +94,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| @@ -0,0 +1,38 @@ | |||
# AppVeyor CI configuration: Release build with CMake and the
# "NMake Makefiles JOM" generator, flang as Fortran compiler, then ctest.
| image: | |||
| - Visual Studio 2017 | |||
| configuration: Release | |||
| clone_depth: 3 | |||
| matrix: | |||
| fast_finish: false | |||
| skip_commits: | |||
| # Add [av skip] to commit messages | |||
| message: /\[av skip\]/ | |||
# the build directory is kept between runs
| cache: | |||
| - '%APPVEYOR_BUILD_FOLDER%\build' | |||
| environment: | |||
| global: | |||
| CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 | |||
# flang and jom are installed from conda-forge.
# NOTE(review): the image is Visual Studio 2017 but vcvarsall.bat is taken
# from the "Microsoft Visual Studio 14.0" directory - confirm this is intended.
| install: | |||
| - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat | |||
| - conda config --add channels conda-forge --force | |||
| - conda install --yes --quiet flang jom | |||
| - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 | |||
| - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" | |||
| - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" | |||
# configure inside .\build, creating it when the cache did not restore it
| before_build: | |||
| - ps: if (-Not (Test-Path .\build)) { mkdir build } | |||
| - cd build | |||
| - cmake -G "NMake Makefiles JOM" -DCMAKE_Fortran_COMPILER=flang -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON .. | |||
| build_script: | |||
| - cmake --build . | |||
# run the test suite with two parallel jobs
| test_script: | |||
| - ctest -j2 | |||
| @@ -35,3 +35,9 @@ LAPACKE/example/xexample* | |||
| # SED | |||
| SRC/*-e | |||
| LAPACKE/src/*-e | |||
| build* | |||
| # DOCS documentation | |||
| DOCS/man | |||
| DOCS/explore-html | |||
| output_err | |||
| @@ -1,33 +1,32 @@ | |||
| language: cpp | |||
| language: c | |||
| dist: xenial | |||
| group: travis_latest | |||
| git: | |||
| depth: 3 | |||
| quiet: true | |||
| addons: | |||
| apt: | |||
| sources: | |||
| - george-edison55-precise-backports # cmake | |||
| packages: | |||
| - cmake | |||
| - cmake-data | |||
| - gfortran | |||
| os: | |||
| - linux | |||
| - osx | |||
| env: | |||
| - CMAKE_BUILD_TYPE=Release | |||
| - CMAKE_BUILD_TYPE=Coverage | |||
| - gfortran | |||
| install: | |||
| - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; | |||
| then | |||
| for pkg in gcc cmake; do | |||
| if brew list -1 | grep -q "^${pkg}\$"; then | |||
| brew outdated $pkg || brew upgrade $pkg; | |||
| else | |||
| brew install $pkg; | |||
| fi | |||
| done | |||
| fi | |||
| matrix: | |||
| include: | |||
| - os: linux | |||
| env: CMAKE_BUILD_TYPE=Release | |||
| - os: linux | |||
| env: CMAKE_BUILD_TYPE=Coverage | |||
| - os: osx | |||
| env: CMAKE_BUILD_TYPE=Release | |||
| before_install: | |||
| - brew update > /dev/null | |||
| - brew install gcc > /dev/null | |||
| - os: osx | |||
| env: CMAKE_BUILD_TYPE=Coverage | |||
| before_install: | |||
| - brew update > /dev/null | |||
| - brew install gcc > /dev/null | |||
| script: | |||
| - export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST | |||
| @@ -6,4 +6,5 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/blas.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| COMPONENT Development | |||
| ) | |||
| @@ -1,13 +1,18 @@ | |||
| include ../make.inc | |||
| TOPSRCDIR = .. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .PHONY: all | |||
| all: blas | |||
| .PHONY: blas | |||
| blas: | |||
| $(MAKE) -C SRC | |||
| .PHONY: blas_testing | |||
| blas_testing: blas | |||
| $(MAKE) -C TESTING run | |||
| .PHONY: clean cleanobj cleanlib cleanexe cleantest | |||
| clean: | |||
| $(MAKE) -C SRC clean | |||
| $(MAKE) -C TESTING clean | |||
| @@ -1,5 +1,3 @@ | |||
| include ../../make.inc | |||
| ####################################################################### | |||
| # This is the makefile to create a library for the BLAS. | |||
| # The files are grouped as follows: | |||
| @@ -55,6 +53,10 @@ include ../../make.inc | |||
| # | |||
| ####################################################################### | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .PHONY: all | |||
| all: $(BLASLIB) | |||
| #--------------------------------------------------------- | |||
| @@ -138,33 +140,32 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ | |||
| $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) | |||
| $(BLASLIB): $(ALLOBJ) | |||
| $(ARCH) $(ARCHFLAGS) $@ $^ | |||
| $(AR) $(ARFLAGS) $@ $^ | |||
| $(RANLIB) $@ | |||
| .PHONY: single double complex complex16 | |||
| single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) | |||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
| $(RANLIB) $(BLASLIB) | |||
| double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) | |||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
| $(RANLIB) $(BLASLIB) | |||
| complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) | |||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
| $(RANLIB) $(BLASLIB) | |||
| complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) | |||
| $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
| $(RANLIB) $(BLASLIB) | |||
| FRC: | |||
| @FRC=$(FRC) | |||
| .PHONY: clean cleanobj cleanlib | |||
| clean: cleanobj cleanlib | |||
| cleanobj: | |||
| rm -f *.o | |||
| cleanlib: | |||
| #rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas | |||
| .f.o: | |||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||
| @@ -43,7 +43,7 @@ | |||
| *> \param[in] INCX | |||
| *> \verbatim | |||
| *> INCX is INTEGER | |||
| *> storage spacing between elements of SX | |||
| *> storage spacing between elements of CX | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -43,7 +43,7 @@ | |||
| *> \param[in] INCX | |||
| *> \verbatim | |||
| *> INCX is INTEGER | |||
| *> storage spacing between elements of SX | |||
| *> storage spacing between elements of DX | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -43,7 +43,7 @@ | |||
| *> \param[in] INCX | |||
| *> \verbatim | |||
| *> INCX is INTEGER | |||
| *> storage spacing between elements of SX | |||
| *> storage spacing between elements of ZX | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -0,0 +1,29 @@ | |||
| SBLAS1 = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'sdot.f', 'snrm2.f', 'srot.f', 'srotg.f', 'sscal.f', 'sswap.f', 'sdsdot.f', 'srotmg.f', 'srotm.f') | |||
| CBLAS1 = files('scabs1.f', 'scasum.f', 'scnrm2.f', 'icamax.f', 'caxpy.f', 'ccopy.f', 'cdotc.f', 'cdotu.f', 'csscal.f', 'crotg.f', 'cscal.f', 'cswap.f', 'csrot.f') | |||
| DBLAS1 = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'ddot.f', 'dnrm2.f', 'drot.f', 'drotg.f', 'dscal.f', 'dsdot.f', 'dswap.f', 'drotmg.f', 'drotm.f') | |||
| ZBLAS1 = files('dcabs1.f', 'dzasum.f', 'dznrm2.f', 'izamax.f', 'zaxpy.f', 'zcopy.f', 'zdotc.f', 'zdotu.f', 'zdscal.f', 'zrotg.f', 'zscal.f', 'zswap.f', 'zdrot.f') | |||
| CB1AUX = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'snrm2.f', 'sscal.f') | |||
| ZB1AUX = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'dnrm2.f', 'dscal.f') | |||
| ALLBLAS = files('lsame.f', 'xerbla.f', 'xerbla_array.f') | |||
| SBLAS2 = files('sgemv.f', 'sgbmv.f', 'ssymv.f', 'ssbmv.f', 'sspmv.f', 'strmv.f', 'stbmv.f', 'stpmv.f', 'strsv.f', 'stbsv.f', 'stpsv.f', 'sger.f', 'ssyr.f', 'sspr.f', 'ssyr2.f', 'sspr2.f') | |||
| CBLAS2 = files('cgemv.f', 'cgbmv.f', 'chemv.f', 'chbmv.f', 'chpmv.f', 'ctrmv.f', 'ctbmv.f', 'ctpmv.f', 'ctrsv.f', 'ctbsv.f', 'ctpsv.f', 'cgerc.f', 'cgeru.f', 'cher.f', 'chpr.f', 'cher2.f', 'chpr2.f') | |||
| DBLAS2 = files('dgemv.f', 'dgbmv.f', 'dsymv.f', 'dsbmv.f', 'dspmv.f', 'dtrmv.f', 'dtbmv.f', 'dtpmv.f', 'dtrsv.f', 'dtbsv.f', 'dtpsv.f', 'dger.f', 'dsyr.f', 'dspr.f', 'dsyr2.f', 'dspr2.f') | |||
| ZBLAS2 = files('zgemv.f', 'zgbmv.f', 'zhemv.f', 'zhbmv.f', 'zhpmv.f', 'ztrmv.f', 'ztbmv.f', 'ztpmv.f', 'ztrsv.f', 'ztbsv.f', 'ztpsv.f', 'zgerc.f', 'zgeru.f', 'zher.f', 'zhpr.f', 'zher2.f', 'zhpr2.f') | |||
| SBLAS3 = files('sgemm.f', 'ssymm.f', 'ssyrk.f', 'ssyr2k.f', 'strmm.f', 'strsm.f') | |||
| CBLAS3 = files('cgemm.f', 'csymm.f', 'csyrk.f', 'csyr2k.f', 'ctrmm.f', 'ctrsm.f', 'chemm.f', 'cherk.f', 'cher2k.f') | |||
| DBLAS3 = files('dgemm.f', 'dsymm.f', 'dsyrk.f', 'dsyr2k.f', 'dtrmm.f', 'dtrsm.f') | |||
| ZBLAS3 = files('zgemm.f', 'zsymm.f', 'zsyrk.f', 'zsyr2k.f', 'ztrmm.f', 'ztrsm.f', 'zhemm.f', 'zherk.f', 'zher2k.f') | |||
| @@ -23,13 +23,13 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| * Compute the inner product of two vectors with extended | |||
| * precision accumulation. | |||
| * | |||
| * Returns S.P. result with dot product accumulated in D.P. | |||
| * SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||
| * where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||
| * defined in a similar way using INCY. | |||
| *> Compute the inner product of two vectors with extended | |||
| *> precision accumulation. | |||
| *> | |||
| *> Returns S.P. result with dot product accumulated in D.P. | |||
| *> SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||
| *> where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||
| *> defined in a similar way using INCY. | |||
| *> \endverbatim | |||
| * | |||
| * Arguments: | |||
| @@ -77,7 +77,14 @@ | |||
| *> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA), | |||
| *> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) | |||
| * | |||
| *> \ingroup complex_blas_level1 | |||
| *> \author Univ. of Tennessee | |||
| *> \author Univ. of California Berkeley | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2017 | |||
| * | |||
| *> \ingroup single_blas_level1 | |||
| * | |||
| *> \par Further Details: | |||
| * ===================== | |||
| @@ -102,65 +109,7 @@ | |||
| *> 920501 Reformatted the REFERENCES section. (WRB) | |||
| *> 070118 Reformat to LAPACK coding style | |||
| *> \endverbatim | |||
| * | |||
| * ===================================================================== | |||
| * | |||
| * .. Local Scalars .. | |||
| * DOUBLE PRECISION DSDOT | |||
| * INTEGER I,KX,KY,NS | |||
| * .. | |||
| * .. Intrinsic Functions .. | |||
| * INTRINSIC DBLE | |||
| * .. | |||
| * DSDOT = SB | |||
| * IF (N.LE.0) THEN | |||
| * SDSDOT = DSDOT | |||
| * RETURN | |||
| * END IF | |||
| * IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN | |||
| * | |||
| * Code for equal and positive increments. | |||
| * | |||
| * NS = N*INCX | |||
| * DO I = 1,NS,INCX | |||
| * DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I)) | |||
| * END DO | |||
| * ELSE | |||
| * | |||
| * Code for unequal or nonpositive increments. | |||
| * | |||
| * KX = 1 | |||
| * KY = 1 | |||
| * IF (INCX.LT.0) KX = 1 + (1-N)*INCX | |||
| * IF (INCY.LT.0) KY = 1 + (1-N)*INCY | |||
| * DO I = 1,N | |||
| * DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY)) | |||
| * KX = KX + INCX | |||
| * KY = KY + INCY | |||
| * END DO | |||
| * END IF | |||
| * SDSDOT = DSDOT | |||
| * RETURN | |||
| * END | |||
| * | |||
| *> \par Purpose: | |||
| * ============= | |||
| *> | |||
| *> \verbatim | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| * ======== | |||
| * | |||
| *> \author Univ. of Tennessee | |||
| *> \author Univ. of California Berkeley | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2017 | |||
| * | |||
| *> \ingroup single_blas_level1 | |||
| * | |||
| * ===================================================================== | |||
| REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY) | |||
| * | |||
| @@ -175,71 +124,6 @@ | |||
| * .. | |||
| * .. Array Arguments .. | |||
| REAL SX(*),SY(*) | |||
| * .. | |||
| * | |||
| * PURPOSE | |||
| * ======= | |||
| * | |||
| * Compute the inner product of two vectors with extended | |||
| * precision accumulation. | |||
| * | |||
| * Returns S.P. result with dot product accumulated in D.P. | |||
| * SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||
| * where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||
| * defined in a similar way using INCY. | |||
| * | |||
| * AUTHOR | |||
| * ====== | |||
| * Lawson, C. L., (JPL), Hanson, R. J., (SNLA), | |||
| * Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) | |||
| * | |||
| * ARGUMENTS | |||
| * ========= | |||
| * | |||
| * N (input) INTEGER | |||
| * number of elements in input vector(s) | |||
| * | |||
| * SB (input) REAL | |||
| * single precision scalar to be added to inner product | |||
| * | |||
| * SX (input) REAL array, dimension (N) | |||
| * single precision vector with N elements | |||
| * | |||
| * INCX (input) INTEGER | |||
| * storage spacing between elements of SX | |||
| * | |||
| * SY (input) REAL array, dimension (N) | |||
| * single precision vector with N elements | |||
| * | |||
| * INCY (input) INTEGER | |||
| * storage spacing between elements of SY | |||
| * | |||
| * SDSDOT (output) REAL | |||
| * single precision dot product (SB if N .LE. 0) | |||
| * | |||
| * Further Details | |||
| * =============== | |||
| * | |||
| * REFERENCES | |||
| * | |||
| * C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. | |||
| * Krogh, Basic linear algebra subprograms for Fortran | |||
| * usage, Algorithm No. 539, Transactions on Mathematical | |||
| * Software 5, 3 (September 1979), pp. 308-323. | |||
| * | |||
| * REVISION HISTORY (YYMMDD) | |||
| * | |||
| * 791001 DATE WRITTEN | |||
| * 890531 Changed all specific intrinsics to generic. (WRB) | |||
| * 890831 Modified array declarations. (WRB) | |||
| * 890831 REVISION DATE from Version 3.2 | |||
| * 891214 Prologue converted to Version 4.0 format. (BAB) | |||
| * 920310 Corrected definition of LX in DESCRIPTION. (WRB) | |||
| * 920501 Reformatted the REFERENCES section. (WRB) | |||
| * 070118 Reformat to LAPACK coding style | |||
| * | |||
| * ===================================================================== | |||
| * | |||
| * .. Local Scalars .. | |||
| DOUBLE PRECISION DSDOT | |||
| INTEGER I,KX,KY,NS | |||
| @@ -1,5 +1,7 @@ | |||
| include ../../make.inc | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .PHONY: all single double complex complex16 | |||
| all: single double complex complex16 | |||
| single: xblat1s xblat2s xblat3s | |||
| double: xblat1d xblat2d xblat3d | |||
| @@ -7,32 +9,33 @@ complex: xblat1c xblat2c xblat3c | |||
| complex16: xblat1z xblat2z xblat3z | |||
| xblat1s: sblat1.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat1d: dblat1.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat1c: cblat1.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat1z: zblat1.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat2s: sblat2.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat2d: dblat2.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat2c: cblat2.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat2z: zblat2.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat3s: sblat3.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat3d: dblat3.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat3c: cblat3.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xblat3z: zblat3.o $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| .PHONY: run | |||
| run: all | |||
| ./xblat1s > sblat1.out | |||
| ./xblat1d > dblat1.out | |||
| @@ -47,6 +50,7 @@ run: all | |||
| ./xblat3c < cblat3.in | |||
| ./xblat3z < zblat3.in | |||
| .PHONY: clean cleanobj cleanexe cleantest | |||
| clean: cleanobj cleanexe cleantest | |||
| cleanobj: | |||
| rm -f *.o | |||
| @@ -54,6 +58,3 @@ cleanexe: | |||
| rm -f xblat* | |||
| cleantest: | |||
| rm -f *.out core | |||
| .f.o: | |||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||
| @@ -619,7 +619,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -991,7 +991,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -946,7 +946,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -619,7 +619,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -12,8 +12,10 @@ FortranCInterface_HEADER(${LAPACK_BINARY_DIR}/include/cblas_mangling.h | |||
| SYMBOL_NAMESPACE "F77_") | |||
| if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) | |||
| message(WARNING "Reverting to pre-defined include/lapacke_mangling.h") | |||
| configure_file(include/lapacke_mangling_with_flags.h.in | |||
| ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||
| configure_file(include/lapacke_mangling_with_flags.h.in | |||
| ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||
| configure_file(include/cblas_mangling_with_flags.h.in | |||
| ${LAPACK_BINARY_DIR}/include/cblas_mangling.h) | |||
| endif() | |||
| include_directories(include ${LAPACK_BINARY_DIR}/include) | |||
| @@ -28,7 +30,10 @@ endforeach() | |||
| endmacro() | |||
| append_subdir_files(CBLAS_INCLUDE "include") | |||
| install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
| install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h | |||
| DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||
| COMPONENT Development | |||
| ) | |||
| # -------------------------------------------------- | |||
| if(BUILD_TESTING) | |||
| @@ -45,7 +50,9 @@ endif() | |||
| set(_cblas_config_install_guard_target "") | |||
| if(ALL_TARGETS) | |||
| install(EXPORT cblas-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| # Choose one of the cblas targets to use as a guard for | |||
| # cblas-config.cmake to load targets from the install tree. | |||
| list(GET ALL_TARGETS 0 _cblas_config_install_guard_target) | |||
| @@ -82,4 +89,6 @@ install(FILES | |||
| ) | |||
| #install(EXPORT cblas-targets | |||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) | |||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
| # COMPONENT Development | |||
| # ) | |||
| @@ -1,19 +1,25 @@ | |||
| include ../make.inc | |||
| TOPSRCDIR = .. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .PHONY: all | |||
| all: cblas | |||
| .PHONY: cblas | |||
| cblas: include/cblas_mangling.h | |||
| $(MAKE) -C src | |||
| include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in | |||
| cp $< $@ | |||
| cp include/cblas_mangling_with_flags.h.in $@ | |||
| .PHONY: cblas_testing | |||
| cblas_testing: cblas | |||
| $(MAKE) -C testing run | |||
| .PHONY: cblas_example | |||
| cblas_example: cblas | |||
| $(MAKE) -C examples | |||
| .PHONY: clean cleanobj cleanlib cleanexe cleantest | |||
| clean: | |||
| $(MAKE) -C src clean | |||
| $(MAKE) -C testing clean | |||
| @@ -1,17 +1,21 @@ | |||
| include ../../make.inc | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .SUFFIXES: .c .o | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| .PHONY: all | |||
| all: cblas_ex1 cblas_ex2 | |||
| cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| .PHONY: clean cleanobj cleanexe | |||
| clean: cleanobj cleanexe | |||
| cleanobj: | |||
| rm -f *.o | |||
| cleanexe: | |||
| rm -f cblas_ex1 cblas_ex2 | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| @@ -47,7 +47,7 @@ int main ( ) | |||
| a[m*3+1] = 6; | |||
| a[m*3+2] = 7; | |||
| a[m*3+3] = 8; | |||
| /* The elemetns of x and y */ | |||
| /* The elements of x and y */ | |||
| x[0] = 1; | |||
| x[1] = 2; | |||
| x[2] = 1; | |||
| @@ -1,7 +1,13 @@ | |||
| # This Makefile compiles the CBLAS routines | |||
| include ../../make.inc | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .SUFFIXES: .c .o | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| .PHONY: all | |||
| all: $(CBLASLIB) | |||
| # Error handling routines for level 2 & 3 | |||
| @@ -43,24 +49,25 @@ zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \ | |||
| # Common files for level 1 single precision | |||
| sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o | |||
| .PHONY: slib1 dlib1 clib1 zlib1 | |||
| # Single precision real | |||
| slib1: $(slev1) $(sclev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double precision real | |||
| dlib1: $(dlev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Single precision complex | |||
| clib1: $(clev1) $(sclev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double precision complex | |||
| zlib1: $(zlev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # | |||
| @@ -95,24 +102,25 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ | |||
| cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \ | |||
| cblas_zhpr.o cblas_zhpr2.o | |||
| .PHONY: slib2 dlib2 clib2 zlib2 | |||
| # Single precision real | |||
| slib2: $(slev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double precision real | |||
| dlib2: $(dlev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Single precision complex | |||
| clib2: $(clev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double precision complex | |||
| zlib2: $(zlev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # | |||
| @@ -141,24 +149,25 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ | |||
| cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \ | |||
| cblas_zsyr2k.o | |||
| .PHONY: slib3 dlib3 clib3 zlib3 | |||
| # Single precision real | |||
| slib3: $(slev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double precision real | |||
| dlib3: $(dlev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Single precision complex | |||
| clib3: $(clev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double precision complex | |||
| zlib3: $(zlev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| @@ -166,36 +175,33 @@ alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1) | |||
| alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) | |||
| alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) | |||
| .PHONY: all1 all2 all3 | |||
| # All level 1 | |||
| all1: $(alev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # All level 2 | |||
| all2: $(alev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # All level 3 | |||
| all3: $(alev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
| $(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
| $(RANLIB) $(CBLASLIB) | |||
| # All levels and precisions | |||
| $(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $@ $^ | |||
| $(AR) $(ARFLAGS) $@ $^ | |||
| $(RANLIB) $@ | |||
| FRC: | |||
| @FRC=$(FRC) | |||
| .PHONY: clean cleanobj cleanlib | |||
| clean: cleanobj cleanlib | |||
| cleanobj: | |||
| rm -f *.o | |||
| cleanlib: | |||
| rm -f $(CBLASLIB) | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| .f.o: | |||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||
| @@ -91,7 +91,7 @@ void cblas_sgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_sgemm", | |||
| "Illegal TransA setting, %d\n", TransA); | |||
| "Illegal TransB setting, %d\n", TransB); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| @@ -2,7 +2,12 @@ | |||
| # The Makefile compiles c wrappers and testers for CBLAS. | |||
| # | |||
| include ../../make.inc | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .SUFFIXES: .c .o | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| # Archive files necessary to compile | |||
| LIB = $(CBLASLIB) $(BLASLIB) | |||
| @@ -27,6 +32,7 @@ ztestl1o = c_zblas1.o | |||
| ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o | |||
| ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o | |||
| .PHONY: all all1 all2 all3 | |||
| all: all1 all2 all3 | |||
| all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | |||
| all2: xscblat2 xdcblat2 xccblat2 xzcblat2 | |||
| @@ -38,37 +44,38 @@ all3: xscblat3 xdcblat3 xccblat3 xzcblat3 | |||
| # Single real | |||
| xscblat1: c_sblat1.o $(stestl1o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xscblat2: c_sblat2.o $(stestl2o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xscblat3: c_sblat3.o $(stestl3o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| # Double real | |||
| xdcblat1: c_dblat1.o $(dtestl1o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xdcblat2: c_dblat2.o $(dtestl2o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xdcblat3: c_dblat3.o $(dtestl3o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| # Single complex | |||
| xccblat1: c_cblat1.o $(ctestl1o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xccblat2: c_cblat2.o $(ctestl2o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xccblat3: c_cblat3.o $(ctestl3o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| # Double complex | |||
| xzcblat1: c_zblat1.o $(ztestl1o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xzcblat2: c_zblat2.o $(ztestl2o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| xzcblat3: c_zblat3.o $(ztestl3o) $(LIB) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| # RUN TESTS | |||
| .PHONY: run | |||
| run: all | |||
| @echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--" | |||
| @./xscblat1 > stest1.out | |||
| @@ -95,6 +102,7 @@ run: all | |||
| @echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--" | |||
| @./xzcblat3 < zin3 > ztest3.out | |||
| .PHONY: clean cleanobj cleanexe cleantest | |||
| clean: cleanobj cleanexe cleantest | |||
| cleanobj: | |||
| rm -f *.o | |||
| @@ -102,9 +110,3 @@ cleanexe: | |||
| rm -f x* | |||
| cleantest: | |||
| rm -f *.out core | |||
| .SUFFIXES: .o .f .c | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| .f.o: | |||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||
| @@ -577,7 +577,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -653,7 +653,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -653,7 +653,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -577,7 +577,7 @@ | |||
| SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
| * ************************* STEST1 ***************************** | |||
| * | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
| * THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
| * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
| * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
| * | |||
| @@ -1,4 +1,4 @@ | |||
| # This module checks against various known compilers and thier respective | |||
| # This module checks against various known compilers and their respective | |||
| # flags to determine any specific flags needing to be set. | |||
| # | |||
| # 1. If FPE traps are enabled either abort or disable them | |||
| @@ -20,7 +20,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) | |||
| get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | |||
| foreach (LANG ${ENABLED_LANGUAGES}) | |||
| # Gcov evaluation is dependend on the used compiler. Check gcov support for | |||
| # Gcov evaluation is dependent on the used compiler. Check gcov support for | |||
| # each compiler that is used. If gcov binary was already found for this | |||
| # compiler, do not try to find it again. | |||
| if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN) | |||
| @@ -42,7 +42,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) | |||
| get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | |||
| foreach (LANG ${ENABLED_LANGUAGES}) | |||
| # Coverage flags are not dependend on language, but the used compiler. So | |||
| # Coverage flags are not dependent on language, but the used compiler. So | |||
| # instead of searching flags foreach language, search flags foreach compiler | |||
| # used. | |||
| set(COMPILER ${CMAKE_${LANG}_COMPILER_ID}) | |||
| @@ -24,7 +24,7 @@ message(STATUS "=========") | |||
| set(F77_OUTPUT_EXE "/Fe" CACHE INTERNAL | |||
| "Fortran compiler option for setting executable file name.") | |||
| else() | |||
| # in other case, let user specify their fortran configrations. | |||
| # in other case, let user specify their fortran configurations. | |||
| set(F77_OPTION_COMPILE "-c" CACHE STRING | |||
| "Fortran compiler option for compiling without linking.") | |||
| set(F77_OUTPUT_OBJ "-o" CACHE STRING | |||
| @@ -5,6 +5,10 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
| endif() | |||
| unset(_LAPACK_TARGET) | |||
| # Hint for project building against lapack | |||
| set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@") | |||
| # Report the blas and lapack raw or imported libraries. | |||
| set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | |||
| set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | |||
| set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES}) | |||
| @@ -8,8 +8,12 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
| endif() | |||
| unset(_LAPACK_TARGET) | |||
| # Hint for project building against lapack | |||
| set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@") | |||
| # Report the blas and lapack raw or imported libraries. | |||
| set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | |||
| set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | |||
| set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES}) | |||
| unset(_LAPACK_SELF_DIR) | |||
| @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.12) | |||
| project(LAPACK Fortran C) | |||
| set(LAPACK_MAJOR_VERSION 3) | |||
| set(LAPACK_MINOR_VERSION 8) | |||
| set(LAPACK_MINOR_VERSION 9) | |||
| set(LAPACK_PATCH_VERSION 0) | |||
| set( | |||
| LAPACK_VERSION | |||
| @@ -13,6 +13,9 @@ set( | |||
| # Add the CMake directory for custom CMake modules | |||
| set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) | |||
| # Export all symbols on Windows when building shared libraries | |||
| SET(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||
| # Set a default build type if none was specified | |||
| if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) | |||
| message(STATUS "Setting build type to 'Release' as none was specified.") | |||
| @@ -21,8 +24,19 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) | |||
| set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage") | |||
| endif() | |||
| string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) | |||
| if(${CMAKE_BUILD_TYPE_UPPER} STREQUAL "COVERAGE") | |||
| # Coverage | |||
| set(_is_coverage_build 0) | |||
| set(_msg "Checking if build type is 'Coverage'") | |||
| message(STATUS "${_msg}") | |||
| if(NOT CMAKE_CONFIGURATION_TYPES) | |||
| string(TOLOWER ${CMAKE_BUILD_TYPE} _build_type_lc) | |||
| if(${_build_type_lc} STREQUAL "coverage") | |||
| set(_is_coverage_build 1) | |||
| endif() | |||
| endif() | |||
| message(STATUS "${_msg}: ${_is_coverage_build}") | |||
| if(_is_coverage_build) | |||
| message(STATUS "Adding coverage") | |||
| find_package(codecov) | |||
| endif() | |||
| @@ -58,18 +72,18 @@ include(PreventInSourceBuilds) | |||
| include(PreventInBuildInstalls) | |||
| if(UNIX) | |||
| if("${CMAKE_Fortran_COMPILER}" MATCHES "ifort") | |||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict") | |||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) | |||
| list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict") | |||
| endif() | |||
| if("${CMAKE_Fortran_COMPILER}" MATCHES "xlf") | |||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none") | |||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) | |||
| list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none") | |||
| endif() | |||
| # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | |||
| # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | |||
| string(REPLACE \;mtsk\; \; CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES "${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}") | |||
| endif() | |||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq") | |||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL Compaq) | |||
| if(WIN32) | |||
| if(CMAKE_GENERATOR STREQUAL "NMake Makefiles") | |||
| get_filename_component(CMAKE_Fortran_COMPILER_CMDNAM ${CMAKE_Fortran_COMPILER} NAME_WE) | |||
| @@ -96,24 +110,16 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq") | |||
| endif() | |||
| endif() | |||
| # Get Python | |||
| message(STATUS "Looking for Python greater than 2.6 - ${PYTHONINTERP_FOUND}") | |||
| find_package(PythonInterp 2.7) # lapack_testing.py uses features from python 2.7 and greater | |||
| if(PYTHONINTERP_FOUND) | |||
| message(STATUS "Using Python version ${PYTHON_VERSION_STRING}") | |||
| else() | |||
| message(STATUS "No suitable Python version found, so skipping summary tests.") | |||
| endif() | |||
| # -------------------------------------------------- | |||
| # -------------------------------------------------- | |||
| set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) | |||
| macro(lapack_install_library lib) | |||
| install(TARGETS ${lib} | |||
| EXPORT ${LAPACK_INSTALL_EXPORT_NAME} | |||
| ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} | |||
| LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} | |||
| RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} | |||
| ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT Development | |||
| LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT RuntimeLibraries | |||
| RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT RuntimeLibraries | |||
| ) | |||
| endmacro() | |||
| @@ -121,12 +127,22 @@ set(PKG_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig) | |||
| # -------------------------------------------------- | |||
| # Testing | |||
| option(BUILD_TESTING "Build tests" OFF) | |||
| enable_testing() | |||
| option(BUILD_TESTING "Build tests" ${_is_coverage_build}) | |||
| include(CTest) | |||
| enable_testing() | |||
| message(STATUS "Build tests: ${BUILD_TESTING}") | |||
| # lapack_testing.py uses features from python 2.7 and greater | |||
| if(BUILD_TESTING) | |||
| set(_msg "Looking for Python >= 2.7 needed for summary tests") | |||
| message(STATUS "${_msg}") | |||
| find_package(PythonInterp 2.7 QUIET) | |||
| if(PYTHONINTERP_FOUND) | |||
| message(STATUS "${_msg} - found (${PYTHON_VERSION_STRING})") | |||
| else() | |||
| message(STATUS "${_msg} - not found (skipping summary tests)") | |||
| endif() | |||
| endif() | |||
| # -------------------------------------------------- | |||
| # Organize output files. On Windows this also keeps .dll files next | |||
| # to the .exe files that need them, making tests easy to run. | |||
| @@ -299,16 +315,40 @@ if(LAPACKE) | |||
| add_subdirectory(LAPACKE) | |||
| endif() | |||
| #------------------------------------- | |||
| # BLAS++ / LAPACK++ | |||
| option(BLAS++ "Build BLAS++" OFF) | |||
| option(LAPACK++ "Build LAPACK++" OFF) | |||
| function(_display_cpp_implementation_msg name) | |||
| string(TOLOWER ${name} name_lc) | |||
| message(STATUS "${name}++ enable") | |||
| message(STATUS "----------------") | |||
| message(STATUS "Thank you for your interest in ${name}++, a newly developed C++ API for ${name} library") | |||
| message(STATUS "The objective of ${name}++ is to provide a convenient, performance oriented API for development in the C++ language, that, for the most part, preserves established conventions, while, at the same time, takes advantages of modern C++ features, such as: namespaces, templates, exceptions, etc.") | |||
| message(STATUS "We are still working on integrating ${name}++ in our library. For the moment, you can download directly ${name_lc}++ from https://bitbucket.org/icl/${name_lc}pp") | |||
| message(STATUS "For support ${name}++ related question, please email: slate-user@icl.utk.edu") | |||
| message(STATUS "----------------") | |||
| endfunction() | |||
| if(BLAS++) | |||
| _display_cpp_implementation_msg("BLAS") | |||
| endif() | |||
| if(LAPACK++) | |||
| _display_cpp_implementation_msg("LAPACK") | |||
| endif() | |||
| # -------------------------------------------------- | |||
| # CPACK Packaging | |||
| set(CPACK_PACKAGE_NAME "LAPACK") | |||
| set(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd") | |||
| set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "LAPACK- Linear Algebra Package") | |||
| set(CPACK_PACKAGE_VERSION_MAJOR 3) | |||
| set(CPACK_PACKAGE_VERSION_MINOR 5) | |||
| set(CPACK_PACKAGE_VERSION_PATCH 0) | |||
| set(CPACK_PACKAGE_VERSION_MAJOR ${LAPACK_MAJOR_VERSION}) | |||
| set(CPACK_PACKAGE_VERSION_MINOR ${LAPACK_MINOR_VERSION}) | |||
| set(CPACK_PACKAGE_VERSION_PATCH ${LAPACK_PATCH_VERSION}) | |||
| set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") | |||
| set(CPACK_MONOLITHIC_INSTALL ON) | |||
| set(CPACK_PACKAGE_INSTALL_DIRECTORY "LAPACK") | |||
| if(WIN32 AND NOT UNIX) | |||
| # There is a bug in NSI that does not handle full unix paths properly. Make | |||
| @@ -347,7 +387,9 @@ endif() | |||
| set(_lapack_config_install_guard_target "") | |||
| if(ALL_TARGETS) | |||
| install(EXPORT lapack-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}) | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| # Choose one of the lapack targets to use as a guard for | |||
| # lapack-config.cmake to load targets from the install tree. | |||
| @@ -382,6 +424,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_D | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| COMPONENT Development | |||
| ) | |||
| configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | |||
| @@ -398,4 +441,6 @@ install(FILES | |||
| ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake | |||
| ${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK | |||
| # could be handy for archiving the generated documentation or if some version | |||
| # control system is used. | |||
| PROJECT_NUMBER = 3.8.0 | |||
| PROJECT_NUMBER = 3.9.0 | |||
| # Using the PROJECT_BRIEF tag one can provide an optional one line description | |||
| # for a project that appears at the top of each page and should give viewer a | |||
| @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK | |||
| # could be handy for archiving the generated documentation or if some version | |||
| # control system is used. | |||
| PROJECT_NUMBER = 3.8.0 | |||
| PROJECT_NUMBER = 3.9.0 | |||
| # Using the PROJECT_BRIEF tag one can provide an optional one line description | |||
| # for a project that appears at the top of each page and should give viewer a | |||
| @@ -439,39 +439,39 @@ SHELL = /bin/sh | |||
| \end{quote} | |||
| and it will need to be modified to \texttt{SHELL = /sbin/sh} if you are | |||
| installing LAPACK on an SGI architecture. | |||
| Second, you will | |||
| need to modify the \texttt{PLAT} definition, which is appended to all | |||
| library names, to specify the architecture to which you are installing | |||
| LAPACK. This features avoids confusion in library names when you are | |||
| installing LAPACK on more than one architecture. Next, you will need | |||
| to modify \texttt{FORTRAN}, \texttt{OPTS}, \texttt{DRVOPTS}, \texttt{NOOPT}, \texttt{LOADER}, | |||
| and \texttt{LOADOPTS} to specify | |||
| Next, you will need to modify \texttt{FC}, \texttt{FFLAGS}, | |||
| \texttt{FFLAGS\_DRV}, \texttt{FFLAGS\_NOOPT}, and \texttt{LDFLAGS} to specify | |||
| the compiler, compiler options, compiler options for the testing and | |||
| timing\footnotemark[\value{footnote}] main programs, loader, loader options. | |||
| Next you will have to choose which function you will use to time in the \texttt{SECOND} and \texttt{DSECND} routines. | |||
| timing\footnotemark[\value{footnote}] main programs, and linker options. | |||
| Next you will have to choose which function you will use to time in the | |||
| \texttt{SECOND} and \texttt{DSECND} routines. | |||
| \begin{verbatim} | |||
| #The Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME | |||
| TIMER = EXT_ETIME | |||
| # For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ | |||
| # TIMER = EXT_ETIME_ | |||
| # For gfortran compiler: SECOND and DSECND will use the INTERNAL FUNCTION ETIME | |||
| # TIMER = INT_ETIME | |||
| # If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) | |||
| # SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME | |||
| # TIMER = INT_CPU_TIME | |||
| # If neither of this works...you can use the NONE value... | |||
| # In that case, SECOND and DSECND will always return 0 | |||
| # TIMER = NONE | |||
| # Default: SECOND and DSECND will use a call to the | |||
| # EXTERNAL FUNCTION ETIME | |||
| #TIMER = EXT_ETIME | |||
| # For RS6K: SECOND and DSECND will use a call to the | |||
| # EXTERNAL FUNCTION ETIME_ | |||
| #TIMER = EXT_ETIME_ | |||
| # For gfortran compiler: SECOND and DSECND will use a call to the | |||
| # INTERNAL FUNCTION ETIME | |||
| TIMER = INT_ETIME | |||
| # If your Fortran compiler does not provide etime (like Nag Fortran | |||
| # Compiler, etc...) SECOND and DSECND will use a call to the | |||
| # INTERNAL FUNCTION CPU_TIME | |||
| #TIMER = INT_CPU_TIME | |||
| # If none of these work, you can use the NONE value. | |||
| # In that case, SECOND and DSECND will always return 0. | |||
| #TIMER = NONE | |||
| \end{verbatim} | |||
| Refer to Section~\ref{second} for more information. | |||
| Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, | |||
| Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver, | |||
| archiver options, and ranlib for your machine. If your architecture | |||
| does not require \texttt{ranlib} to be run after each archive command (as | |||
| is the case with CRAY computers running UNICOS, Hewlett Packard | |||
| computers running HP-UX, or SUN SPARCstations running Solaris), set | |||
| \texttt{ranlib=echo}. And finally, you must | |||
| \texttt{RANLIB = echo}. And finally, you must | |||
| modify the \texttt{BLASLIB} definition to specify the BLAS library to which | |||
| you will be linking. If an optimized version of the BLAS is available | |||
| on your machine, you are highly recommended to link to that library. | |||
| @@ -721,24 +721,24 @@ The version that will be used depends on the value of the TIMER variable in the | |||
| \begin{itemize} | |||
| \item If ETIME is available as an external function, set the value of the TIMER variable in your | |||
| make.inc to \texttt{EXT\_ETIME}:\texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used. | |||
| make.inc to \texttt{EXT\_ETIME}: \texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used. | |||
| Usually on HPPA architectures, | |||
| the compiler and loader flag \texttt{+U77} should be included to access | |||
| the compiler and linker flag \texttt{+U77} should be included to access | |||
| the function \texttt{ETIME}. | |||
| \item If ETIME\_ is available as an external function, set the value of the TIMER variable in your make.inc | |||
| to \texttt{EXT\_ETIME\_}:\texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used. | |||
| to \texttt{EXT\_ETIME\_}: \texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used. | |||
| It is the case on some IBM architectures such as IBM RS/6000s. | |||
| \item If ETIME is available as an internal function, set the value of the TIMER variable in your make.inc | |||
| to \texttt{INT\_ETIME}:\texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used. | |||
| to \texttt{INT\_ETIME}: \texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used. | |||
| This is the case with gfortran. | |||
| \item If CPU\_TIME is available as an internal function, set the value of the TIMER variable in your make.inc | |||
| to \texttt{INT\_CPU\_TIME}:\texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used. | |||
| to \texttt{INT\_CPU\_TIME}: \texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used. | |||
| \item If none of these functions is available, set the value of the TIMER variable in your make.inc | |||
| to \texttt{NONE:}\texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used. | |||
| to \texttt{NONE}: \texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used. | |||
| These routines will always return zero. | |||
| \end{itemize} | |||
| @@ -829,8 +829,8 @@ data type to the library if necessary. | |||
| \end{itemize} | |||
| \noindent | |||
| The BLAS library is created in \texttt{LAPACK/blas\_PLAT.a}, where | |||
| \texttt{PLAT} is the user-defined architecture suffix specified in the file | |||
| The BLAS library is created in \texttt{LAPACK/librefblas.a}, | |||
| or in the user-defined location specified by \texttt{BLASLIB} in the file | |||
| \texttt{LAPACK/make.inc}. | |||
| \subsection{Run the BLAS Test Programs}\label{testblas} | |||
| @@ -882,8 +882,8 @@ data type to the library if necessary. | |||
| \end{itemize} | |||
| \noindent | |||
| The LAPACK library is created in \texttt{LAPACK/lapack\_PLAT.a}, where | |||
| \texttt{PLAT} is the user-defined architecture suffix specified in the file | |||
| The LAPACK library is created in \texttt{LAPACK/liblapack.a}, | |||
| or in the user-defined location specified by \texttt{LAPACKLIB} in the file | |||
| \texttt{LAPACK/make.inc}. | |||
| \subsection{Create the Test Matrix Generator Library} | |||
| @@ -902,9 +902,9 @@ data type to the library if necessary. | |||
| \end{itemize} | |||
| \noindent | |||
| The test matrix generator library is created in \texttt{LAPACK/tmglib\_PLAT.a}, | |||
| where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||
| file \texttt{LAPACK/make.inc}. | |||
| The test matrix generator library is created in \texttt{LAPACK/libtmglib.a}, | |||
| or in the user-defined location specified by \texttt{TMGLIB} in the file | |||
| \texttt{LAPACK/make.inc}. | |||
| \subsection{Run the LAPACK Test Programs} | |||
| @@ -1114,9 +1114,7 @@ To make a library of the instrumented LAPACK routines, first | |||
| go to \texttt{LAPACK/TIMING/LIN/LINSRC} and type \texttt{make} followed | |||
| by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | |||
| The library of instrumented code is created in | |||
| \texttt{LAPACK/TIMING/LIN/linsrc\_PLAT.a}, | |||
| where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||
| file \texttt{LAPACK/make.inc}. | |||
| \texttt{LAPACK/TIMING/LIN/linsrc.a}. | |||
| \end{sloppypar} | |||
| \item[b)] | |||
| @@ -1251,9 +1249,7 @@ To make a library of the instrumented LAPACK routines, first | |||
| go to \texttt{LAPACK/TIMING/EIG/EIGSRC} and type \texttt{make} followed | |||
| by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | |||
| The library of instrumented code is created in | |||
| \texttt{LAPACK/TIMING/EIG/eigsrc\_PLAT.a}, | |||
| where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||
| file \texttt{LAPACK/make.inc}. | |||
| \texttt{LAPACK/TIMING/EIG/eigsrc.a}. | |||
| \end{sloppypar} | |||
| \item[b)] | |||
| @@ -1389,7 +1385,7 @@ installing LAPACK on an SGI architecture. | |||
| \section{ETIME} | |||
| On HPPA architectures, | |||
| the compiler and loader flag \texttt{+U77} should be included to access | |||
| the compiler and linker flag \texttt{+U77} should be included to access | |||
| the function \texttt{ETIME}. | |||
| \section{ILAENV and IEEE-754 compliance} | |||
| @@ -1494,13 +1490,13 @@ has two options: increase your stack size, or force all local variables | |||
| to be allocated statically. | |||
| On HPPA architectures, the | |||
| compiler and loader flag \texttt{-K} should be used when compiling these testing | |||
| compiler and linker flag \texttt{-K} should be used when compiling these testing | |||
| and timing main programs to avoid such a stack overflow. I.e., set | |||
| \texttt{DRVOPTS = -K} in the \texttt{LAPACK/make.inc} file. | |||
| \texttt{FFLAGS\_DRV = -K} in the \texttt{LAPACK/make.inc} file. | |||
| For similar reasons, | |||
| on SGI architectures, the compiler and loader flag \texttt{-static} should be | |||
| used. I.e., set \texttt{DRVOPTS = -static} in the \texttt{LAPACK/make.inc} file. | |||
| on SGI architectures, the compiler and linker flag \texttt{-static} should be | |||
| used. I.e., set \texttt{FFLAGS\_DRV = -static} in the \texttt{LAPACK/make.inc} file. | |||
| \section{IEEE arithmetic} | |||
| @@ -1,30 +1,33 @@ | |||
| include ../make.inc | |||
| TOPSRCDIR = .. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .PHONY: all testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | |||
| all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | |||
| testlsame: lsame.o lsametst.o | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| testslamch: slamch.o lsame.o slamchtst.o | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| testdlamch: dlamch.o lsame.o dlamchtst.o | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| testsecond: second_$(TIMER).o secondtst.o | |||
| @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| testdsecnd: dsecnd_$(TIMER).o dsecndtst.o | |||
| @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| testieee: tstiee.o | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| testversion: ilaver.o LAPACK_version.o | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| .PHONY: run | |||
| run: all | |||
| ./testlsame | |||
| ./testslamch | |||
| @@ -34,6 +37,7 @@ run: all | |||
| ./testieee | |||
| ./testversion | |||
| .PHONY: clean cleanobj cleanexe cleantest | |||
| clean: cleanobj cleanexe cleantest | |||
| cleanobj: | |||
| rm -f *.o | |||
| @@ -42,9 +46,5 @@ cleanexe: | |||
| cleantest: | |||
| rm -f core | |||
| .SUFFIXES: .o .f | |||
| .f.o: | |||
| $(FORTRAN) $(OPTS) -c -o $@ $< | |||
| slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< | |||
| dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< | |||
| slamch.o: slamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
| dlamch.o: dlamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
| @@ -10,6 +10,10 @@ | |||
| * | |||
| * DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | |||
| * | |||
| * .. Scalar Arguments .. | |||
| * CHARACTER CMACH | |||
| * .. | |||
| * | |||
| * | |||
| *> \par Purpose: | |||
| * ============= | |||
| @@ -24,6 +28,7 @@ | |||
| * | |||
| *> \param[in] CMACH | |||
| *> \verbatim | |||
| *> CMACH is CHARACTER*1 | |||
| *> Specifies the value to be returned by DLAMCH: | |||
| *> = 'E' or 'e', DLAMCH := eps | |||
| *> = 'S' or 's', DLAMCH := sfmin | |||
| @@ -10,6 +10,10 @@ | |||
| * | |||
| * DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | |||
| * | |||
| * .. Scalar Arguments .. | |||
| * CHARACTER CMACH | |||
| * .. | |||
| * | |||
| * | |||
| *> \par Purpose: | |||
| * ============= | |||
| @@ -25,12 +25,15 @@ | |||
| * ========== | |||
| * | |||
| *> \param[out] VERS_MAJOR | |||
| *> VERS_MAJOR is INTEGER | |||
| *> return the lapack major version | |||
| *> | |||
| *> \param[out] VERS_MINOR | |||
| *> VERS_MINOR is INTEGER | |||
| *> return the lapack minor version from the major version | |||
| *> | |||
| *> \param[out] VERS_PATCH | |||
| *> VERS_PATCH is INTEGER | |||
| *> return the lapack patch version from the minor version | |||
| * | |||
| * Authors: | |||
| @@ -41,24 +44,23 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date June 2017 | |||
| *> \date November 2019 | |||
| * | |||
| *> \ingroup auxOTHERauxiliary | |||
| * | |||
| * ===================================================================== | |||
| SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) | |||
| * | |||
| * -- LAPACK computational routine (version 3.7.1) -- | |||
| * -- LAPACK computational routine -- | |||
| * -- LAPACK is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * June 2017 | |||
| * | |||
| * ===================================================================== | |||
| * | |||
| INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH | |||
| * ===================================================================== | |||
| VERS_MAJOR = 3 | |||
| VERS_MINOR = 8 | |||
| VERS_MINOR = 9 | |||
| VERS_PATCH = 0 | |||
| * ===================================================================== | |||
| * | |||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = cc | |||
| CC = cc | |||
| CFLAGS = -O4 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = f77 | |||
| OPTS = -O4 -fpe1 | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = | |||
| FC = f77 | |||
| FFLAGS = -O4 -fpe1 | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = f77 | |||
| LOADOPTS = | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = ranlib | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = ranlib | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -74,9 +72,9 @@ TIMER = EXT_ETIME | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| #BLASLIB = ../../librefblas.a | |||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| BLASLIB = -ldxml | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = cc | |||
| CC = cc | |||
| CFLAGS = | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = f77 | |||
| OPTS = +O4 +U77 | |||
| DRVOPTS = $(OPTS) -K | |||
| NOOPT = +U77 | |||
| FC = f77 | |||
| FFLAGS = +O4 +U77 | |||
| FFLAGS_DRV = $(FFLAGS) -K | |||
| FFLAGS_NOOPT = +U77 | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = f77 | |||
| LOADOPTS = -Aa +U77 | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = echo | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = echo | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -74,9 +72,9 @@ TIMER = EXT_ETIME | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| #BLASLIB = ../../librefblas.a | |||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| BLASLIB = -lblas | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,33 +8,30 @@ SHELL = /sbin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = cc | |||
| CC = cc | |||
| CFLAGS = -O3 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = f77 | |||
| OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
| #OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
| DRVOPTS = $(OPTS) -static | |||
| NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
| #NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
| FC = f77 | |||
| FFLAGS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
| #FFLAGS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
| FFLAGS_DRV = $(FFLAGS) -static | |||
| FFLAGS_NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
| #FFLAGS_NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = f77 | |||
| LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
| #LOADOPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = echo | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = echo | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -78,8 +75,8 @@ TIMER = EXT_ETIME | |||
| # possible.) | |||
| # | |||
| #BLASLIB = -lblas | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,33 +8,30 @@ SHELL = /sbin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = cc | |||
| CC = cc | |||
| CFLAGS = -O3 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = f77 | |||
| OPTS = -O3 -64 -mips4 -r10000 | |||
| #OPTS = -O3 -64 -mips4 -r10000 -mp | |||
| DRVOPTS = $(OPTS) -static | |||
| NOOPT = -64 -mips4 -r10000 | |||
| #NOOPT = -64 -mips4 -r10000 -mp | |||
| FC = f77 | |||
| FFLAGS = -O3 -64 -mips4 -r10000 | |||
| #FFLAGS = -O3 -64 -mips4 -r10000 -mp | |||
| FFLAGS_DRV = $(FFLAGS) -static | |||
| FFLAGS_NOOPT = -64 -mips4 -r10000 | |||
| #FFLAGS_NOOPT = -64 -mips4 -r10000 -mp | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = f77 | |||
| LOADOPTS = -O3 -64 -mips4 -r10000 | |||
| #LOADOPTS = -O3 -64 -mips4 -r10000 -mp | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = echo | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = echo | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -79,8 +76,8 @@ TIMER = EXT_ETIME | |||
| # | |||
| BLASLIB = -lblas | |||
| #BLASLIB = -lblas_mp | |||
| #BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,30 +8,28 @@ SHELL = /sbin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = cc | |||
| CC = cc | |||
| CFLAGS = -O4 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = f77 | |||
| OPTS = -O4 | |||
| DRVOPTS = $(OPTS) -static | |||
| NOOPT = | |||
| FC = f77 | |||
| FFLAGS = -O4 | |||
| FFLAGS_DRV = $(FFLAGS) -static | |||
| FFLAGS_NOOPT = | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = f77 | |||
| LOADOPTS = | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = echo | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = echo | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||
| # possible.) | |||
| # | |||
| #BLASLIB = -lblas | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = cc | |||
| CC = cc | |||
| CFLAGS = -O3 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = f77 | |||
| OPTS = -dalign -O4 -fast | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = | |||
| FC = f77 | |||
| FFLAGS = -dalign -O4 -fast | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = f77 | |||
| LOADOPTS = -dalign -O4 -fast | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = ranlib | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = ranlib | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||
| # possible.) | |||
| # | |||
| #BLASLIB = -lblas | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,34 +8,31 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = cc | |||
| CC = cc | |||
| CFLAGS = -O3 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = f77 | |||
| #OPTS = -O4 -u -f -mt | |||
| #OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa | |||
| OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = -u -f | |||
| #NOOPT = -u -f -mt | |||
| FC = f77 | |||
| #FFLAGS = -O4 -u -f -mt | |||
| #FFLAGS = -u -f -dalign -native -xO5 -xarch=v8plusa | |||
| FFLAGS = -u -f -dalign -native -xO2 -xarch=v8plusa | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = -u -f | |||
| #FFLAGS_NOOPT = -u -f -mt | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = f77 | |||
| #LOADOPTS = -mt | |||
| LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = echo | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = echo | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -78,10 +75,10 @@ TIMER = EXT_ETIME | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| #BLASLIB = ../../librefblas.a | |||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| #BLASLIB = -xlic_lib=sunperf_mt | |||
| BLASLIB = -xlic_lib=sunperf | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,31 +8,29 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = xlc | |||
| CC = xlc | |||
| CFLAGS = -O3 -qnosave | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = xlf | |||
| OPTS = -O3 -qfixed -qnosave | |||
| FC = xlf | |||
| FFLAGS = -O3 -qfixed -qnosave | |||
| # For -O2, add -qstrict=none | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = -O0 -qfixed -qnosave | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = -O0 -qfixed -qnosave | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = xlf | |||
| LOADOPTS = -qnosave | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = ranlib | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = ranlib | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -75,9 +73,9 @@ TIMER = EXT_ETIME_ | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| #BLASLIB = ../../librefblas.a | |||
| #BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| BLASLIB = -lessl | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,10 +8,10 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = gcc | |||
| CC = gcc | |||
| CFLAGS = -O3 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| @@ -19,23 +19,21 @@ CFLAGS = -O3 | |||
| # and handle these quantities appropriately. As a consequence, one | |||
| # should not compile LAPACK with flags such as -ffpe-trap=overflow. | |||
| # | |||
| FORTRAN = gfortran | |||
| OPTS = -O2 -frecursive | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = -O0 -frecursive | |||
| FC = gfortran | |||
| FFLAGS = -O2 -frecursive | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = -O0 -frecursive | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = gfortran | |||
| LOADOPTS = | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = ranlib | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = ranlib | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -78,8 +76,8 @@ TIMER = INT_ETIME | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,10 +8,10 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = gcc | |||
| CC = gcc | |||
| CFLAGS = -g | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| @@ -19,23 +19,21 @@ CFLAGS = -g | |||
| # and handle these quantities appropriately. As a consequence, one | |||
| # should not compile LAPACK with flags such as -ffpe-trap=overflow. | |||
| # | |||
| FORTRAN = gfortran -fimplicit-none -g -frecursive | |||
| OPTS = | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = -g -O0 -frecursive | |||
| FC = gfortran | |||
| FFLAGS = -fimplicit-none -g -frecursive | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = $(FFLAGS) -O0 | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = gfortran -g | |||
| LOADOPTS = | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = ranlib | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = ranlib | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -78,8 +76,8 @@ TIMER = INT_CPU_TIME | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = icc | |||
| CC = icc | |||
| CFLAGS = -O3 | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = ifort | |||
| OPTS = -O3 -fp-model strict -assume protect_parens | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = -O0 -fp-model strict -assume protect_parens | |||
| FC = ifort | |||
| FFLAGS = -O3 -fp-model strict -assume protect_parens | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = -O0 -fp-model strict -assume protect_parens | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = ifort | |||
| LOADOPTS = | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = ranlib | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = ranlib | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -74,8 +72,8 @@ TIMER = EXT_ETIME | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = pgcc | |||
| CC = pgcc | |||
| CFLAGS = | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = pgf95 | |||
| OPTS = -O3 | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = -O0 | |||
| FC = pgf95 | |||
| FFLAGS = -O3 | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = -O0 | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = $(FORTRAN) | |||
| LOADOPTS = | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = echo | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = echo | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -74,8 +72,8 @@ TIMER = INT_CPU_TIME | |||
| # machine-specific, optimized BLAS library should be used whenever | |||
| # possible.) | |||
| # | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
| # CC is the C compiler, normally invoked with options CFLAGS. | |||
| # | |||
| CC = pghpc | |||
| CC = pghpc | |||
| CFLAGS = | |||
| # Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
| # Modify the FC and FFLAGS definitions to the desired compiler | |||
| # and desired compiler options for your machine. NOOPT refers to | |||
| # the compiler options desired when NO OPTIMIZATION is selected. | |||
| # | |||
| FORTRAN = pghpf | |||
| OPTS = -O4 -Mnohpfc -Mdclchk | |||
| DRVOPTS = $(OPTS) | |||
| NOOPT = -Mnohpfc -Mdclchk | |||
| FC = pghpf | |||
| FFLAGS = -O4 -Mnohpfc -Mdclchk | |||
| FFLAGS_DRV = $(FFLAGS) | |||
| FFLAGS_NOOPT = -Mnohpfc -Mdclchk | |||
| # Define LOADER and LOADOPTS to refer to the loader and desired | |||
| # load options for your machine. | |||
| # Define LDFLAGS to the desired linker options for your machine. | |||
| # | |||
| LOADER = pghpf | |||
| LOADOPTS = | |||
| LDFLAGS = | |||
| # The archiver and the flag(s) to use when building an archive | |||
| # (library). If your system has no ranlib, set RANLIB = echo. | |||
| # | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = echo | |||
| AR = ar | |||
| ARFLAGS = cr | |||
| RANLIB = echo | |||
| # Timer for the SECOND and DSECND routines | |||
| # | |||
| @@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||
| # possible.) | |||
| # | |||
| #BLASLIB = -lessl | |||
| BLASLIB = ../../librefblas.a | |||
| CBLASLIB = ../../libcblas.a | |||
| LAPACKLIB = liblapack.a | |||
| TMGLIB = libtmglib.a | |||
| LAPACKELIB = liblapacke.a | |||
| BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
| CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
| LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
| TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
| LAPACKELIB = $(TOPSRCDIR)/liblapacke.a | |||
| @@ -28,6 +28,7 @@ | |||
| * | |||
| *> \param[in] CMACH | |||
| *> \verbatim | |||
| *> CMACH is CHARACTER*1 | |||
| *> Specifies the value to be returned by SLAMCH: | |||
| *> = 'E' or 'e', SLAMCH := eps | |||
| *> = 'S' or 's , SLAMCH := sfmin | |||
| @@ -16,18 +16,16 @@ if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) | |||
| ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||
| endif() | |||
| if(WIN32 AND NOT UNIX) | |||
| add_definitions(-DHAVE_LAPACK_CONFIG_H -DLAPACK_COMPLEX_STRUCTURE) | |||
| message(STATUS "Windows BUILD") | |||
| endif() | |||
| get_directory_property(DirDefs COMPILE_DEFINITIONS) | |||
| include_directories(include ${LAPACK_BINARY_DIR}/include) | |||
| add_subdirectory(include) | |||
| add_subdirectory(src) | |||
| add_subdirectory(utils) | |||
| option(LAPACKE_BUILD_SINGLE "Build LAPACKE single precision real" ON) | |||
| option(LAPACKE_BUILD_DOUBLE "Build LAPACKE double precision real" ON) | |||
| option(LAPACKE_BUILD_COMPLEX "Build LAPACKE single precision complex" ON) | |||
| option(LAPACKE_BUILD_COMPLEX16 "Build LAPACKE double precision complex" ON) | |||
| macro(append_subdir_files variable dirname) | |||
| get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable}) | |||
| foreach(depfile ${holder}) | |||
| @@ -35,8 +33,29 @@ macro(append_subdir_files variable dirname) | |||
| endforeach() | |||
| endmacro() | |||
| message(STATUS "Build LAPACKE single precision real: ${LAPACKE_BUILD_SINGLE}") | |||
| message(STATUS "Build LAPACKE double precision real: ${LAPACKE_BUILD_DOUBLE}") | |||
| message(STATUS "Build LAPACKE single precision complex: ${LAPACKE_BUILD_COMPLEX}") | |||
| message(STATUS "Build LAPACKE double precision complex: ${LAPACKE_BUILD_COMPLEX16}") | |||
| append_subdir_files(LAPACKE_INCLUDE "include") | |||
| append_subdir_files(SOURCES "src") | |||
| if (LAPACKE_BUILD_SINGLE) | |||
| append_subdir_files(SOURCES_SINGLE "src") | |||
| list(APPEND SOURCES ${SOURCES_SINGLE}) | |||
| endif() | |||
| if (LAPACKE_BUILD_DOUBLE) | |||
| append_subdir_files(SOURCES_DOUBLE "src") | |||
| list(APPEND SOURCES ${SOURCES_DOUBLE}) | |||
| endif() | |||
| if (LAPACKE_BUILD_COMPLEX) | |||
| append_subdir_files(SOURCES_COMPLEX "src") | |||
| list(APPEND SOURCES ${SOURCES_COMPLEX}) | |||
| endif() | |||
| if (LAPACKE_BUILD_COMPLEX16) | |||
| append_subdir_files(SOURCES_COMPLEX16 "src") | |||
| list(APPEND SOURCES ${SOURCES_COMPLEX16}) | |||
| endif() | |||
| append_subdir_files(DEPRECATED "src") | |||
| append_subdir_files(EXTENDED "src") | |||
| append_subdir_files(MATGEN "src") | |||
| @@ -61,9 +80,13 @@ set_target_properties( | |||
| SOVERSION ${LAPACK_MAJOR_VERSION} | |||
| ) | |||
| target_include_directories(lapacke PUBLIC | |||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> | |||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> | |||
| $<INSTALL_INTERFACE:include> | |||
| ) | |||
| if(WIN32 AND NOT UNIX) | |||
| target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||
| message(STATUS "Windows BUILD") | |||
| endif() | |||
| if(LAPACKE_WITH_TMG) | |||
| target_link_libraries(lapacke PRIVATE tmglib) | |||
| @@ -71,7 +94,11 @@ endif() | |||
| target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) | |||
| lapack_install_library(lapacke) | |||
| install(FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
| install( | |||
| FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h | |||
| DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||
| COMPONENT Development | |||
| ) | |||
| if(BUILD_TESTING) | |||
| add_subdirectory(example) | |||
| @@ -82,6 +109,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_ | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| COMPONENT Development | |||
| ) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | |||
| @@ -95,7 +123,10 @@ install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake | |||
| ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| install(EXPORT lapacke-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}) | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| @@ -40,22 +40,26 @@ | |||
| # To clean everything including lapacke library type | |||
| # 'make cleanall' | |||
| # | |||
| include ../make.inc | |||
| TOPSRCDIR = .. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .PHONY: all | |||
| all: lapacke | |||
| .PHONY: lapacke | |||
| lapacke: include/lapacke_mangling.h | |||
| $(MAKE) -C src | |||
| $(MAKE) -C utils | |||
| include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in | |||
| cp $< $@ | |||
| cp include/lapacke_mangling_with_flags.h.in $@ | |||
| .PHONY: lapacke_example | |||
| lapacke_example: lapacke | |||
| $(MAKE) -C example | |||
| #clean: cleanlib | |||
| clean: cleanobj | |||
| .PHONY: clean cleanobj cleanlib cleanexe | |||
| clean: | |||
| $(MAKE) -C src clean | |||
| $(MAKE) -C utils clean | |||
| $(MAKE) -C example clean | |||
| @@ -64,6 +68,6 @@ cleanobj: | |||
| $(MAKE) -C utils cleanobj | |||
| $(MAKE) -C example cleanobj | |||
| cleanlib: | |||
| rm -f ../$(LAPACKELIB) | |||
| $(MAKE) -C src cleanlib | |||
| cleanexe: | |||
| $(MAKE) -C example cleanexe | |||
| @@ -7,8 +7,11 @@ if(NOT TARGET lapacke) | |||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
| endif() | |||
| # Hint for project building against lapack | |||
| set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
| # Report lapacke header search locations from build tree. | |||
| set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | |||
| # Report lapacke libraries. | |||
| set(LAPACKE_LIBRARIES lapacke) | |||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||
| @@ -13,11 +13,14 @@ if(NOT TARGET lapacke) | |||
| include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) | |||
| endif() | |||
| # Hint for project building against lapack | |||
| set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
| # Report lapacke header search locations. | |||
| set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | |||
| # Report lapacke libraries. | |||
| set(LAPACKE_LIBRARIES lapacke) | |||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||
| unset(_LAPACKE_PREFIX) | |||
| unset(_LAPACKE_SELF_DIR) | |||
| @@ -1,34 +1,38 @@ | |||
| include ../../make.inc | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| .SUFFIXES: .c .o | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I. -I../include -c -o $@ $< | |||
| .PHONY: all | |||
| all: xexample_DGESV_rowmajor \ | |||
| xexample_DGESV_colmajor \ | |||
| xexample_DGELS_rowmajor \ | |||
| xexample_DGELS_colmajor | |||
| LIBRARIES = ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) | |||
| LIBRARIES = $(LAPACKELIB) $(LAPACKLIB) $(BLASLIB) | |||
| # Double Precision Examples | |||
| xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| ./$@ | |||
| xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| ./$@ | |||
| xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| ./$@ | |||
| xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
| $(LOADER) $(LOADOPTS) -o $@ $^ | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
| ./$@ | |||
| .PHONY: clean cleanobj cleanexe | |||
| clean: cleanobj cleanexe | |||
| cleanobj: | |||
| rm -f *.o | |||
| cleanexe: | |||
| rm -f x* | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I. -I../include -c -o $@ $< | |||
| @@ -1,3 +1,3 @@ | |||
| set(LAPACKE_INCLUDE lapacke.h lapacke_config.h lapacke_utils.h) | |||
| set(LAPACKE_INCLUDE lapacke.h lapack.h lapacke_config.h lapacke_utils.h) | |||
| file(COPY ${LAPACKE_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include) | |||
| @@ -1,4 +1,4 @@ | |||
| set(SOURCES | |||
| set(SOURCES_COMPLEX | |||
| lapacke_cbbcsd.c | |||
| lapacke_cbbcsd_work.c | |||
| lapacke_cbdsqr.c | |||
| @@ -78,11 +78,11 @@ lapacke_cgeqrf_work.c | |||
| lapacke_cgeqrfp.c | |||
| lapacke_cgeqrfp_work.c | |||
| lapacke_cgeqrt.c | |||
| lapacke_cgeqrt_work.c | |||
| lapacke_cgeqrt2.c | |||
| lapacke_cgeqrt2_work.c | |||
| lapacke_cgeqrt3.c | |||
| lapacke_cgeqrt3_work.c | |||
| lapacke_cgeqrt_work.c | |||
| lapacke_cgerfs.c | |||
| lapacke_cgerfs_work.c | |||
| lapacke_cgerqf.c | |||
| @@ -93,6 +93,8 @@ lapacke_cgesv.c | |||
| lapacke_cgesv_work.c | |||
| lapacke_cgesvd.c | |||
| lapacke_cgesvd_work.c | |||
| lapacke_cgesvdq.c | |||
| lapacke_cgesvdq_work.c | |||
| lapacke_cgesvdx.c | |||
| lapacke_cgesvdx_work.c | |||
| lapacke_cgesvj.c | |||
| @@ -129,10 +131,10 @@ lapacke_cggevx.c | |||
| lapacke_cggevx_work.c | |||
| lapacke_cggglm.c | |||
| lapacke_cggglm_work.c | |||
| lapacke_cgghrd.c | |||
| lapacke_cgghrd_work.c | |||
| lapacke_cgghd3.c | |||
| lapacke_cgghd3_work.c | |||
| lapacke_cgghrd.c | |||
| lapacke_cgghrd_work.c | |||
| lapacke_cgglse.c | |||
| lapacke_cgglse_work.c | |||
| lapacke_cggqrf.c | |||
| @@ -157,14 +159,14 @@ lapacke_cgttrs.c | |||
| lapacke_cgttrs_work.c | |||
| lapacke_chbev.c | |||
| lapacke_chbev_work.c | |||
| lapacke_chbevd.c | |||
| lapacke_chbevd_work.c | |||
| lapacke_chbevx.c | |||
| lapacke_chbevx_work.c | |||
| lapacke_chbev_2stage.c | |||
| lapacke_chbev_2stage_work.c | |||
| lapacke_chbevd.c | |||
| lapacke_chbevd_work.c | |||
| lapacke_chbevd_2stage.c | |||
| lapacke_chbevd_2stage_work.c | |||
| lapacke_chbevx.c | |||
| lapacke_chbevx_work.c | |||
| lapacke_chbevx_2stage.c | |||
| lapacke_chbevx_2stage_work.c | |||
| lapacke_chbgst.c | |||
| @@ -185,18 +187,18 @@ lapacke_cheequb.c | |||
| lapacke_cheequb_work.c | |||
| lapacke_cheev.c | |||
| lapacke_cheev_work.c | |||
| lapacke_cheevd.c | |||
| lapacke_cheevd_work.c | |||
| lapacke_cheevr.c | |||
| lapacke_cheevr_work.c | |||
| lapacke_cheevx.c | |||
| lapacke_cheevx_work.c | |||
| lapacke_cheev_2stage.c | |||
| lapacke_cheev_2stage_work.c | |||
| lapacke_cheevd.c | |||
| lapacke_cheevd_work.c | |||
| lapacke_cheevd_2stage.c | |||
| lapacke_cheevd_2stage_work.c | |||
| lapacke_cheevr.c | |||
| lapacke_cheevr_work.c | |||
| lapacke_cheevr_2stage.c | |||
| lapacke_cheevr_2stage_work.c | |||
| lapacke_cheevx.c | |||
| lapacke_cheevx_work.c | |||
| lapacke_cheevx_2stage.c | |||
| lapacke_cheevx_2stage_work.c | |||
| lapacke_chegst.c | |||
| @@ -214,8 +216,8 @@ lapacke_cherfs_work.c | |||
| lapacke_chesv.c | |||
| lapacke_chesv_work.c | |||
| lapacke_chesv_aa.c | |||
| lapacke_chesv_aa_2stage.c | |||
| lapacke_chesv_aa_work.c | |||
| lapacke_chesv_aa_2stage.c | |||
| lapacke_chesv_aa_2stage_work.c | |||
| lapacke_chesv_rk.c | |||
| lapacke_chesv_rk_work.c | |||
| @@ -226,35 +228,35 @@ lapacke_cheswapr_work.c | |||
| lapacke_chetrd.c | |||
| lapacke_chetrd_work.c | |||
| lapacke_chetrf.c | |||
| lapacke_chetrf_rook.c | |||
| lapacke_chetrf_work.c | |||
| lapacke_chetrf_rook_work.c | |||
| lapacke_chetrf_aa.c | |||
| lapacke_chetrf_aa_2stage.c | |||
| lapacke_chetrf_aa_work.c | |||
| lapacke_chetrf_aa_2stage.c | |||
| lapacke_chetrf_aa_2stage_work.c | |||
| lapacke_chetrf_rk.c | |||
| lapacke_chetrf_rk_work.c | |||
| lapacke_chetrf_rook.c | |||
| lapacke_chetrf_rook_work.c | |||
| lapacke_chetri.c | |||
| lapacke_chetri_work.c | |||
| lapacke_chetri2.c | |||
| lapacke_chetri2_work.c | |||
| lapacke_chetri_3.c | |||
| lapacke_chetri_3_work.c | |||
| lapacke_chetri2x.c | |||
| lapacke_chetri2x_work.c | |||
| lapacke_chetri_work.c | |||
| lapacke_chetri_3.c | |||
| lapacke_chetri_3_work.c | |||
| lapacke_chetrs.c | |||
| lapacke_chetrs_rook.c | |||
| lapacke_chetrs_work.c | |||
| lapacke_chetrs2.c | |||
| lapacke_chetrs2_work.c | |||
| lapacke_chetrs_work.c | |||
| lapacke_chetrs_rook_work.c | |||
| lapacke_chetrs_3.c | |||
| lapacke_chetrs_3_work.c | |||
| lapacke_chetrs_aa.c | |||
| lapacke_chetrs_aa_2stage.c | |||
| lapacke_chetrs_aa_work.c | |||
| lapacke_chetrs_aa_2stage.c | |||
| lapacke_chetrs_aa_2stage_work.c | |||
| lapacke_chetrs_3.c | |||
| lapacke_chetrs_3_work.c | |||
| lapacke_chetrs_rook.c | |||
| lapacke_chetrs_rook_work.c | |||
| lapacke_chfrk.c | |||
| lapacke_chfrk_work.c | |||
| lapacke_chgeqz.c | |||
| @@ -445,52 +447,54 @@ lapacke_csyconv.c | |||
| lapacke_csyconv_work.c | |||
| lapacke_csyequb.c | |||
| lapacke_csyequb_work.c | |||
| lapacke_csyr.c | |||
| lapacke_csyr_work.c | |||
| lapacke_csyrfs.c | |||
| lapacke_csyrfs_work.c | |||
| lapacke_csysv.c | |||
| lapacke_csysv_rook.c | |||
| lapacke_csysv_rook_work.c | |||
| lapacke_csysv_work.c | |||
| lapacke_csysv_aa.c | |||
| lapacke_csysv_aa_2stage.c | |||
| lapacke_csysv_aa_work.c | |||
| lapacke_csysv_aa_2stage.c | |||
| lapacke_csysv_aa_2stage_work.c | |||
| lapacke_csysv_rk.c | |||
| lapacke_csysv_rk_work.c | |||
| lapacke_csysv_rook.c | |||
| lapacke_csysv_rook_work.c | |||
| lapacke_csysvx.c | |||
| lapacke_csysvx_work.c | |||
| lapacke_csyswapr.c | |||
| lapacke_csyswapr_work.c | |||
| lapacke_csytrf.c | |||
| lapacke_csytrf_work.c | |||
| lapacke_csytrf_rook.c | |||
| lapacke_csytrf_rook_work.c | |||
| lapacke_csytrf_aa.c | |||
| lapacke_csytrf_aa_2stage.c | |||
| lapacke_csytrf_aa_work.c | |||
| lapacke_csytrf_aa_2stage.c | |||
| lapacke_csytrf_aa_2stage_work.c | |||
| lapacke_csytrf_rk.c | |||
| lapacke_csytrf_rk_work.c | |||
| lapacke_csytrf_rook.c | |||
| lapacke_csytrf_rook_work.c | |||
| lapacke_csytri.c | |||
| lapacke_csytri_work.c | |||
| lapacke_csytri2.c | |||
| lapacke_csytri2_work.c | |||
| lapacke_csytri_3.c | |||
| lapacke_csytri_3_work.c | |||
| lapacke_csytri2x.c | |||
| lapacke_csytri2x_work.c | |||
| lapacke_csytri_work.c | |||
| lapacke_csytri_3.c | |||
| lapacke_csytri_3_work.c | |||
| lapacke_csytrs.c | |||
| lapacke_csytrs_rook.c | |||
| lapacke_csytrs_work.c | |||
| lapacke_csytrs2.c | |||
| lapacke_csytrs2_work.c | |||
| lapacke_csytrs_work.c | |||
| lapacke_csytrs_rook_work.c | |||
| lapacke_csytrs_3.c | |||
| lapacke_csytrs_3_work.c | |||
| lapacke_csytrs_aa.c | |||
| lapacke_csytrs_aa_2stage.c | |||
| lapacke_csytrs_aa_work.c | |||
| lapacke_csytrs_aa_2stage.c | |||
| lapacke_csytrs_aa_2stage_work.c | |||
| lapacke_csytrs_3.c | |||
| lapacke_csytrs_3_work.c | |||
| lapacke_csytrs_rook.c | |||
| lapacke_csytrs_rook_work.c | |||
| lapacke_ctbcon.c | |||
| lapacke_ctbcon_work.c | |||
| lapacke_ctbrfs.c | |||
| @@ -522,9 +526,9 @@ lapacke_ctpcon_work.c | |||
| lapacke_ctpmqrt.c | |||
| lapacke_ctpmqrt_work.c | |||
| lapacke_ctpqrt.c | |||
| lapacke_ctpqrt_work.c | |||
| lapacke_ctpqrt2.c | |||
| lapacke_ctpqrt2_work.c | |||
| lapacke_ctpqrt_work.c | |||
| lapacke_ctprfb.c | |||
| lapacke_ctprfb_work.c | |||
| lapacke_ctprfs.c | |||
| @@ -601,14 +605,16 @@ lapacke_cupgtr.c | |||
| lapacke_cupgtr_work.c | |||
| lapacke_cupmtr.c | |||
| lapacke_cupmtr_work.c | |||
| ) | |||
| set(SOURCES_DOUBLE | |||
| lapacke_dbbcsd.c | |||
| lapacke_dbbcsd_work.c | |||
| lapacke_dbdsdc.c | |||
| lapacke_dbdsdc_work.c | |||
| lapacke_dbdsvdx.c | |||
| lapacke_dbdsvdx_work.c | |||
| lapacke_dbdsqr.c | |||
| lapacke_dbdsqr_work.c | |||
| lapacke_dbdsvdx.c | |||
| lapacke_dbdsvdx_work.c | |||
| lapacke_ddisna.c | |||
| lapacke_ddisna_work.c | |||
| lapacke_dgbbrd.c | |||
| @@ -686,11 +692,11 @@ lapacke_dgeqrf_work.c | |||
| lapacke_dgeqrfp.c | |||
| lapacke_dgeqrfp_work.c | |||
| lapacke_dgeqrt.c | |||
| lapacke_dgeqrt_work.c | |||
| lapacke_dgeqrt2.c | |||
| lapacke_dgeqrt2_work.c | |||
| lapacke_dgeqrt3.c | |||
| lapacke_dgeqrt3_work.c | |||
| lapacke_dgeqrt_work.c | |||
| lapacke_dgerfs.c | |||
| lapacke_dgerfs_work.c | |||
| lapacke_dgerqf.c | |||
| @@ -701,6 +707,8 @@ lapacke_dgesv.c | |||
| lapacke_dgesv_work.c | |||
| lapacke_dgesvd.c | |||
| lapacke_dgesvd_work.c | |||
| lapacke_dgesvdq.c | |||
| lapacke_dgesvdq_work.c | |||
| lapacke_dgesvdx.c | |||
| lapacke_dgesvdx_work.c | |||
| lapacke_dgesvj.c | |||
| @@ -737,10 +745,10 @@ lapacke_dggevx.c | |||
| lapacke_dggevx_work.c | |||
| lapacke_dggglm.c | |||
| lapacke_dggglm_work.c | |||
| lapacke_dgghrd.c | |||
| lapacke_dgghrd_work.c | |||
| lapacke_dgghd3.c | |||
| lapacke_dgghd3_work.c | |||
| lapacke_dgghrd.c | |||
| lapacke_dgghrd_work.c | |||
| lapacke_dgglse.c | |||
| lapacke_dgglse_work.c | |||
| lapacke_dggqrf.c | |||
| @@ -823,10 +831,10 @@ lapacke_dopmtr.c | |||
| lapacke_dopmtr_work.c | |||
| lapacke_dorbdb.c | |||
| lapacke_dorbdb_work.c | |||
| lapacke_dorcsd2by1.c | |||
| lapacke_dorcsd2by1_work.c | |||
| lapacke_dorcsd.c | |||
| lapacke_dorcsd_work.c | |||
| lapacke_dorcsd2by1.c | |||
| lapacke_dorcsd2by1_work.c | |||
| lapacke_dorgbr.c | |||
| lapacke_dorgbr_work.c | |||
| lapacke_dorghr.c | |||
| @@ -933,14 +941,14 @@ lapacke_dpttrs.c | |||
| lapacke_dpttrs_work.c | |||
| lapacke_dsbev.c | |||
| lapacke_dsbev_work.c | |||
| lapacke_dsbevd.c | |||
| lapacke_dsbevd_work.c | |||
| lapacke_dsbevx.c | |||
| lapacke_dsbevx_work.c | |||
| lapacke_dsbev_2stage.c | |||
| lapacke_dsbev_2stage_work.c | |||
| lapacke_dsbevd.c | |||
| lapacke_dsbevd_work.c | |||
| lapacke_dsbevd_2stage.c | |||
| lapacke_dsbevd_2stage_work.c | |||
| lapacke_dsbevx.c | |||
| lapacke_dsbevx_work.c | |||
| lapacke_dsbevx_2stage.c | |||
| lapacke_dsbevx_2stage_work.c | |||
| lapacke_dsbgst.c | |||
| @@ -1021,18 +1029,18 @@ lapacke_dsyequb.c | |||
| lapacke_dsyequb_work.c | |||
| lapacke_dsyev.c | |||
| lapacke_dsyev_work.c | |||
| lapacke_dsyevd.c | |||
| lapacke_dsyevd_work.c | |||
| lapacke_dsyevr.c | |||
| lapacke_dsyevr_work.c | |||
| lapacke_dsyevx.c | |||
| lapacke_dsyevx_work.c | |||
| lapacke_dsyev_2stage.c | |||
| lapacke_dsyev_2stage_work.c | |||
| lapacke_dsyevd.c | |||
| lapacke_dsyevd_work.c | |||
| lapacke_dsyevd_2stage.c | |||
| lapacke_dsyevd_2stage_work.c | |||
| lapacke_dsyevr.c | |||
| lapacke_dsyevr_work.c | |||
| lapacke_dsyevr_2stage.c | |||
| lapacke_dsyevr_2stage_work.c | |||
| lapacke_dsyevx.c | |||
| lapacke_dsyevx_work.c | |||
| lapacke_dsyevx_2stage.c | |||
| lapacke_dsyevx_2stage_work.c | |||
| lapacke_dsygst.c | |||
| @@ -1048,15 +1056,15 @@ lapacke_dsygvx_work.c | |||
| lapacke_dsyrfs.c | |||
| lapacke_dsyrfs_work.c | |||
| lapacke_dsysv.c | |||
| lapacke_dsysv_rook.c | |||
| lapacke_dsysv_rook_work.c | |||
| lapacke_dsysv_work.c | |||
| lapacke_dsysv_aa.c | |||
| lapacke_dsysv_aa_2stage.c | |||
| lapacke_dsysv_aa_work.c | |||
| lapacke_dsysv_aa_2stage.c | |||
| lapacke_dsysv_aa_2stage_work.c | |||
| lapacke_dsysv_rk.c | |||
| lapacke_dsysv_rk_work.c | |||
| lapacke_dsysv_rook.c | |||
| lapacke_dsysv_rook_work.c | |||
| lapacke_dsysvx.c | |||
| lapacke_dsysvx_work.c | |||
| lapacke_dsyswapr.c | |||
| @@ -1065,33 +1073,33 @@ lapacke_dsytrd.c | |||
| lapacke_dsytrd_work.c | |||
| lapacke_dsytrf.c | |||
| lapacke_dsytrf_work.c | |||
| lapacke_dsytrf_rook.c | |||
| lapacke_dsytrf_rook_work.c | |||
| lapacke_dsytrf_aa.c | |||
| lapacke_dsytrf_aa_2stage.c | |||
| lapacke_dsytrf_aa_work.c | |||
| lapacke_dsytrf_aa_2stage.c | |||
| lapacke_dsytrf_aa_2stage_work.c | |||
| lapacke_dsytrf_rk.c | |||
| lapacke_dsytrf_rk_work.c | |||
| lapacke_dsytrf_rook.c | |||
| lapacke_dsytrf_rook_work.c | |||
| lapacke_dsytri.c | |||
| lapacke_dsytri_work.c | |||
| lapacke_dsytri2.c | |||
| lapacke_dsytri2_work.c | |||
| lapacke_dsytri_3.c | |||
| lapacke_dsytri_3_work.c | |||
| lapacke_dsytri2x.c | |||
| lapacke_dsytri2x_work.c | |||
| lapacke_dsytri_work.c | |||
| lapacke_dsytri_3.c | |||
| lapacke_dsytri_3_work.c | |||
| lapacke_dsytrs.c | |||
| lapacke_dsytrs_rook.c | |||
| lapacke_dsytrs_work.c | |||
| lapacke_dsytrs2.c | |||
| lapacke_dsytrs2_work.c | |||
| lapacke_dsytrs_3.c | |||
| lapacke_dsytrs_3_work.c | |||
| lapacke_dsytrs_aa.c | |||
| lapacke_dsytrs_aa_2stage.c | |||
| lapacke_dsytrs_aa_work.c | |||
| lapacke_dsytrs_aa_2stage.c | |||
| lapacke_dsytrs_aa_2stage_work.c | |||
| lapacke_dsytrs_3.c | |||
| lapacke_dsytrs_3_work.c | |||
| lapacke_dsytrs_work.c | |||
| lapacke_dsytrs_rook.c | |||
| lapacke_dsytrs_rook_work.c | |||
| lapacke_dtbcon.c | |||
| lapacke_dtbcon_work.c | |||
| @@ -1124,9 +1132,9 @@ lapacke_dtpcon_work.c | |||
| lapacke_dtpmqrt.c | |||
| lapacke_dtpmqrt_work.c | |||
| lapacke_dtpqrt.c | |||
| lapacke_dtpqrt_work.c | |||
| lapacke_dtpqrt2.c | |||
| lapacke_dtpqrt2_work.c | |||
| lapacke_dtpqrt_work.c | |||
| lapacke_dtprfb.c | |||
| lapacke_dtprfb_work.c | |||
| lapacke_dtprfs.c | |||
| @@ -1163,15 +1171,21 @@ lapacke_dtrttp.c | |||
| lapacke_dtrttp_work.c | |||
| lapacke_dtzrzf.c | |||
| lapacke_dtzrzf_work.c | |||
| ) | |||
| set(SOURCES | |||
| lapacke_nancheck.c | |||
| lapacke_ilaver.c | |||
| ) | |||
| set(SOURCES_SINGLE | |||
| lapacke_sbbcsd.c | |||
| lapacke_sbbcsd_work.c | |||
| lapacke_sbdsdc.c | |||
| lapacke_sbdsdc_work.c | |||
| lapacke_sbdsvdx.c | |||
| lapacke_sbdsvdx_work.c | |||
| lapacke_sbdsqr.c | |||
| lapacke_sbdsqr_work.c | |||
| lapacke_sbdsvdx.c | |||
| lapacke_sbdsvdx_work.c | |||
| lapacke_sdisna.c | |||
| lapacke_sdisna_work.c | |||
| lapacke_sgbbrd.c | |||
| @@ -1249,11 +1263,11 @@ lapacke_sgeqrf_work.c | |||
| lapacke_sgeqrfp.c | |||
| lapacke_sgeqrfp_work.c | |||
| lapacke_sgeqrt.c | |||
| lapacke_sgeqrt_work.c | |||
| lapacke_sgeqrt2.c | |||
| lapacke_sgeqrt2_work.c | |||
| lapacke_sgeqrt3.c | |||
| lapacke_sgeqrt3_work.c | |||
| lapacke_sgeqrt_work.c | |||
| lapacke_sgerfs.c | |||
| lapacke_sgerfs_work.c | |||
| lapacke_sgerqf.c | |||
| @@ -1264,6 +1278,8 @@ lapacke_sgesv.c | |||
| lapacke_sgesv_work.c | |||
| lapacke_sgesvd.c | |||
| lapacke_sgesvd_work.c | |||
| lapacke_sgesvdq.c | |||
| lapacke_sgesvdq_work.c | |||
| lapacke_sgesvdx.c | |||
| lapacke_sgesvdx_work.c | |||
| lapacke_sgesvj.c | |||
| @@ -1300,10 +1316,10 @@ lapacke_sggevx.c | |||
| lapacke_sggevx_work.c | |||
| lapacke_sggglm.c | |||
| lapacke_sggglm_work.c | |||
| lapacke_sgghrd.c | |||
| lapacke_sgghrd_work.c | |||
| lapacke_sgghd3.c | |||
| lapacke_sgghd3_work.c | |||
| lapacke_sgghrd.c | |||
| lapacke_sgghrd_work.c | |||
| lapacke_sgglse.c | |||
| lapacke_sgglse_work.c | |||
| lapacke_sggqrf.c | |||
| @@ -1496,14 +1512,14 @@ lapacke_spttrs.c | |||
| lapacke_spttrs_work.c | |||
| lapacke_ssbev.c | |||
| lapacke_ssbev_work.c | |||
| lapacke_ssbevd.c | |||
| lapacke_ssbevd_work.c | |||
| lapacke_ssbevx.c | |||
| lapacke_ssbevx_work.c | |||
| lapacke_ssbev_2stage.c | |||
| lapacke_ssbev_2stage_work.c | |||
| lapacke_ssbevd.c | |||
| lapacke_ssbevd_work.c | |||
| lapacke_ssbevd_2stage.c | |||
| lapacke_ssbevd_2stage_work.c | |||
| lapacke_ssbevx.c | |||
| lapacke_ssbevx_work.c | |||
| lapacke_ssbevx_2stage.c | |||
| lapacke_ssbevx_2stage_work.c | |||
| lapacke_ssbgst.c | |||
| @@ -1580,18 +1596,18 @@ lapacke_ssyequb.c | |||
| lapacke_ssyequb_work.c | |||
| lapacke_ssyev.c | |||
| lapacke_ssyev_work.c | |||
| lapacke_ssyevd.c | |||
| lapacke_ssyevd_work.c | |||
| lapacke_ssyevr.c | |||
| lapacke_ssyevr_work.c | |||
| lapacke_ssyevx.c | |||
| lapacke_ssyevx_work.c | |||
| lapacke_ssyev_2stage.c | |||
| lapacke_ssyev_2stage_work.c | |||
| lapacke_ssyevd.c | |||
| lapacke_ssyevd_work.c | |||
| lapacke_ssyevd_2stage.c | |||
| lapacke_ssyevd_2stage_work.c | |||
| lapacke_ssyevr.c | |||
| lapacke_ssyevr_work.c | |||
| lapacke_ssyevr_2stage.c | |||
| lapacke_ssyevr_2stage_work.c | |||
| lapacke_ssyevx.c | |||
| lapacke_ssyevx_work.c | |||
| lapacke_ssyevx_2stage.c | |||
| lapacke_ssyevx_2stage_work.c | |||
| lapacke_ssygst.c | |||
| @@ -1607,8 +1623,6 @@ lapacke_ssygvx_work.c | |||
| lapacke_ssyrfs.c | |||
| lapacke_ssyrfs_work.c | |||
| lapacke_ssysv.c | |||
| lapacke_ssysv_rook.c | |||
| lapacke_ssysv_rook_work.c | |||
| lapacke_ssysv_work.c | |||
| lapacke_ssysv_aa.c | |||
| lapacke_ssysv_aa_work.c | |||
| @@ -1616,6 +1630,8 @@ lapacke_ssysv_aa_2stage.c | |||
| lapacke_ssysv_aa_2stage_work.c | |||
| lapacke_ssysv_rk.c | |||
| lapacke_ssysv_rk_work.c | |||
| lapacke_ssysv_rook.c | |||
| lapacke_ssysv_rook_work.c | |||
| lapacke_ssysvx.c | |||
| lapacke_ssysvx_work.c | |||
| lapacke_ssyswapr.c | |||
| @@ -1624,33 +1640,33 @@ lapacke_ssytrd.c | |||
| lapacke_ssytrd_work.c | |||
| lapacke_ssytrf.c | |||
| lapacke_ssytrf_work.c | |||
| lapacke_ssytrf_rook.c | |||
| lapacke_ssytrf_rook_work.c | |||
| lapacke_ssytrf_aa.c | |||
| lapacke_ssytrf_aa_2stage.c | |||
| lapacke_ssytrf_aa_work.c | |||
| lapacke_ssytrf_aa_2stage.c | |||
| lapacke_ssytrf_aa_2stage_work.c | |||
| lapacke_ssytrf_rk.c | |||
| lapacke_ssytrf_rk_work.c | |||
| lapacke_ssytrf_rook.c | |||
| lapacke_ssytrf_rook_work.c | |||
| lapacke_ssytri.c | |||
| lapacke_ssytri_work.c | |||
| lapacke_ssytri2.c | |||
| lapacke_ssytri2_work.c | |||
| lapacke_ssytri_3.c | |||
| lapacke_ssytri_3_work.c | |||
| lapacke_ssytri2x.c | |||
| lapacke_ssytri2x_work.c | |||
| lapacke_ssytri_work.c | |||
| lapacke_ssytri_3.c | |||
| lapacke_ssytri_3_work.c | |||
| lapacke_ssytrs.c | |||
| lapacke_ssytrs_rook.c | |||
| lapacke_ssytrs_work.c | |||
| lapacke_ssytrs2.c | |||
| lapacke_ssytrs2_work.c | |||
| lapacke_ssytrs_3.c | |||
| lapacke_ssytrs_3_work.c | |||
| lapacke_ssytrs_aa.c | |||
| lapacke_ssytrs_aa_2stage.c | |||
| lapacke_ssytrs_aa_work.c | |||
| lapacke_ssytrs_aa_2stage.c | |||
| lapacke_ssytrs_aa_2stage_work.c | |||
| lapacke_ssytrs_3.c | |||
| lapacke_ssytrs_3_work.c | |||
| lapacke_ssytrs_work.c | |||
| lapacke_ssytrs_rook.c | |||
| lapacke_ssytrs_rook_work.c | |||
| lapacke_stbcon.c | |||
| lapacke_stbcon_work.c | |||
| @@ -1722,6 +1738,8 @@ lapacke_strttp.c | |||
| lapacke_strttp_work.c | |||
| lapacke_stzrzf.c | |||
| lapacke_stzrzf_work.c | |||
| ) | |||
| set(SOURCES_COMPLEX16 | |||
| lapacke_zbbcsd.c | |||
| lapacke_zbbcsd_work.c | |||
| lapacke_zbdsqr.c | |||
| @@ -1805,11 +1823,11 @@ lapacke_zgeqrf_work.c | |||
| lapacke_zgeqrfp.c | |||
| lapacke_zgeqrfp_work.c | |||
| lapacke_zgeqrt.c | |||
| lapacke_zgeqrt_work.c | |||
| lapacke_zgeqrt2.c | |||
| lapacke_zgeqrt2_work.c | |||
| lapacke_zgeqrt3.c | |||
| lapacke_zgeqrt3_work.c | |||
| lapacke_zgeqrt_work.c | |||
| lapacke_zgerfs.c | |||
| lapacke_zgerfs_work.c | |||
| lapacke_zgerqf.c | |||
| @@ -1820,6 +1838,8 @@ lapacke_zgesv.c | |||
| lapacke_zgesv_work.c | |||
| lapacke_zgesvd.c | |||
| lapacke_zgesvd_work.c | |||
| lapacke_zgesvdq.c | |||
| lapacke_zgesvdq_work.c | |||
| lapacke_zgesvdx.c | |||
| lapacke_zgesvdx_work.c | |||
| lapacke_zgesvj.c | |||
| @@ -1856,10 +1876,10 @@ lapacke_zggevx.c | |||
| lapacke_zggevx_work.c | |||
| lapacke_zggglm.c | |||
| lapacke_zggglm_work.c | |||
| lapacke_zgghrd.c | |||
| lapacke_zgghrd_work.c | |||
| lapacke_zgghd3.c | |||
| lapacke_zgghd3_work.c | |||
| lapacke_zgghrd.c | |||
| lapacke_zgghrd_work.c | |||
| lapacke_zgglse.c | |||
| lapacke_zgglse_work.c | |||
| lapacke_zggqrf.c | |||
| @@ -1884,14 +1904,14 @@ lapacke_zgttrs.c | |||
| lapacke_zgttrs_work.c | |||
| lapacke_zhbev.c | |||
| lapacke_zhbev_work.c | |||
| lapacke_zhbevd.c | |||
| lapacke_zhbevd_work.c | |||
| lapacke_zhbevx.c | |||
| lapacke_zhbevx_work.c | |||
| lapacke_zhbev_2stage.c | |||
| lapacke_zhbev_2stage_work.c | |||
| lapacke_zhbevd.c | |||
| lapacke_zhbevd_work.c | |||
| lapacke_zhbevd_2stage.c | |||
| lapacke_zhbevd_2stage_work.c | |||
| lapacke_zhbevx.c | |||
| lapacke_zhbevx_work.c | |||
| lapacke_zhbevx_2stage.c | |||
| lapacke_zhbevx_2stage_work.c | |||
| lapacke_zhbgst.c | |||
| @@ -1912,18 +1932,18 @@ lapacke_zheequb.c | |||
| lapacke_zheequb_work.c | |||
| lapacke_zheev.c | |||
| lapacke_zheev_work.c | |||
| lapacke_zheevd.c | |||
| lapacke_zheevd_work.c | |||
| lapacke_zheevr.c | |||
| lapacke_zheevr_work.c | |||
| lapacke_zheevx.c | |||
| lapacke_zheevx_work.c | |||
| lapacke_zheev_2stage.c | |||
| lapacke_zheev_2stage_work.c | |||
| lapacke_zheevd.c | |||
| lapacke_zheevd_work.c | |||
| lapacke_zheevd_2stage.c | |||
| lapacke_zheevd_2stage_work.c | |||
| lapacke_zheevr.c | |||
| lapacke_zheevr_work.c | |||
| lapacke_zheevr_2stage.c | |||
| lapacke_zheevr_2stage_work.c | |||
| lapacke_zheevx.c | |||
| lapacke_zheevx_work.c | |||
| lapacke_zheevx_2stage.c | |||
| lapacke_zheevx_2stage_work.c | |||
| lapacke_zhegst.c | |||
| @@ -1941,8 +1961,8 @@ lapacke_zherfs_work.c | |||
| lapacke_zhesv.c | |||
| lapacke_zhesv_work.c | |||
| lapacke_zhesv_aa.c | |||
| lapacke_zhesv_aa_2stage.c | |||
| lapacke_zhesv_aa_work.c | |||
| lapacke_zhesv_aa_2stage.c | |||
| lapacke_zhesv_aa_2stage_work.c | |||
| lapacke_zhesv_rk.c | |||
| lapacke_zhesv_rk_work.c | |||
| @@ -1953,34 +1973,34 @@ lapacke_zheswapr_work.c | |||
| lapacke_zhetrd.c | |||
| lapacke_zhetrd_work.c | |||
| lapacke_zhetrf.c | |||
| lapacke_zhetrf_rook.c | |||
| lapacke_zhetrf_work.c | |||
| lapacke_zhetrf_rook_work.c | |||
| lapacke_zhetrf_aa.c | |||
| lapacke_zhetrf_aa_2stage.c | |||
| lapacke_zhetrf_aa_work.c | |||
| lapacke_zhetrf_aa_2stage.c | |||
| lapacke_zhetrf_aa_2stage_work.c | |||
| lapacke_zhetrf_rk.c | |||
| lapacke_zhetrf_rk_work.c | |||
| lapacke_zhetrf_rook.c | |||
| lapacke_zhetrf_rook_work.c | |||
| lapacke_zhetri.c | |||
| lapacke_zhetri_work.c | |||
| lapacke_zhetri2.c | |||
| lapacke_zhetri2_work.c | |||
| lapacke_zhetri_3.c | |||
| lapacke_zhetri_3_work.c | |||
| lapacke_zhetri2x.c | |||
| lapacke_zhetri2x_work.c | |||
| lapacke_zhetri_work.c | |||
| lapacke_zhetri_3.c | |||
| lapacke_zhetri_3_work.c | |||
| lapacke_zhetrs.c | |||
| lapacke_zhetrs_rook.c | |||
| lapacke_zhetrs_work.c | |||
| lapacke_zhetrs2.c | |||
| lapacke_zhetrs2_work.c | |||
| lapacke_zhetrs_work.c | |||
| lapacke_zhetrs_3.c | |||
| lapacke_zhetrs_3_work.c | |||
| lapacke_zhetrs_aa.c | |||
| lapacke_zhetrs_aa_2stage.c | |||
| lapacke_zhetrs_aa_work.c | |||
| lapacke_zhetrs_aa_2stage.c | |||
| lapacke_zhetrs_aa_2stage_work.c | |||
| lapacke_zhetrs_3.c | |||
| lapacke_zhetrs_3_work.c | |||
| lapacke_zhetrs_rook.c | |||
| lapacke_zhetrs_rook_work.c | |||
| lapacke_zhfrk.c | |||
| lapacke_zhfrk_work.c | |||
| @@ -2172,52 +2192,54 @@ lapacke_zsyconv.c | |||
| lapacke_zsyconv_work.c | |||
| lapacke_zsyequb.c | |||
| lapacke_zsyequb_work.c | |||
| lapacke_zsyr.c | |||
| lapacke_zsyr_work.c | |||
| lapacke_zsyrfs.c | |||
| lapacke_zsyrfs_work.c | |||
| lapacke_zsysv.c | |||
| lapacke_zsysv_rook.c | |||
| lapacke_zsysv_rook_work.c | |||
| lapacke_zsysv_work.c | |||
| lapacke_zsysv_aa.c | |||
| lapacke_zsysv_aa_2stage.c | |||
| lapacke_zsysv_aa_work.c | |||
| lapacke_zsysv_aa_2stage.c | |||
| lapacke_zsysv_aa_2stage_work.c | |||
| lapacke_zsysv_rk.c | |||
| lapacke_zsysv_rk_work.c | |||
| lapacke_zsysv_rook.c | |||
| lapacke_zsysv_rook_work.c | |||
| lapacke_zsysvx.c | |||
| lapacke_zsysvx_work.c | |||
| lapacke_zsyswapr.c | |||
| lapacke_zsyswapr_work.c | |||
| lapacke_zsytrf.c | |||
| lapacke_zsytrf_work.c | |||
| lapacke_zsytrf_rook.c | |||
| lapacke_zsytrf_rook_work.c | |||
| lapacke_zsytrf_aa.c | |||
| lapacke_zsytrf_aa_2stage.c | |||
| lapacke_zsytrf_aa_work.c | |||
| lapacke_zsytrf_aa_2stage.c | |||
| lapacke_zsytrf_aa_2stage_work.c | |||
| lapacke_zsytrf_rk.c | |||
| lapacke_zsytrf_rk_work.c | |||
| lapacke_zsytrf_rook.c | |||
| lapacke_zsytrf_rook_work.c | |||
| lapacke_zsytri.c | |||
| lapacke_zsytri_work.c | |||
| lapacke_zsytri2.c | |||
| lapacke_zsytri2_work.c | |||
| lapacke_zsytri_3.c | |||
| lapacke_zsytri_3_work.c | |||
| lapacke_zsytri2x.c | |||
| lapacke_zsytri2x_work.c | |||
| lapacke_zsytri_work.c | |||
| lapacke_zsytri_3.c | |||
| lapacke_zsytri_3_work.c | |||
| lapacke_zsytrs.c | |||
| lapacke_zsytrs_rook.c | |||
| lapacke_zsytrs_work.c | |||
| lapacke_zsytrs2.c | |||
| lapacke_zsytrs2_work.c | |||
| lapacke_zsytrs_work.c | |||
| lapacke_zsytrs_rook_work.c | |||
| lapacke_zsytrs_3.c | |||
| lapacke_zsytrs_3_work.c | |||
| lapacke_zsytrs_aa.c | |||
| lapacke_zsytrs_aa_2stage.c | |||
| lapacke_zsytrs_aa_work.c | |||
| lapacke_zsytrs_aa_2stage.c | |||
| lapacke_zsytrs_aa_2stage_work.c | |||
| lapacke_zsytrs_3.c | |||
| lapacke_zsytrs_3_work.c | |||
| lapacke_zsytrs_rook.c | |||
| lapacke_zsytrs_rook_work.c | |||
| lapacke_ztbcon.c | |||
| lapacke_ztbcon_work.c | |||
| lapacke_ztbrfs.c | |||
| @@ -2249,9 +2271,9 @@ lapacke_ztpcon_work.c | |||
| lapacke_ztpmqrt.c | |||
| lapacke_ztpmqrt_work.c | |||
| lapacke_ztpqrt.c | |||
| lapacke_ztpqrt_work.c | |||
| lapacke_ztpqrt2.c | |||
| lapacke_ztpqrt2_work.c | |||
| lapacke_ztpqrt_work.c | |||
| lapacke_ztprfb.c | |||
| lapacke_ztprfb_work.c | |||
| lapacke_ztprfs.c | |||
| @@ -2328,11 +2350,6 @@ lapacke_zupgtr.c | |||
| lapacke_zupgtr_work.c | |||
| lapacke_zupmtr.c | |||
| lapacke_zupmtr_work.c | |||
| lapacke_zsyr.c | |||
| lapacke_csyr.c | |||
| lapacke_zsyr_work.c | |||
| lapacke_csyr_work.c | |||
| lapacke_ilaver.c | |||
| ) | |||
| set(DEPRECATED | |||
| @@ -32,12 +32,21 @@ | |||
| ############################################################################## | |||
| # makefile for LAPACKE, used to build lapacke binary. | |||
| # | |||
| # Note: we use multiple OBJ_A, OBJ_B, etc, instead of a single OBJ | |||
| # Note: we use multiple OBJ_S, OBJ_C, etc, instead of a single OBJ | |||
| # to allow build with mingw (argument list too long for the msys ar) | |||
| # | |||
| include ../../make.inc | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| OBJ_A = \ | |||
| .SUFFIXES: .c .o | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| OBJ = \ | |||
| lapacke_ilaver.o \ | |||
| lapacke_nancheck.o | |||
| OBJ_C = \ | |||
| lapacke_cbbcsd.o \ | |||
| lapacke_cbbcsd_work.o \ | |||
| lapacke_cbdsqr.o \ | |||
| @@ -82,12 +91,12 @@ lapacke_cgeevx.o \ | |||
| lapacke_cgeevx_work.o \ | |||
| lapacke_cgehrd.o \ | |||
| lapacke_cgehrd_work.o \ | |||
| lapacke_cgejsv.o \ | |||
| lapacke_cgejsv_work.o \ | |||
| lapacke_cgelq.o \ | |||
| lapacke_cgelq_work.o \ | |||
| lapacke_cgelq2.o \ | |||
| lapacke_cgelq2_work.o \ | |||
| lapacke_cgejsv.o \ | |||
| lapacke_cgejsv_work.o \ | |||
| lapacke_cgelqf.o \ | |||
| lapacke_cgelqf_work.o \ | |||
| lapacke_cgels.o \ | |||
| @@ -117,11 +126,11 @@ lapacke_cgeqrf_work.o \ | |||
| lapacke_cgeqrfp.o \ | |||
| lapacke_cgeqrfp_work.o \ | |||
| lapacke_cgeqrt.o \ | |||
| lapacke_cgeqrt_work.o \ | |||
| lapacke_cgeqrt2.o \ | |||
| lapacke_cgeqrt2_work.o \ | |||
| lapacke_cgeqrt3.o \ | |||
| lapacke_cgeqrt3_work.o \ | |||
| lapacke_cgeqrt_work.o \ | |||
| lapacke_cgerfs.o \ | |||
| lapacke_cgerfs_work.o \ | |||
| lapacke_cgerqf.o \ | |||
| @@ -132,6 +141,8 @@ lapacke_cgesv.o \ | |||
| lapacke_cgesv_work.o \ | |||
| lapacke_cgesvd.o \ | |||
| lapacke_cgesvd_work.o \ | |||
| lapacke_cgesvdq.o \ | |||
| lapacke_cgesvdq_work.o \ | |||
| lapacke_cgesvdx.o \ | |||
| lapacke_cgesvdx_work.o \ | |||
| lapacke_cgesvj.o \ | |||
| @@ -168,10 +179,10 @@ lapacke_cggevx.o \ | |||
| lapacke_cggevx_work.o \ | |||
| lapacke_cggglm.o \ | |||
| lapacke_cggglm_work.o \ | |||
| lapacke_cgghrd.o \ | |||
| lapacke_cgghrd_work.o \ | |||
| lapacke_cgghd3.o \ | |||
| lapacke_cgghd3_work.o \ | |||
| lapacke_cgghrd.o \ | |||
| lapacke_cgghrd_work.o \ | |||
| lapacke_cgglse.o \ | |||
| lapacke_cgglse_work.o \ | |||
| lapacke_cggqrf.o \ | |||
| @@ -196,14 +207,14 @@ lapacke_cgttrs.o \ | |||
| lapacke_cgttrs_work.o \ | |||
| lapacke_chbev.o \ | |||
| lapacke_chbev_work.o \ | |||
| lapacke_chbevd.o \ | |||
| lapacke_chbevd_work.o \ | |||
| lapacke_chbevx.o \ | |||
| lapacke_chbevx_work.o \ | |||
| lapacke_chbev_2stage.o \ | |||
| lapacke_chbev_2stage_work.o \ | |||
| lapacke_chbevd.o \ | |||
| lapacke_chbevd_work.o \ | |||
| lapacke_chbevd_2stage.o \ | |||
| lapacke_chbevd_2stage_work.o \ | |||
| lapacke_chbevx.o \ | |||
| lapacke_chbevx_work.o \ | |||
| lapacke_chbevx_2stage.o \ | |||
| lapacke_chbevx_2stage_work.o \ | |||
| lapacke_chbgst.o \ | |||
| @@ -224,18 +235,18 @@ lapacke_cheequb.o \ | |||
| lapacke_cheequb_work.o \ | |||
| lapacke_cheev.o \ | |||
| lapacke_cheev_work.o \ | |||
| lapacke_cheevd.o \ | |||
| lapacke_cheevd_work.o \ | |||
| lapacke_cheevr.o \ | |||
| lapacke_cheevr_work.o \ | |||
| lapacke_cheevx.o \ | |||
| lapacke_cheevx_work.o \ | |||
| lapacke_cheev_2stage.o \ | |||
| lapacke_cheev_2stage_work.o \ | |||
| lapacke_cheevd.o \ | |||
| lapacke_cheevd_work.o \ | |||
| lapacke_cheevd_2stage.o \ | |||
| lapacke_cheevd_2stage_work.o \ | |||
| lapacke_cheevr.o \ | |||
| lapacke_cheevr_work.o \ | |||
| lapacke_cheevr_2stage.o \ | |||
| lapacke_cheevr_2stage_work.o \ | |||
| lapacke_cheevx.o \ | |||
| lapacke_cheevx_work.o \ | |||
| lapacke_cheevx_2stage.o \ | |||
| lapacke_cheevx_2stage_work.o \ | |||
| lapacke_chegst.o \ | |||
| @@ -265,35 +276,35 @@ lapacke_cheswapr_work.o \ | |||
| lapacke_chetrd.o \ | |||
| lapacke_chetrd_work.o \ | |||
| lapacke_chetrf.o \ | |||
| lapacke_chetrf_rook.o \ | |||
| lapacke_chetrf_work.o \ | |||
| lapacke_chetrf_rook_work.o \ | |||
| lapacke_chetrf_aa.o \ | |||
| lapacke_chetrf_aa_2stage.o \ | |||
| lapacke_chetrf_aa_work.o \ | |||
| lapacke_chetrf_aa_2stage.o \ | |||
| lapacke_chetrf_aa_2stage_work.o \ | |||
| lapacke_chetrf_rk.o \ | |||
| lapacke_chetrf_rk_work.o \ | |||
| lapacke_chetrf_rook.o \ | |||
| lapacke_chetrf_rook_work.o \ | |||
| lapacke_chetri.o \ | |||
| lapacke_chetri_work.o \ | |||
| lapacke_chetri2.o \ | |||
| lapacke_chetri2_work.o \ | |||
| lapacke_chetri_3.o \ | |||
| lapacke_chetri_3_work.o \ | |||
| lapacke_chetri2x.o \ | |||
| lapacke_chetri2x_work.o \ | |||
| lapacke_chetri_work.o \ | |||
| lapacke_chetri_3.o \ | |||
| lapacke_chetri_3_work.o \ | |||
| lapacke_chetrs.o \ | |||
| lapacke_chetrs_rook.o \ | |||
| lapacke_chetrs_work.o \ | |||
| lapacke_chetrs2.o \ | |||
| lapacke_chetrs2_work.o \ | |||
| lapacke_chetrs_work.o \ | |||
| lapacke_chetrs_rook_work.o \ | |||
| lapacke_chetrs_3.o \ | |||
| lapacke_chetrs_3_work.o \ | |||
| lapacke_chetrs_aa.o \ | |||
| lapacke_chetrs_aa_2stage.o \ | |||
| lapacke_chetrs_aa_work.o \ | |||
| lapacke_chetrs_aa_2stage.o \ | |||
| lapacke_chetrs_aa_2stage_work.o \ | |||
| lapacke_chetrs_3.o \ | |||
| lapacke_chetrs_3_work.o \ | |||
| lapacke_chetrs_rook.o \ | |||
| lapacke_chetrs_rook_work.o \ | |||
| lapacke_chfrk.o \ | |||
| lapacke_chfrk_work.o \ | |||
| lapacke_chgeqz.o \ | |||
| @@ -484,11 +495,11 @@ lapacke_csyconv.o \ | |||
| lapacke_csyconv_work.o \ | |||
| lapacke_csyequb.o \ | |||
| lapacke_csyequb_work.o \ | |||
| lapacke_csyr.o \ | |||
| lapacke_csyr_work.o \ | |||
| lapacke_csyrfs.o \ | |||
| lapacke_csyrfs_work.o \ | |||
| lapacke_csysv.o \ | |||
| lapacke_csysv_rook.o \ | |||
| lapacke_csysv_rook_work.o \ | |||
| lapacke_csysv_work.o \ | |||
| lapacke_csysv_aa.o \ | |||
| lapacke_csysv_aa_work.o \ | |||
| @@ -496,40 +507,42 @@ lapacke_csysv_aa_2stage.o \ | |||
| lapacke_csysv_aa_2stage_work.o \ | |||
| lapacke_csysv_rk.o \ | |||
| lapacke_csysv_rk_work.o \ | |||
| lapacke_csysv_rook.o \ | |||
| lapacke_csysv_rook_work.o \ | |||
| lapacke_csysvx.o \ | |||
| lapacke_csysvx_work.o \ | |||
| lapacke_csyswapr.o \ | |||
| lapacke_csyswapr_work.o \ | |||
| lapacke_csytrf.o \ | |||
| lapacke_csytrf_work.o \ | |||
| lapacke_csytrf_rook.o \ | |||
| lapacke_csytrf_rook_work.o \ | |||
| lapacke_csytrf_aa.o \ | |||
| lapacke_csytrf_aa_2stage.o \ | |||
| lapacke_csytrf_aa_work.o \ | |||
| lapacke_csytrf_aa_2stage.o \ | |||
| lapacke_csytrf_aa_2stage_work.o \ | |||
| lapacke_csytrf_rk.o \ | |||
| lapacke_csytrf_rk_work.o \ | |||
| lapacke_csytrf_rook.o \ | |||
| lapacke_csytrf_rook_work.o \ | |||
| lapacke_csytri.o \ | |||
| lapacke_csytri_work.o \ | |||
| lapacke_csytri2.o \ | |||
| lapacke_csytri2_work.o \ | |||
| lapacke_csytri_3.o \ | |||
| lapacke_csytri_3_work.o \ | |||
| lapacke_csytri2x.o \ | |||
| lapacke_csytri2x_work.o \ | |||
| lapacke_csytri_work.o \ | |||
| lapacke_csytri_3.o \ | |||
| lapacke_csytri_3_work.o \ | |||
| lapacke_csytrs.o \ | |||
| lapacke_csytrs_rook.o \ | |||
| lapacke_csytrs_work.o \ | |||
| lapacke_csytrs2.o \ | |||
| lapacke_csytrs2_work.o \ | |||
| lapacke_csytrs_work.o \ | |||
| lapacke_csytrs_rook_work.o \ | |||
| lapacke_csytrs_3.o \ | |||
| lapacke_csytrs_3_work.o \ | |||
| lapacke_csytrs_aa.o \ | |||
| lapacke_csytrs_aa_2stage.o \ | |||
| lapacke_csytrs_aa_work.o \ | |||
| lapacke_csytrs_aa_2stage.o \ | |||
| lapacke_csytrs_aa_2stage_work.o \ | |||
| lapacke_csytrs_3.o \ | |||
| lapacke_csytrs_3_work.o \ | |||
| lapacke_csytrs_rook.o \ | |||
| lapacke_csytrs_rook_work.o \ | |||
| lapacke_ctbcon.o \ | |||
| lapacke_ctbcon_work.o \ | |||
| lapacke_ctbrfs.o \ | |||
| @@ -561,9 +574,9 @@ lapacke_ctpcon_work.o \ | |||
| lapacke_ctpmqrt.o \ | |||
| lapacke_ctpmqrt_work.o \ | |||
| lapacke_ctpqrt.o \ | |||
| lapacke_ctpqrt_work.o \ | |||
| lapacke_ctpqrt2.o \ | |||
| lapacke_ctpqrt2_work.o \ | |||
| lapacke_ctpqrt_work.o \ | |||
| lapacke_ctprfb.o \ | |||
| lapacke_ctprfb_work.o \ | |||
| lapacke_ctprfs.o \ | |||
| @@ -639,15 +652,17 @@ lapacke_cunmtr_work.o \ | |||
| lapacke_cupgtr.o \ | |||
| lapacke_cupgtr_work.o \ | |||
| lapacke_cupmtr.o \ | |||
| lapacke_cupmtr_work.o \ | |||
| lapacke_cupmtr_work.o | |||
| OBJ_D = \ | |||
| lapacke_dbbcsd.o \ | |||
| lapacke_dbbcsd_work.o \ | |||
| lapacke_dbdsdc.o \ | |||
| lapacke_dbdsdc_work.o \ | |||
| lapacke_dbdsvdx.o \ | |||
| lapacke_dbdsvdx_work.o \ | |||
| lapacke_dbdsqr.o \ | |||
| lapacke_dbdsqr_work.o \ | |||
| lapacke_dbdsvdx.o \ | |||
| lapacke_dbdsvdx_work.o \ | |||
| lapacke_ddisna.o \ | |||
| lapacke_ddisna_work.o \ | |||
| lapacke_dgbbrd.o \ | |||
| @@ -725,11 +740,11 @@ lapacke_dgeqrf_work.o \ | |||
| lapacke_dgeqrfp.o \ | |||
| lapacke_dgeqrfp_work.o \ | |||
| lapacke_dgeqrt.o \ | |||
| lapacke_dgeqrt_work.o \ | |||
| lapacke_dgeqrt2.o \ | |||
| lapacke_dgeqrt2_work.o \ | |||
| lapacke_dgeqrt3.o \ | |||
| lapacke_dgeqrt3_work.o \ | |||
| lapacke_dgeqrt_work.o \ | |||
| lapacke_dgerfs.o \ | |||
| lapacke_dgerfs_work.o \ | |||
| lapacke_dgerqf.o \ | |||
| @@ -740,6 +755,8 @@ lapacke_dgesv.o \ | |||
| lapacke_dgesv_work.o \ | |||
| lapacke_dgesvd.o \ | |||
| lapacke_dgesvd_work.o \ | |||
| lapacke_dgesvdq.o \ | |||
| lapacke_dgesvdq_work.o \ | |||
| lapacke_dgesvdx.o \ | |||
| lapacke_dgesvdx_work.o \ | |||
| lapacke_dgesvj.o \ | |||
| @@ -776,10 +793,10 @@ lapacke_dggevx.o \ | |||
| lapacke_dggevx_work.o \ | |||
| lapacke_dggglm.o \ | |||
| lapacke_dggglm_work.o \ | |||
| lapacke_dgghrd.o \ | |||
| lapacke_dgghrd_work.o \ | |||
| lapacke_dgghd3.o \ | |||
| lapacke_dgghd3_work.o \ | |||
| lapacke_dgghrd.o \ | |||
| lapacke_dgghrd_work.o \ | |||
| lapacke_dgglse.o \ | |||
| lapacke_dgglse_work.o \ | |||
| lapacke_dggqrf.o \ | |||
| @@ -972,14 +989,14 @@ lapacke_dpttrs.o \ | |||
| lapacke_dpttrs_work.o \ | |||
| lapacke_dsbev.o \ | |||
| lapacke_dsbev_work.o \ | |||
| lapacke_dsbevd.o \ | |||
| lapacke_dsbevd_work.o \ | |||
| lapacke_dsbevx.o \ | |||
| lapacke_dsbevx_work.o \ | |||
| lapacke_dsbev_2stage.o \ | |||
| lapacke_dsbev_2stage_work.o \ | |||
| lapacke_dsbevd.o \ | |||
| lapacke_dsbevd_work.o \ | |||
| lapacke_dsbevd_2stage.o \ | |||
| lapacke_dsbevd_2stage_work.o \ | |||
| lapacke_dsbevx.o \ | |||
| lapacke_dsbevx_work.o \ | |||
| lapacke_dsbevx_2stage.o \ | |||
| lapacke_dsbevx_2stage_work.o \ | |||
| lapacke_dsbgst.o \ | |||
| @@ -1060,18 +1077,18 @@ lapacke_dsyequb.o \ | |||
| lapacke_dsyequb_work.o \ | |||
| lapacke_dsyev.o \ | |||
| lapacke_dsyev_work.o \ | |||
| lapacke_dsyevd.o \ | |||
| lapacke_dsyevd_work.o \ | |||
| lapacke_dsyevr.o \ | |||
| lapacke_dsyevr_work.o \ | |||
| lapacke_dsyevx.o \ | |||
| lapacke_dsyevx_work.o \ | |||
| lapacke_dsyev_2stage.o \ | |||
| lapacke_dsyev_2stage_work.o \ | |||
| lapacke_dsyevd.o \ | |||
| lapacke_dsyevd_work.o \ | |||
| lapacke_dsyevd_2stage.o \ | |||
| lapacke_dsyevd_2stage_work.o \ | |||
| lapacke_dsyevr.o \ | |||
| lapacke_dsyevr_work.o \ | |||
| lapacke_dsyevr_2stage.o \ | |||
| lapacke_dsyevr_2stage_work.o \ | |||
| lapacke_dsyevx.o \ | |||
| lapacke_dsyevx_work.o \ | |||
| lapacke_dsyevx_2stage.o \ | |||
| lapacke_dsyevx_2stage_work.o \ | |||
| lapacke_dsygst.o \ | |||
| @@ -1087,8 +1104,6 @@ lapacke_dsygvx_work.o \ | |||
| lapacke_dsyrfs.o \ | |||
| lapacke_dsyrfs_work.o \ | |||
| lapacke_dsysv.o \ | |||
| lapacke_dsysv_rook.o \ | |||
| lapacke_dsysv_rook_work.o \ | |||
| lapacke_dsysv_work.o \ | |||
| lapacke_dsysv_aa.o \ | |||
| lapacke_dsysv_aa_work.o \ | |||
| @@ -1096,6 +1111,8 @@ lapacke_dsysv_aa_2stage.o \ | |||
| lapacke_dsysv_aa_2stage_work.o \ | |||
| lapacke_dsysv_rk.o \ | |||
| lapacke_dsysv_rk_work.o \ | |||
| lapacke_dsysv_rook.o \ | |||
| lapacke_dsysv_rook_work.o \ | |||
| lapacke_dsysvx.o \ | |||
| lapacke_dsysvx_work.o \ | |||
| lapacke_dsyswapr.o \ | |||
| @@ -1104,36 +1121,34 @@ lapacke_dsytrd.o \ | |||
| lapacke_dsytrd_work.o \ | |||
| lapacke_dsytrf.o \ | |||
| lapacke_dsytrf_work.o \ | |||
| lapacke_dsytrf_rook.o \ | |||
| lapacke_dsytrf_rook_work.o \ | |||
| lapacke_dsytrf_aa.o \ | |||
| lapacke_dsytrf_aa_work.o \ | |||
| lapacke_dsytrf_aa_2stage.o \ | |||
| lapacke_dsytrf_aa_2stage_work.o \ | |||
| lapacke_dsytrf_rk.o \ | |||
| lapacke_dsytrf_rk_work.o \ | |||
| lapacke_dsytrf_rook.o \ | |||
| lapacke_dsytrf_rook_work.o \ | |||
| lapacke_dsytri.o \ | |||
| lapacke_dsytri_work.o \ | |||
| lapacke_dsytri2.o \ | |||
| lapacke_dsytri2_work.o \ | |||
| lapacke_dsytri_3.o \ | |||
| lapacke_dsytri_3_work.o \ | |||
| lapacke_dsytri2x.o \ | |||
| lapacke_dsytri2x_work.o \ | |||
| lapacke_dsytri_work.o | |||
| OBJ_B = \ | |||
| lapacke_dsytri_3.o \ | |||
| lapacke_dsytri_3_work.o \ | |||
| lapacke_dsytrs.o \ | |||
| lapacke_dsytrs_rook.o \ | |||
| lapacke_dsytrs_work.o \ | |||
| lapacke_dsytrs2.o \ | |||
| lapacke_dsytrs2_work.o \ | |||
| lapacke_dsytrs_work.o \ | |||
| lapacke_dsytrs_rook_work.o \ | |||
| lapacke_dsytrs_3.o \ | |||
| lapacke_dsytrs_3_work.o \ | |||
| lapacke_dsytrs_aa.o \ | |||
| lapacke_dsytrs_aa_2stage.o \ | |||
| lapacke_dsytrs_aa_work.o \ | |||
| lapacke_dsytrs_aa_2stage.o \ | |||
| lapacke_dsytrs_aa_2stage_work.o \ | |||
| lapacke_dsytrs_3.o \ | |||
| lapacke_dsytrs_3_work.o \ | |||
| lapacke_dsytrs_rook.o \ | |||
| lapacke_dsytrs_rook_work.o \ | |||
| lapacke_dtbcon.o \ | |||
| lapacke_dtbcon_work.o \ | |||
| lapacke_dtbrfs.o \ | |||
| @@ -1165,9 +1180,9 @@ lapacke_dtpcon_work.o \ | |||
| lapacke_dtpmqrt.o \ | |||
| lapacke_dtpmqrt_work.o \ | |||
| lapacke_dtpqrt.o \ | |||
| lapacke_dtpqrt_work.o \ | |||
| lapacke_dtpqrt2.o \ | |||
| lapacke_dtpqrt2_work.o \ | |||
| lapacke_dtpqrt_work.o \ | |||
| lapacke_dtprfb.o \ | |||
| lapacke_dtprfb_work.o \ | |||
| lapacke_dtprfs.o \ | |||
| @@ -1203,16 +1218,17 @@ lapacke_dtrttf_work.o \ | |||
| lapacke_dtrttp.o \ | |||
| lapacke_dtrttp_work.o \ | |||
| lapacke_dtzrzf.o \ | |||
| lapacke_dtzrzf_work.o \ | |||
| lapacke_nancheck.o \ | |||
| lapacke_dtzrzf_work.o | |||
| OBJ_S = \ | |||
| lapacke_sbbcsd.o \ | |||
| lapacke_sbbcsd_work.o \ | |||
| lapacke_sbdsdc.o \ | |||
| lapacke_sbdsdc_work.o \ | |||
| lapacke_sbdsvdx.o \ | |||
| lapacke_sbdsvdx_work.o \ | |||
| lapacke_sbdsqr.o \ | |||
| lapacke_sbdsqr_work.o \ | |||
| lapacke_sbdsvdx.o \ | |||
| lapacke_sbdsvdx_work.o \ | |||
| lapacke_sdisna.o \ | |||
| lapacke_sdisna_work.o \ | |||
| lapacke_sgbbrd.o \ | |||
| @@ -1290,11 +1306,11 @@ lapacke_sgeqrf_work.o \ | |||
| lapacke_sgeqrfp.o \ | |||
| lapacke_sgeqrfp_work.o \ | |||
| lapacke_sgeqrt.o \ | |||
| lapacke_sgeqrt_work.o \ | |||
| lapacke_sgeqrt2.o \ | |||
| lapacke_sgeqrt2_work.o \ | |||
| lapacke_sgeqrt3.o \ | |||
| lapacke_sgeqrt3_work.o \ | |||
| lapacke_sgeqrt_work.o \ | |||
| lapacke_sgerfs.o \ | |||
| lapacke_sgerfs_work.o \ | |||
| lapacke_sgerqf.o \ | |||
| @@ -1305,6 +1321,8 @@ lapacke_sgesv.o \ | |||
| lapacke_sgesv_work.o \ | |||
| lapacke_sgesvd.o \ | |||
| lapacke_sgesvd_work.o \ | |||
| lapacke_sgesvdq.o \ | |||
| lapacke_sgesvdq_work.o \ | |||
| lapacke_sgesvdx.o \ | |||
| lapacke_sgesvdx_work.o \ | |||
| lapacke_sgesvj.o \ | |||
| @@ -1341,10 +1359,10 @@ lapacke_sggevx.o \ | |||
| lapacke_sggevx_work.o \ | |||
| lapacke_sggglm.o \ | |||
| lapacke_sggglm_work.o \ | |||
| lapacke_sgghrd.o \ | |||
| lapacke_sgghrd_work.o \ | |||
| lapacke_sgghd3.o \ | |||
| lapacke_sgghd3_work.o \ | |||
| lapacke_sgghrd.o \ | |||
| lapacke_sgghrd_work.o \ | |||
| lapacke_sgglse.o \ | |||
| lapacke_sgglse_work.o \ | |||
| lapacke_sggqrf.o \ | |||
| @@ -1537,14 +1555,14 @@ lapacke_spttrs.o \ | |||
| lapacke_spttrs_work.o \ | |||
| lapacke_ssbev.o \ | |||
| lapacke_ssbev_work.o \ | |||
| lapacke_ssbevd.o \ | |||
| lapacke_ssbevd_work.o \ | |||
| lapacke_ssbevx.o \ | |||
| lapacke_ssbevx_work.o \ | |||
| lapacke_ssbev_2stage.o \ | |||
| lapacke_ssbev_2stage_work.o \ | |||
| lapacke_ssbevd.o \ | |||
| lapacke_ssbevd_work.o \ | |||
| lapacke_ssbevd_2stage.o \ | |||
| lapacke_ssbevd_2stage_work.o \ | |||
| lapacke_ssbevx.o \ | |||
| lapacke_ssbevx_work.o \ | |||
| lapacke_ssbevx_2stage.o \ | |||
| lapacke_ssbevx_2stage_work.o \ | |||
| lapacke_ssbgst.o \ | |||
| @@ -1621,18 +1639,18 @@ lapacke_ssyequb.o \ | |||
| lapacke_ssyequb_work.o \ | |||
| lapacke_ssyev.o \ | |||
| lapacke_ssyev_work.o \ | |||
| lapacke_ssyevd.o \ | |||
| lapacke_ssyevd_work.o \ | |||
| lapacke_ssyevr.o \ | |||
| lapacke_ssyevr_work.o \ | |||
| lapacke_ssyevx.o \ | |||
| lapacke_ssyevx_work.o \ | |||
| lapacke_ssyev_2stage.o \ | |||
| lapacke_ssyev_2stage_work.o \ | |||
| lapacke_ssyevd.o \ | |||
| lapacke_ssyevd_work.o \ | |||
| lapacke_ssyevd_2stage.o \ | |||
| lapacke_ssyevd_2stage_work.o \ | |||
| lapacke_ssyevr.o \ | |||
| lapacke_ssyevr_work.o \ | |||
| lapacke_ssyevr_2stage.o \ | |||
| lapacke_ssyevr_2stage_work.o \ | |||
| lapacke_ssyevx.o \ | |||
| lapacke_ssyevx_work.o \ | |||
| lapacke_ssyevx_2stage.o \ | |||
| lapacke_ssyevx_2stage_work.o \ | |||
| lapacke_ssygst.o \ | |||
| @@ -1648,8 +1666,6 @@ lapacke_ssygvx_work.o \ | |||
| lapacke_ssyrfs.o \ | |||
| lapacke_ssyrfs_work.o \ | |||
| lapacke_ssysv.o \ | |||
| lapacke_ssysv_rook.o \ | |||
| lapacke_ssysv_rook_work.o \ | |||
| lapacke_ssysv_work.o \ | |||
| lapacke_ssysv_aa.o \ | |||
| lapacke_ssysv_aa_work.o \ | |||
| @@ -1657,6 +1673,8 @@ lapacke_ssysv_aa_2stage.o \ | |||
| lapacke_ssysv_aa_2stage_work.o \ | |||
| lapacke_ssysv_rk.o \ | |||
| lapacke_ssysv_rk_work.o \ | |||
| lapacke_ssysv_rook.o \ | |||
| lapacke_ssysv_rook_work.o \ | |||
| lapacke_ssysvx.o \ | |||
| lapacke_ssysvx_work.o \ | |||
| lapacke_ssyswapr.o \ | |||
| @@ -1665,34 +1683,34 @@ lapacke_ssytrd.o \ | |||
| lapacke_ssytrd_work.o \ | |||
| lapacke_ssytrf.o \ | |||
| lapacke_ssytrf_work.o \ | |||
| lapacke_ssytrf_rook.o \ | |||
| lapacke_ssytrf_rook_work.o \ | |||
| lapacke_ssytrf_aa.o \ | |||
| lapacke_ssytrf_aa_work.o \ | |||
| lapacke_ssytrf_aa_2stage.o \ | |||
| lapacke_ssytrf_aa_2stage_work.o \ | |||
| lapacke_ssytrf_rk.o \ | |||
| lapacke_ssytrf_rk_work.o \ | |||
| lapacke_ssytrf_rook.o \ | |||
| lapacke_ssytrf_rook_work.o \ | |||
| lapacke_ssytri.o \ | |||
| lapacke_ssytri_work.o \ | |||
| lapacke_ssytri2.o \ | |||
| lapacke_ssytri2_work.o \ | |||
| lapacke_ssytri_3.o \ | |||
| lapacke_ssytri_3_work.o \ | |||
| lapacke_ssytri2x.o \ | |||
| lapacke_ssytri2x_work.o \ | |||
| lapacke_ssytri_work.o \ | |||
| lapacke_ssytri_3.o \ | |||
| lapacke_ssytri_3_work.o \ | |||
| lapacke_ssytrs.o \ | |||
| lapacke_ssytrs_rook.o \ | |||
| lapacke_ssytrs_work.o \ | |||
| lapacke_ssytrs2.o \ | |||
| lapacke_ssytrs2_work.o \ | |||
| lapacke_ssytrs_work.o \ | |||
| lapacke_ssytrs_rook_work.o \ | |||
| lapacke_ssytrs_3.o \ | |||
| lapacke_ssytrs_3_work.o \ | |||
| lapacke_ssytrs_aa.o \ | |||
| lapacke_ssytrs_aa_2stage.o \ | |||
| lapacke_ssytrs_aa_work.o \ | |||
| lapacke_ssytrs_aa_2stage.o \ | |||
| lapacke_ssytrs_aa_2stage_work.o \ | |||
| lapacke_ssytrs_3.o \ | |||
| lapacke_ssytrs_3_work.o \ | |||
| lapacke_ssytrs_rook.o \ | |||
| lapacke_ssytrs_rook_work.o \ | |||
| lapacke_stbcon.o \ | |||
| lapacke_stbcon_work.o \ | |||
| lapacke_stbrfs.o \ | |||
| @@ -1762,7 +1780,9 @@ lapacke_strttf_work.o \ | |||
| lapacke_strttp.o \ | |||
| lapacke_strttp_work.o \ | |||
| lapacke_stzrzf.o \ | |||
| lapacke_stzrzf_work.o \ | |||
| lapacke_stzrzf_work.o | |||
| OBJ_Z = \ | |||
| lapacke_zbbcsd.o \ | |||
| lapacke_zbbcsd_work.o \ | |||
| lapacke_zbdsqr.o \ | |||
| @@ -1846,11 +1866,11 @@ lapacke_zgeqrf_work.o \ | |||
| lapacke_zgeqrfp.o \ | |||
| lapacke_zgeqrfp_work.o \ | |||
| lapacke_zgeqrt.o \ | |||
| lapacke_zgeqrt_work.o \ | |||
| lapacke_zgeqrt2.o \ | |||
| lapacke_zgeqrt2_work.o \ | |||
| lapacke_zgeqrt3.o \ | |||
| lapacke_zgeqrt3_work.o \ | |||
| lapacke_zgeqrt_work.o \ | |||
| lapacke_zgerfs.o \ | |||
| lapacke_zgerfs_work.o \ | |||
| lapacke_zgerqf.o \ | |||
| @@ -1861,6 +1881,8 @@ lapacke_zgesv.o \ | |||
| lapacke_zgesv_work.o \ | |||
| lapacke_zgesvd.o \ | |||
| lapacke_zgesvd_work.o \ | |||
| lapacke_zgesvdq.o \ | |||
| lapacke_zgesvdq_work.o \ | |||
| lapacke_zgesvdx.o \ | |||
| lapacke_zgesvdx_work.o \ | |||
| lapacke_zgesvj.o \ | |||
| @@ -1897,10 +1919,10 @@ lapacke_zggevx.o \ | |||
| lapacke_zggevx_work.o \ | |||
| lapacke_zggglm.o \ | |||
| lapacke_zggglm_work.o \ | |||
| lapacke_zgghrd.o \ | |||
| lapacke_zgghrd_work.o \ | |||
| lapacke_zgghd3.o \ | |||
| lapacke_zgghd3_work.o \ | |||
| lapacke_zgghrd.o \ | |||
| lapacke_zgghrd_work.o \ | |||
| lapacke_zgglse.o \ | |||
| lapacke_zgglse_work.o \ | |||
| lapacke_zggqrf.o \ | |||
| @@ -1925,14 +1947,14 @@ lapacke_zgttrs.o \ | |||
| lapacke_zgttrs_work.o \ | |||
| lapacke_zhbev.o \ | |||
| lapacke_zhbev_work.o \ | |||
| lapacke_zhbevd.o \ | |||
| lapacke_zhbevd_work.o \ | |||
| lapacke_zhbevx.o \ | |||
| lapacke_zhbevx_work.o \ | |||
| lapacke_zhbev_2stage.o \ | |||
| lapacke_zhbev_2stage_work.o \ | |||
| lapacke_zhbevd.o \ | |||
| lapacke_zhbevd_work.o \ | |||
| lapacke_zhbevd_2stage.o \ | |||
| lapacke_zhbevd_2stage_work.o \ | |||
| lapacke_zhbevx.o \ | |||
| lapacke_zhbevx_work.o \ | |||
| lapacke_zhbevx_2stage.o \ | |||
| lapacke_zhbevx_2stage_work.o \ | |||
| lapacke_zhbgst.o \ | |||
| @@ -1953,18 +1975,18 @@ lapacke_zheequb.o \ | |||
| lapacke_zheequb_work.o \ | |||
| lapacke_zheev.o \ | |||
| lapacke_zheev_work.o \ | |||
| lapacke_zheevd.o \ | |||
| lapacke_zheevd_work.o \ | |||
| lapacke_zheevr.o \ | |||
| lapacke_zheevr_work.o \ | |||
| lapacke_zheevx.o \ | |||
| lapacke_zheevx_work.o \ | |||
| lapacke_zheev_2stage.o \ | |||
| lapacke_zheev_2stage_work.o \ | |||
| lapacke_zheevd.o \ | |||
| lapacke_zheevd_work.o \ | |||
| lapacke_zheevd_2stage.o \ | |||
| lapacke_zheevd_2stage_work.o \ | |||
| lapacke_zheevr.o \ | |||
| lapacke_zheevr_work.o \ | |||
| lapacke_zheevr_2stage.o \ | |||
| lapacke_zheevr_2stage_work.o \ | |||
| lapacke_zheevx.o \ | |||
| lapacke_zheevx_work.o \ | |||
| lapacke_zheevx_2stage.o \ | |||
| lapacke_zheevx_2stage_work.o \ | |||
| lapacke_zhegst.o \ | |||
| @@ -1994,35 +2016,35 @@ lapacke_zheswapr_work.o \ | |||
| lapacke_zhetrd.o \ | |||
| lapacke_zhetrd_work.o \ | |||
| lapacke_zhetrf.o \ | |||
| lapacke_zhetrf_rook.o \ | |||
| lapacke_zhetrf_work.o \ | |||
| lapacke_zhetrf_rook_work.o \ | |||
| lapacke_zhetrf_aa.o \ | |||
| lapacke_zhetrf_aa_2stage.o \ | |||
| lapacke_zhetrf_aa_work.o \ | |||
| lapacke_zhetrf_aa_2stage.o \ | |||
| lapacke_zhetrf_aa_2stage_work.o \ | |||
| lapacke_zhetrf_rk.o \ | |||
| lapacke_zhetrf_rk_work.o \ | |||
| lapacke_zhetrf_rook.o \ | |||
| lapacke_zhetrf_rook_work.o \ | |||
| lapacke_zhetri.o \ | |||
| lapacke_zhetri_work.o \ | |||
| lapacke_zhetri2.o \ | |||
| lapacke_zhetri2_work.o \ | |||
| lapacke_zhetri_3.o \ | |||
| lapacke_zhetri_3_work.o \ | |||
| lapacke_zhetri2x.o \ | |||
| lapacke_zhetri2x_work.o \ | |||
| lapacke_zhetri_work.o \ | |||
| lapacke_zhetri_3.o \ | |||
| lapacke_zhetri_3_work.o \ | |||
| lapacke_zhetrs.o \ | |||
| lapacke_zhetrs_rook.o \ | |||
| lapacke_zhetrs_work.o \ | |||
| lapacke_zhetrs2.o \ | |||
| lapacke_zhetrs2_work.o \ | |||
| lapacke_zhetrs_work.o \ | |||
| lapacke_zhetrs_rook_work.o \ | |||
| lapacke_zhetrs_3.o \ | |||
| lapacke_zhetrs_3_work.o \ | |||
| lapacke_zhetrs_aa.o \ | |||
| lapacke_zhetrs_aa_2stage.o \ | |||
| lapacke_zhetrs_aa_work.o \ | |||
| lapacke_zhetrs_aa_2stage.o \ | |||
| lapacke_zhetrs_aa_2stage_work.o \ | |||
| lapacke_zhetrs_3.o \ | |||
| lapacke_zhetrs_3_work.o \ | |||
| lapacke_zhetrs_rook.o \ | |||
| lapacke_zhetrs_rook_work.o \ | |||
| lapacke_zhfrk.o \ | |||
| lapacke_zhfrk_work.o \ | |||
| lapacke_zhgeqz.o \ | |||
| @@ -2213,11 +2235,11 @@ lapacke_zsyconv.o \ | |||
| lapacke_zsyconv_work.o \ | |||
| lapacke_zsyequb.o \ | |||
| lapacke_zsyequb_work.o \ | |||
| lapacke_zsyr.o \ | |||
| lapacke_zsyr_work.o \ | |||
| lapacke_zsyrfs.o \ | |||
| lapacke_zsyrfs_work.o \ | |||
| lapacke_zsysv.o \ | |||
| lapacke_zsysv_rook.o \ | |||
| lapacke_zsysv_rook_work.o \ | |||
| lapacke_zsysv_work.o \ | |||
| lapacke_zsysv_aa.o \ | |||
| lapacke_zsysv_aa_work.o \ | |||
| @@ -2225,40 +2247,42 @@ lapacke_zsysv_aa_2stage.o \ | |||
| lapacke_zsysv_aa_2stage_work.o \ | |||
| lapacke_zsysv_rk.o \ | |||
| lapacke_zsysv_rk_work.o \ | |||
| lapacke_zsysv_rook.o \ | |||
| lapacke_zsysv_rook_work.o \ | |||
| lapacke_zsysvx.o \ | |||
| lapacke_zsysvx_work.o \ | |||
| lapacke_zsyswapr.o \ | |||
| lapacke_zsyswapr_work.o \ | |||
| lapacke_zsytrf.o \ | |||
| lapacke_zsytrf_work.o \ | |||
| lapacke_zsytrf_rook.o \ | |||
| lapacke_zsytrf_rook_work.o \ | |||
| lapacke_zsytrf_aa.o \ | |||
| lapacke_zsytrf_aa_2stage.o \ | |||
| lapacke_zsytrf_aa_work.o \ | |||
| lapacke_zsytrf_aa_2stage.o \ | |||
| lapacke_zsytrf_aa_2stage_work.o \ | |||
| lapacke_zsytrf_rk.o \ | |||
| lapacke_zsytrf_rk_work.o \ | |||
| lapacke_zsytrf_rook.o \ | |||
| lapacke_zsytrf_rook_work.o \ | |||
| lapacke_zsytri.o \ | |||
| lapacke_zsytri_work.o \ | |||
| lapacke_zsytri2.o \ | |||
| lapacke_zsytri2_work.o \ | |||
| lapacke_zsytri_3.o \ | |||
| lapacke_zsytri_3_work.o \ | |||
| lapacke_zsytri2x.o \ | |||
| lapacke_zsytri2x_work.o \ | |||
| lapacke_zsytri_work.o \ | |||
| lapacke_zsytri_3.o \ | |||
| lapacke_zsytri_3_work.o \ | |||
| lapacke_zsytrs.o \ | |||
| lapacke_zsytrs_rook.o \ | |||
| lapacke_zsytrs_work.o \ | |||
| lapacke_zsytrs2.o \ | |||
| lapacke_zsytrs2_work.o \ | |||
| lapacke_zsytrs_work.o \ | |||
| lapacke_zsytrs_rook_work.o \ | |||
| lapacke_zsytrs_3.o \ | |||
| lapacke_zsytrs_3_work.o \ | |||
| lapacke_zsytrs_aa.o \ | |||
| lapacke_zsytrs_aa_2stage.o \ | |||
| lapacke_zsytrs_aa_work.o \ | |||
| lapacke_zsytrs_aa_2stage.o \ | |||
| lapacke_zsytrs_aa_2stage_work.o \ | |||
| lapacke_zsytrs_3.o \ | |||
| lapacke_zsytrs_3_work.o \ | |||
| lapacke_zsytrs_rook.o \ | |||
| lapacke_zsytrs_rook_work.o \ | |||
| lapacke_ztbcon.o \ | |||
| lapacke_ztbcon_work.o \ | |||
| lapacke_ztbrfs.o \ | |||
| @@ -2290,9 +2314,9 @@ lapacke_ztpcon_work.o \ | |||
| lapacke_ztpmqrt.o \ | |||
| lapacke_ztpmqrt_work.o \ | |||
| lapacke_ztpqrt.o \ | |||
| lapacke_ztpqrt_work.o \ | |||
| lapacke_ztpqrt2.o \ | |||
| lapacke_ztpqrt2_work.o \ | |||
| lapacke_ztpqrt_work.o \ | |||
| lapacke_ztprfb.o \ | |||
| lapacke_ztprfb_work.o \ | |||
| lapacke_ztprfs.o \ | |||
| @@ -2368,12 +2392,7 @@ lapacke_zunmtr_work.o \ | |||
| lapacke_zupgtr.o \ | |||
| lapacke_zupgtr_work.o \ | |||
| lapacke_zupmtr.o \ | |||
| lapacke_zupmtr_work.o \ | |||
| lapacke_zsyr.o \ | |||
| lapacke_csyr.o \ | |||
| lapacke_zsyr_work.o \ | |||
| lapacke_csyr_work.o \ | |||
| lapacke_ilaver.o | |||
| lapacke_zupmtr_work.o | |||
| ifdef BUILD_DEPRECATED | |||
| DEPRECATED = \ | |||
| @@ -2452,27 +2471,29 @@ lapacke_zlagsy.o \ | |||
| lapacke_zlagsy_work.o | |||
| endif | |||
| all: ../../$(LAPACKELIB) | |||
| .PHONY: all | |||
| all: $(LAPACKELIB) | |||
| .PHONY: ../../$(LAPACKELIB) | |||
| ../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) | |||
| $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) | |||
| $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) | |||
| $(LAPACKELIB): $(OBJ) $(OBJ_S) $(OBJ_C) $(OBJ_D) $(OBJ_Z) $(DEPRECATED) $(EXTENDED) $(MATGEN) | |||
| $(AR) $(ARFLAGS) $@ $(OBJ) | |||
| $(AR) $(ARFLAGS) $@ $(OBJ_S) | |||
| $(AR) $(ARFLAGS) $@ $(OBJ_C) | |||
| $(AR) $(ARFLAGS) $@ $(OBJ_D) | |||
| $(AR) $(ARFLAGS) $@ $(OBJ_Z) | |||
| ifdef BUILD_DEPRECATED | |||
| $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) | |||
| $(AR) $(ARFLAGS) $@ $(DEPRECATED) | |||
| endif | |||
| ifdef (USEXBLAS) | |||
| $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) | |||
| $(AR) $(ARFLAGS) $@ $(EXTENDED) | |||
| endif | |||
| ifdef LAPACKE_WITH_TMG | |||
| $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) | |||
| $(AR) $(ARFLAGS) $@ $(MATGEN) | |||
| endif | |||
| $(RANLIB) $@ | |||
| clean: cleanobj | |||
| .PHONY: clean cleanobj cleanlib | |||
| clean: cleanobj cleanlib | |||
| cleanobj: | |||
| rm -f *.o | |||
| .c.o: | |||
| $(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
| cleanlib: | |||
| rm -f $(LAPACKELIB) | |||
| @@ -124,7 +124,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, | |||
| float* rwork = NULL; | |||
| lapack_complex_float* cwork = NULL; | |||
| lapack_int i; | |||
| lapack_int nu, nv; | |||
| if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { | |||
| LAPACKE_xerbla( "LAPACKE_cgejsv", -1 ); | |||
| return -1; | |||
| @@ -132,8 +131,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, | |||
| #ifndef LAPACK_DISABLE_NAN_CHECK | |||
| if( LAPACKE_get_nancheck() ) { | |||
| /* Optionally check input matrices for NaNs */ | |||
| nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; | |||
| nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; | |||
| if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { | |||
| return -10; | |||
| } | |||
| @@ -75,7 +75,7 @@ lapack_int LAPACKE_cgelsd( int matrix_layout, lapack_int m, lapack_int n, | |||
| if( info != 0 ) { | |||
| goto exit_level_0; | |||
| } | |||
| liwork = (lapack_int)iwork_query; | |||
| liwork = iwork_query; | |||
| lrwork = (lapack_int)rwork_query; | |||
| lwork = LAPACK_C2INT( work_query ); | |||
| /* Allocate memory for work arrays */ | |||
| @@ -0,0 +1,106 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2014, Intel Corp. | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are met: | |||
| * Redistributions of source code must retain the above copyright notice, | |||
| this list of conditions and the following disclaimer. | |||
| * Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in the | |||
| documentation and/or other materials provided with the distribution. | |||
| * Neither the name of Intel Corporation nor the names of its contributors | |||
| may be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||
| THE POSSIBILITY OF SUCH DAMAGE. | |||
| ***************************************************************************** | |||
| * Contents: Native high-level C interface to LAPACK function cgesvdq | |||
| * Author: Intel Corporation | |||
| * Generated November 2018 | |||
| *****************************************************************************/ | |||
| #include "lapacke_utils.h" | |||
| /* | |||
|  * High-level C interface to the LAPACK routine cgesvdq. | |||
|  * | |||
|  * Validates matrix_layout, optionally NaN-checks the m-by-n input matrix a, | |||
|  * asks LAPACKE_cgesvdq_work for the required sizes of the integer, complex | |||
|  * and real work arrays, allocates them, and calls LAPACKE_cgesvdq_work a | |||
|  * second time to perform the computation. | |||
|  * | |||
|  * Returns the info value reported by LAPACKE_cgesvdq_work, -1 for an invalid | |||
|  * matrix_layout, -6 when the NaN check on a fails, or | |||
|  * LAPACK_WORK_MEMORY_ERROR when a work array cannot be allocated. | |||
|  */ | |||
| lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp, | |||
|                             char jobr, char jobu, char jobv, | |||
|                             lapack_int m, lapack_int n, lapack_complex_float* a, | |||
|                             lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, | |||
|                             lapack_complex_float* v, lapack_int ldv, lapack_int* numrank) | |||
| { | |||
|     lapack_int info = 0; | |||
|     lapack_int liwork = -1; | |||
|     lapack_int* iwork = NULL; | |||
|     lapack_int iwork_query; | |||
|     lapack_int lcwork = -1; | |||
|     lapack_complex_float* cwork = NULL; | |||
|     lapack_complex_float cwork_query; | |||
|     lapack_int lrwork = -1; | |||
|     /* Single precision: LAPACKE_cgesvdq_work takes a float* rwork. */ | |||
|     float* rwork = NULL; | |||
|     float rwork_query; | |||
|     if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { | |||
|         LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 ); | |||
|         return -1; | |||
|     } | |||
| #ifndef LAPACK_DISABLE_NAN_CHECK | |||
|     if( LAPACKE_get_nancheck() ) { | |||
|         /* Optionally check input matrices for NaNs */ | |||
|         if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { | |||
|             /* NOTE(review): a is the 9th argument of this function, so -6 | |||
|                looks inherited from another wrapper; kept as-is - confirm. */ | |||
|             return -6; | |||
|         } | |||
|     } | |||
| #endif | |||
|     /* Query optimal working array(s) size */ | |||
|     info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv, | |||
|                                  m, n, a, lda, s, u, ldu, v, ldv, numrank, | |||
|                                  &iwork_query, liwork, &cwork_query, lcwork, | |||
|                                  &rwork_query, lrwork ); | |||
|     if( info != 0 ) { | |||
|         goto exit_level_0; | |||
|     } | |||
|     liwork = iwork_query; | |||
|     lcwork = LAPACK_C2INT(cwork_query); | |||
|     lrwork = (lapack_int)rwork_query; | |||
|     /* Allocate memory for work arrays */ | |||
|     iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); | |||
|     if( iwork == NULL ) { | |||
|         info = LAPACK_WORK_MEMORY_ERROR; | |||
|         goto exit_level_0; | |||
|     } | |||
|     cwork = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lcwork ); | |||
|     if( cwork == NULL ) { | |||
|         info = LAPACK_WORK_MEMORY_ERROR; | |||
|         /* iwork is already allocated and must be released */ | |||
|         goto exit_level_1; | |||
|     } | |||
|     rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork ); | |||
|     if( rwork == NULL ) { | |||
|         info = LAPACK_WORK_MEMORY_ERROR; | |||
|         /* iwork and cwork are already allocated and must be released */ | |||
|         goto exit_level_2; | |||
|     } | |||
|     /* Call middle-level interface */ | |||
|     info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv, | |||
|                                  m, n, a, lda, s, u, ldu, v, ldv, numrank, | |||
|                                  iwork, liwork, cwork, lcwork, rwork, lrwork ); | |||
|     /* Release memory and exit */ | |||
|     LAPACKE_free( rwork ); | |||
| exit_level_2: | |||
|     LAPACKE_free( cwork ); | |||
| exit_level_1: | |||
|     LAPACKE_free( iwork ); | |||
| exit_level_0: | |||
|     if( info == LAPACK_WORK_MEMORY_ERROR ) { | |||
|         LAPACKE_xerbla( "LAPACKE_cgesvdq", info ); | |||
|     } | |||
|     return info; | |||
| } | |||
| @@ -0,0 +1,149 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2014, Intel Corp. | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are met: | |||
| * Redistributions of source code must retain the above copyright notice, | |||
| this list of conditions and the following disclaimer. | |||
| * Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in the | |||
| documentation and/or other materials provided with the distribution. | |||
| * Neither the name of Intel Corporation nor the names of its contributors | |||
| may be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||
| THE POSSIBILITY OF SUCH DAMAGE. | |||
| ***************************************************************************** | |||
| * Contents: Native middle-level C interface to LAPACK function cgesvdq | |||
| * Author: Intel Corporation | |||
| * Generated November 2015 | |||
| *****************************************************************************/ | |||
| #include "lapacke_utils.h" | |||
| /* | |||
|  * Middle-level C interface to the LAPACK routine cgesvdq. | |||
|  * | |||
|  * For LAPACK_COL_MAJOR the arguments are forwarded to LAPACK_cgesvdq | |||
|  * unchanged. For LAPACK_ROW_MAJOR the leading dimensions are checked, | |||
|  * column-major temporaries are allocated for a and (when jobu / jobv is | |||
|  * 'a' or 's') for u and v, a is transposed in, LAPACK_cgesvdq runs on the | |||
|  * temporaries, and the results are transposed back into a, u and v. | |||
|  * The caller supplies all work arrays (iwork, cwork, rwork) and their sizes; | |||
|  * a size of -1 requests a workspace query. | |||
|  * | |||
|  * Returns the info value from LAPACK_cgesvdq (negative values shifted by one | |||
|  * to account for the extra matrix_layout argument), -1 for an invalid | |||
|  * matrix_layout, a negative code for a too-small leading dimension, or | |||
|  * LAPACK_TRANSPOSE_MEMORY_ERROR when a temporary cannot be allocated. | |||
|  */ | |||
| lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp, | |||
|                                  char jobr, char jobu, char jobv, | |||
|                                  lapack_int m, lapack_int n, lapack_complex_float* a, | |||
|                                  lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, | |||
|                                  lapack_complex_float* v, lapack_int ldv, lapack_int* numrank, | |||
|                                  lapack_int* iwork, lapack_int liwork, | |||
|                                  lapack_complex_float* cwork, lapack_int lcwork, | |||
|                                  float* rwork, lapack_int lrwork ) | |||
| { | |||
|     lapack_int info = 0; | |||
|     if( matrix_layout == LAPACK_COL_MAJOR ) { | |||
|         /* Call LAPACK function and adjust info */ | |||
|         LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv, | |||
|                         numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info ); | |||
|         if( info < 0 ) { | |||
|             info = info - 1; | |||
|         } | |||
|     } else if( matrix_layout == LAPACK_ROW_MAJOR ) { | |||
|         lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || | |||
|                              LAPACKE_lsame( jobu, 's' ) ) ? m : 1; | |||
|         lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : | |||
|                              (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1); | |||
|         lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n : | |||
|                              ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1); | |||
|         lapack_int lda_t = MAX(1,m); | |||
|         lapack_int ldu_t = MAX(1,nrows_u); | |||
|         lapack_int ldv_t = MAX(1,nrows_v); | |||
|         lapack_complex_float* a_t = NULL; | |||
|         lapack_complex_float* u_t = NULL; | |||
|         lapack_complex_float* v_t = NULL; | |||
|         /* Check leading dimension(s) */ | |||
|         /* NOTE(review): lda, ldu and ldv are the 10th, 13th and 15th | |||
|            arguments of this function; the codes below are kept unchanged | |||
|            for compatibility - confirm the intended numbering. */ | |||
|         if( lda < n ) { | |||
|             info = -9; | |||
|             LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||
|             return info; | |||
|         } | |||
|         if( ldu < ncols_u ) { | |||
|             info = -12; | |||
|             LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||
|             return info; | |||
|         } | |||
|         if( ldv < n ) { | |||
|             info = -14; | |||
|             LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||
|             return info; | |||
|         } | |||
|         /* Query optimal working array(s) size if requested. Any of the | |||
|            three sizes being -1 is treated as a query, so that a query never | |||
|            reaches the transpose path below. */ | |||
|         if( liwork == -1 || lcwork == -1 || lrwork == -1 ) { | |||
|             LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, | |||
|                             s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, | |||
|                             cwork, &lcwork, rwork, &lrwork, &info ); | |||
|             return (info < 0) ? (info - 1) : info; | |||
|         } | |||
|         /* Allocate memory for temporary array(s) */ | |||
|         a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); | |||
|         if( a_t == NULL ) { | |||
|             info = LAPACK_TRANSPOSE_MEMORY_ERROR; | |||
|             goto exit_level_0; | |||
|         } | |||
|         if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { | |||
|             u_t = (lapack_complex_float*) | |||
|                 LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) ); | |||
|             if( u_t == NULL ) { | |||
|                 info = LAPACK_TRANSPOSE_MEMORY_ERROR; | |||
|                 goto exit_level_1; | |||
|             } | |||
|         } | |||
|         if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { | |||
|             v_t = (lapack_complex_float*) | |||
|                 LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); | |||
|             if( v_t == NULL ) { | |||
|                 info = LAPACK_TRANSPOSE_MEMORY_ERROR; | |||
|                 goto exit_level_2; | |||
|             } | |||
|         } | |||
|         /* Transpose input matrices */ | |||
|         LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); | |||
|         /* Call LAPACK function and adjust info. The column-major | |||
|            temporaries are passed here, matching lda_t / ldu_t / ldv_t. */ | |||
|         LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a_t, &lda_t, | |||
|                         s, u_t, &ldu_t, v_t, &ldv_t, numrank, iwork, &liwork, | |||
|                         cwork, &lcwork, rwork, &lrwork, &info ); | |||
|         if( info < 0 ) { | |||
|             info = info - 1; | |||
|         } | |||
|         /* Transpose output matrices */ | |||
|         LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); | |||
|         if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { | |||
|             LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, | |||
|                                u, ldu ); | |||
|         } | |||
|         if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { | |||
|             LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, | |||
|                                ldv ); | |||
|         } | |||
|         /* Release memory and exit */ | |||
|         if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { | |||
|             LAPACKE_free( v_t ); | |||
|         } | |||
| exit_level_2: | |||
|         if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { | |||
|             LAPACKE_free( u_t ); | |||
|         } | |||
| exit_level_1: | |||
|         LAPACKE_free( a_t ); | |||
| exit_level_0: | |||
|         if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { | |||
|             LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||
|         } | |||
|     } else { | |||
|         info = -1; | |||
|         LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); | |||
|     } | |||
|     return info; | |||
| } | |||
| @@ -91,7 +91,7 @@ lapack_int LAPACKE_cggesx( int matrix_layout, char jobvsl, char jobvsr, | |||
| if( info != 0 ) { | |||
| goto exit_level_2; | |||
| } | |||
| liwork = (lapack_int)iwork_query; | |||
| liwork = iwork_query; | |||
| lwork = LAPACK_C2INT( work_query ); | |||
| /* Allocate memory for work arrays */ | |||
| iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); | |||
| @@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
| if( info != 0 ) { | |||
| goto exit_level_0; | |||
| } | |||
| liwork = (lapack_int)iwork_query; | |||
| liwork = iwork_query; | |||
| lrwork = (lapack_int)rwork_query; | |||
| lwork = LAPACK_C2INT( work_query ); | |||
| /* Allocate memory for work arrays */ | |||
| @@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||
| if( info != 0 ) { | |||
| goto exit_level_0; | |||
| } | |||
| liwork = (lapack_int)iwork_query; | |||
| liwork = iwork_query; | |||
| lrwork = (lapack_int)rwork_query; | |||
| lwork = LAPACK_C2INT( work_query ); | |||
| /* Allocate memory for work arrays */ | |||
| @@ -71,7 +71,7 @@ lapack_int LAPACKE_chbgvd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
| if( info != 0 ) { | |||
| goto exit_level_0; | |||
| } | |||
| liwork = (lapack_int)iwork_query; | |||
| liwork = iwork_query; | |||
| lrwork = (lapack_int)rwork_query; | |||
| lwork = LAPACK_C2INT( work_query ); | |||
| /* Allocate memory for work arrays */ | |||
| @@ -70,7 +70,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo, | |||
| goto exit_level_0; | |||
| } | |||
| /* Transpose input matrices */ | |||
| LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||
| LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
| /* Call LAPACK function and adjust info */ | |||
| LAPACK_cheev( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | |||
| &info ); | |||
| @@ -78,7 +78,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo, | |||
| info = info - 1; | |||
| } | |||
| /* Transpose output matrices */ | |||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||
| LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||
| /* Release memory and exit */ | |||
| LAPACKE_free( a_t ); | |||
| exit_level_0: | |||
| @@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
| #ifndef LAPACK_DISABLE_NAN_CHECK | |||
| if( LAPACKE_get_nancheck() ) { | |||
| /* Optionally check input matrices for NaNs */ | |||
| if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { | |||
| if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { | |||
| return -5; | |||
| } | |||
| } | |||
| @@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
| if( info != 0 ) { | |||
| goto exit_level_0; | |||
| } | |||
| liwork = (lapack_int)iwork_query; | |||
| liwork = iwork_query; | |||
| lrwork = (lapack_int)rwork_query; | |||
| lwork = LAPACK_C2INT( work_query ); | |||
| /* Allocate memory for work arrays */ | |||
| @@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||
| #ifndef LAPACK_DISABLE_NAN_CHECK | |||
| if( LAPACKE_get_nancheck() ) { | |||
| /* Optionally check input matrices for NaNs */ | |||
| if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { | |||
| if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { | |||
| return -5; | |||
| } | |||
| } | |||
| @@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||
| if( info != 0 ) { | |||
| goto exit_level_0; | |||
| } | |||
| liwork = (lapack_int)iwork_query; | |||
| liwork = iwork_query; | |||
| lrwork = (lapack_int)rwork_query; | |||
| lwork = LAPACK_C2INT( work_query ); | |||
| /* Allocate memory for work arrays */ | |||
| @@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo, | |||
| goto exit_level_0; | |||
| } | |||
| /* Transpose input matrices */ | |||
| LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||
| LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
| /* Call LAPACK function and adjust info */ | |||
| LAPACK_cheevd_2stage( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | |||
| &lrwork, iwork, &liwork, &info ); | |||
| @@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo, | |||
| info = info - 1; | |||
| } | |||
| /* Transpose output matrices */ | |||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||
| LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||
| /* Release memory and exit */ | |||
| LAPACKE_free( a_t ); | |||
| exit_level_0: | |||
| @@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo, | |||
| goto exit_level_0; | |||
| } | |||
| /* Transpose input matrices */ | |||
| LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||
| LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
| /* Call LAPACK function and adjust info */ | |||
| LAPACK_cheevd( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | |||
| &lrwork, iwork, &liwork, &info ); | |||
| @@ -79,7 +79,8 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo, | |||
| info = info - 1; | |||
| } | |||
| /* Transpose output matrices */ | |||
| LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||
| LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||
| /* Release memory and exit */ | |||
| LAPACKE_free( a_t ); | |||
| exit_level_0: | |||