Cortex A57 fixes and Lapack 3.6.0tags/v0.2.16.rc1
| @@ -249,10 +249,14 @@ ifndef NOFORTRAN | |||
| -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| ifeq ($(FC), gfortran) | |||
| ifeq ($(F_COMPILER), GFORTRAN) | |||
| -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| ifdef SMP | |||
| ifeq ($(OSNAME), WINNT) | |||
| -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| else | |||
| -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| endif | |||
| else | |||
| -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| endif | |||
| @@ -288,7 +292,17 @@ endif | |||
| lapack-test : | |||
| (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) | |||
| make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | |||
| ifneq ($(CROSS), 1) | |||
| ( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||
| ./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||
| endif | |||
| lapack-runtest: | |||
| ( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||
| ./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||
| (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||
| blas-test: | |||
| (cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) | |||
| @@ -971,16 +971,29 @@ ifeq ($(DEBUG), 1) | |||
| COMMON_OPT += -g | |||
| endif | |||
| ifeq ($(DEBUG), 1) | |||
| FCOMMON_OPT += -g | |||
| endif | |||
| ifndef COMMON_OPT | |||
| COMMON_OPT = -O2 | |||
| endif | |||
| ifndef FCOMMON_OPT | |||
| ifeq ($(OSNAME), WINNT) | |||
| FCOMMON_OPT = -O0 | |||
| else | |||
| FCOMMON_OPT = -O2 -frecursive | |||
| endif | |||
| endif | |||
| override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) | |||
| override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF) | |||
| override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) | |||
| override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) | |||
| override FFLAGS += $(FCOMMON_OPT) | |||
| override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF) | |||
| #MAKEOVERRIDES = | |||
| #For LAPACK Fortran codes. | |||
| @@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #ifndef ASSEMBLER | |||
| static void __inline blas_lock(volatile BLASULONG *address){ | |||
| long register ret; | |||
| BLASULONG ret; | |||
| do { | |||
| while (*address) {YIELDING;}; | |||
| __asm__ __volatile__( | |||
| "ldaxr %0, [%1] \n\t" | |||
| "stlxr w2, %2, [%1] \n\t" | |||
| "orr %0, %0, x2 \n\t" | |||
| : "=r"(ret) | |||
| : "r"(address), "r"(1l) | |||
| : "memory", "x2" | |||
| "mov x4, #1 \n\t" | |||
| "1: \n\t" | |||
| "ldaxr x2, [%1] \n\t" | |||
| "cbnz x2, 1b \n\t" | |||
| "2: \n\t" | |||
| "stxr w3, x4, [%1] \n\t" | |||
| "cbnz w3, 1b \n\t" | |||
| "mov %0, #0 \n\t" | |||
| : "=r"(ret), "=r"(address) | |||
| : "1"(address) | |||
| : "memory", "x2" , "x3", "x4" | |||
| ); | |||
| } while (ret); | |||
| MB; | |||
| } | |||
| #define BLAS_LOCK_DEFINED | |||
| static inline int blas_quickdivide(blasint x, blasint y){ | |||
| return x / y; | |||
| } | |||
| @@ -110,7 +121,7 @@ REALNAME: | |||
| #define HUGE_PAGESIZE ( 4 << 20) | |||
| #if defined(CORTEXA57) | |||
| #define BUFFER_SIZE (40 << 20) | |||
| #define BUFFER_SIZE (20 << 20) | |||
| #else | |||
| #define BUFFER_SIZE (16 << 20) | |||
| #endif | |||
| @@ -104,6 +104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <errno.h> | |||
| #include <linux/unistd.h> | |||
| #include <sys/syscall.h> | |||
| #include <sys/time.h> | |||
| #include <sys/resource.h> | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_DARWIN) | |||
| @@ -1361,6 +1363,18 @@ void CONSTRUCTOR gotoblas_init(void) { | |||
| gotoblas_memory_init(); | |||
| #endif | |||
| #if defined(OS_LINUX) | |||
| struct rlimit curlimit; | |||
| if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) | |||
| { | |||
| if ( curlimit.rlim_cur != curlimit.rlim_max ) | |||
| { | |||
| curlimit.rlim_cur = curlimit.rlim_max; | |||
| setrlimit(RLIMIT_STACK, &curlimit); | |||
| } | |||
| } | |||
| #endif | |||
| #ifdef SMP | |||
| if (blas_cpu_number == 0) blas_get_cpu_number(); | |||
| #ifdef SMP_SERVER | |||
| @@ -173,18 +173,18 @@ | |||
| sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv, | |||
| sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2, | |||
| sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx, | |||
| sgegs, sgegv, sgehd2, sgehrd, sgelq2, sgelqf, | |||
| sgels, sgelsd, sgelss, sgelsx, sgelsy, sgeql2, sgeqlf, | |||
| sgeqp3, sgeqpf, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs, | |||
| sgehd2, sgehrd, sgelq2, sgelqf, | |||
| sgels, sgelsd, sgelss, sgelsy, sgeql2, sgeqlf, | |||
| sgeqp3, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs, | |||
| sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx, | |||
| sgetc2, sgetri, | |||
| sggbak, sggbal, sgges, sggesx, sggev, sggevx, | |||
| sggglm, sgghrd, sgglse, sggqrf, | |||
| sggrqf, sggsvd, sggsvp, sgtcon, sgtrfs, sgtsv, | |||
| sggrqf, sgtcon, sgtrfs, sgtsv, | |||
| sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz, | |||
| shsein, shseqr, slabrd, slacon, slacn2, | |||
| slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr, | |||
| slahrd, slahr2, slaic1, slaln2, slals0, slalsa, slalsd, | |||
| slahr2, slaic1, slaln2, slals0, slalsa, slalsd, | |||
| slangb, slange, slangt, slanhs, slansb, slansp, | |||
| slansy, slantb, slantp, slantr, slanv2, | |||
| slapll, slapmt, | |||
| @@ -194,7 +194,7 @@ | |||
| slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv, | |||
| slarrv, slartv, | |||
| slarz, slarzb, slarzt, slasy2, slasyf, | |||
| slatbs, slatdf, slatps, slatrd, slatrs, slatrz, slatzm, | |||
| slatbs, slatdf, slatps, slatrd, slatrs, slatrz, | |||
| sopgtr, sopmtr, sorg2l, sorg2r, | |||
| sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2, | |||
| sorgrq, sorgtr, sorm2l, sorm2r, | |||
| @@ -220,7 +220,7 @@ | |||
| stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri, | |||
| stptrs, | |||
| strcon, strevc, strexc, strrfs, strsen, strsna, strsyl, | |||
| strtrs, stzrqf, stzrzf, sstemr, | |||
| strtrs, stzrzf, sstemr, | |||
| slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp, | |||
| stfttr, stpttf, stpttr, strttf, strttp, | |||
| sgejsv, sgesvj, sgsvj0, sgsvj1, | |||
| @@ -245,14 +245,13 @@ | |||
| cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx, | |||
| cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd, | |||
| cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx, | |||
| cgegs, cgegv, cgehd2, cgehrd, cgelq2, cgelqf, | |||
| cgels, cgelsd, cgelss, cgelsx, cgelsy, cgeql2, cgeqlf, cgeqp3, | |||
| cgeqpf, cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs, | |||
| cgehd2, cgehrd, cgelq2, cgelqf, | |||
| cgels, cgelsd, cgelss, cgelsy, cgeql2, cgeqlf, cgeqp3, | |||
| cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs, | |||
| cgerq2, cgerqf, cgesc2, cgesdd, cgesvd, | |||
| cgesvx, cgetc2, cgetri, | |||
| cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm, | |||
| cgghrd, cgglse, cggqrf, cggrqf, | |||
| cggsvd, cggsvp, | |||
| cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev, | |||
| chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd, | |||
| checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst, | |||
| @@ -267,7 +266,7 @@ | |||
| claed0, claed7, claed8, | |||
| claein, claesy, claev2, clags2, clagtm, | |||
| clahef, clahqr, | |||
| clahrd, clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt, | |||
| clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt, | |||
| clanhb, clanhe, | |||
| clanhp, clanhs, clanht, clansb, clansp, clansy, clantb, | |||
| clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge, | |||
| @@ -278,7 +277,7 @@ | |||
| clarfx, clargv, clarnv, clarrv, clartg, clartv, | |||
| clarz, clarzb, clarzt, clascl, claset, clasr, classq, | |||
| clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz, | |||
| clatzm, cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv, | |||
| cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv, | |||
| cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs, | |||
| cposv, cposvx, cpstrf, cpstf2, | |||
| cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs, | |||
| @@ -293,7 +292,7 @@ | |||
| ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon, | |||
| ctprfs, ctptri, | |||
| ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna, | |||
| ctrsyl, ctrtrs, ctzrqf, ctzrzf, cung2l, cung2r, | |||
| ctrsyl, ctrtrs, ctzrzf, cung2l, cung2r, | |||
| cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2, | |||
| cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2, | |||
| cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz, | |||
| @@ -321,18 +320,18 @@ | |||
| dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv, | |||
| dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2, | |||
| dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx, | |||
| dgegs, dgegv, dgehd2, dgehrd, dgelq2, dgelqf, | |||
| dgels, dgelsd, dgelss, dgelsx, dgelsy, dgeql2, dgeqlf, | |||
| dgeqp3, dgeqpf, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs, | |||
| dgehd2, dgehrd, dgelq2, dgelqf, | |||
| dgels, dgelsd, dgelss, dgelsy, dgeql2, dgeqlf, | |||
| dgeqp3, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs, | |||
| dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx, | |||
| dgetc2, dgetri, | |||
| dggbak, dggbal, dgges, dggesx, dggev, dggevx, | |||
| dggglm, dgghrd, dgglse, dggqrf, | |||
| dggrqf, dggsvd, dggsvp, dgtcon, dgtrfs, dgtsv, | |||
| dggrqf, dgtcon, dgtrfs, dgtsv, | |||
| dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz, | |||
| dhsein, dhseqr, dlabrd, dlacon, dlacn2, | |||
| dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr, | |||
| dlahrd, dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd, | |||
| dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd, | |||
| dlangb, dlange, dlangt, dlanhs, dlansb, dlansp, | |||
| dlansy, dlantb, dlantp, dlantr, dlanv2, | |||
| dlapll, dlapmt, | |||
| @@ -342,7 +341,7 @@ | |||
| dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx, | |||
| dlargv, dlarrv, dlartv, | |||
| dlarz, dlarzb, dlarzt, dlasy2, dlasyf, | |||
| dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, dlatzm, | |||
| dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, | |||
| dopgtr, dopmtr, dorg2l, dorg2r, | |||
| dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2, | |||
| dorgrq, dorgtr, dorm2l, dorm2r, | |||
| @@ -368,7 +367,7 @@ | |||
| dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri, | |||
| dtptrs, | |||
| dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl, | |||
| dtrtrs, dtzrqf, dtzrzf, dstemr, | |||
| dtrtrs, dtzrzf, dstemr, | |||
| dsgesv, dsposv, dlag2s, slag2d, dlat2s, | |||
| dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp, | |||
| dtfttr, dtpttf, dtpttr, dtrttf, dtrttp, | |||
| @@ -387,14 +386,13 @@ | |||
| zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx, | |||
| zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd, | |||
| zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx, | |||
| zgegs, zgegv, zgehd2, zgehrd, zgelq2, zgelqf, | |||
| zgels, zgelsd, zgelss, zgelsx, zgelsy, zgeql2, zgeqlf, zgeqp3, | |||
| zgeqpf, zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf, | |||
| zgehd2, zgehrd, zgelq2, zgelqf, | |||
| zgels, zgelsd, zgelss, zgelsy, zgeql2, zgeqlf, zgeqp3, | |||
| zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf, | |||
| zgesc2, zgesdd, zgesvd, zgesvx, zgetc2, | |||
| zgetri, | |||
| zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm, | |||
| zgghrd, zgglse, zggqrf, zggrqf, | |||
| zggsvd, zggsvp, | |||
| zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev, | |||
| zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd, | |||
| zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst, | |||
| @@ -409,7 +407,7 @@ | |||
| zlaed0, zlaed7, zlaed8, | |||
| zlaein, zlaesy, zlaev2, zlags2, zlagtm, | |||
| zlahef, zlahqr, | |||
| zlahrd, zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange, | |||
| zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange, | |||
| zlangt, zlanhb, | |||
| zlanhe, | |||
| zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb, | |||
| @@ -422,7 +420,7 @@ | |||
| zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv, | |||
| zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr, | |||
| zlassq, zlasyf, | |||
| zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, zlatzm, | |||
| zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, | |||
| zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv, | |||
| zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs, | |||
| zposv, zposvx, zpotrs, zpstrf, zpstf2, | |||
| @@ -438,7 +436,7 @@ | |||
| ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon, | |||
| ztprfs, ztptri, | |||
| ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna, | |||
| ztrsyl, ztrtrs, ztzrqf, ztzrzf, zung2l, | |||
| ztrsyl, ztrtrs, ztzrzf, zung2l, | |||
| zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2, | |||
| zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2, | |||
| zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz, | |||
| @@ -452,6 +450,140 @@ | |||
| zunbdb5, zunbdb6, zuncsd, zuncsd2by1, | |||
| zgeqrt, zgeqrt2, zgeqrt3, zgemqrt, | |||
| ztpqrt, ztpqrt2, ztpmqrt, ztprfb, | |||
| # functions added for lapack-3.6.0 | |||
| cgejsv, | |||
| cgesvdx, | |||
| cgesvj, | |||
| cgetrf2, | |||
| cgges3, | |||
| cggev3, | |||
| cgghd3, | |||
| cggsvd3, | |||
| cggsvp3, | |||
| cgsvj0, | |||
| cgsvj1, | |||
| clagge, | |||
| claghe, | |||
| clagsy, | |||
| clahilb, | |||
| clakf2, | |||
| clarge, | |||
| clarnd, | |||
| claror, | |||
| clarot, | |||
| clatm1, | |||
| clatm2, | |||
| clatm3, | |||
| clatm5, | |||
| clatm6, | |||
| clatme, | |||
| clatmr, | |||
| clatms, | |||
| clatmt, | |||
| cpotrf2, | |||
| csbmv, | |||
| cspr2, | |||
| csyr2, | |||
| cunm22, | |||
| dbdsvdx, | |||
| dgesvdx, | |||
| dgetrf2, | |||
| dgges3, | |||
| dggev3, | |||
| dgghd3, | |||
| dggsvd3, | |||
| dggsvp3, | |||
| dladiv2, | |||
| dlagge, | |||
| dlagsy, | |||
| dlahilb, | |||
| dlakf2, | |||
| dlaran, | |||
| dlarge, | |||
| dlarnd, | |||
| dlaror, | |||
| dlarot, | |||
| dlatm1, | |||
| dlatm2, | |||
| dlatm3, | |||
| dlatm5, | |||
| dlatm6, | |||
| dlatm7, | |||
| dlatme, | |||
| dlatmr, | |||
| dlatms, | |||
| dlatmt, | |||
| dorm22, | |||
| dpotrf2, | |||
| dsecnd, | |||
| sbdsvdx, | |||
| second, | |||
| sgesvdx, | |||
| sgetrf2, | |||
| sgges3, | |||
| sggev3, | |||
| sgghd3, | |||
| sggsvd3, | |||
| sggsvp3, | |||
| sladiv2, | |||
| slagge, | |||
| slagsy, | |||
| slahilb, | |||
| slakf2, | |||
| slaran, | |||
| slarge, | |||
| slarnd, | |||
| slaror, | |||
| slarot, | |||
| slatm1, | |||
| slatm2, | |||
| slatm3, | |||
| slatm5, | |||
| slatm6, | |||
| slatm7, | |||
| slatme, | |||
| slatmr, | |||
| slatms, | |||
| slatmt, | |||
| sorm22, | |||
| spotrf2, | |||
| xerbla, | |||
| zgejsv, | |||
| zgesvdx, | |||
| zgesvj, | |||
| zgetrf2, | |||
| zgges3, | |||
| zggev3, | |||
| zgghd3, | |||
| zggsvd3, | |||
| zggsvp3, | |||
| zgsvj0, | |||
| zgsvj1, | |||
| zlagge, | |||
| zlaghe, | |||
| zlagsy, | |||
| zlahilb, | |||
| zlakf2, | |||
| zlarge, | |||
| zlarnd, | |||
| zlaror, | |||
| zlarot, | |||
| zlatm1, | |||
| zlatm2, | |||
| zlatm3, | |||
| zlatm5, | |||
| zlatm6, | |||
| zlatme, | |||
| zlatmr, | |||
| zlatms, | |||
| zlatmt, | |||
| zpotrf2, | |||
| zsbmv, | |||
| zspr2, | |||
| zsyr2, | |||
| zunm22 | |||
| ); | |||
| @lapack_extendedprecision_objs = ( | |||
| @@ -682,8 +814,6 @@ | |||
| LAPACKE_cgeqlf_work, | |||
| LAPACKE_cgeqp3, | |||
| LAPACKE_cgeqp3_work, | |||
| LAPACKE_cgeqpf, | |||
| LAPACKE_cgeqpf_work, | |||
| LAPACKE_cgeqr2, | |||
| LAPACKE_cgeqr2_work, | |||
| LAPACKE_cgeqrf, | |||
| @@ -738,10 +868,6 @@ | |||
| LAPACKE_cggqrf_work, | |||
| LAPACKE_cggrqf, | |||
| LAPACKE_cggrqf_work, | |||
| LAPACKE_cggsvd, | |||
| LAPACKE_cggsvd_work, | |||
| LAPACKE_cggsvp, | |||
| LAPACKE_cggsvp_work, | |||
| LAPACKE_cgtcon, | |||
| LAPACKE_cgtcon_work, | |||
| LAPACKE_cgtrfs, | |||
| @@ -1186,8 +1312,6 @@ | |||
| LAPACKE_dgeqlf_work, | |||
| LAPACKE_dgeqp3, | |||
| LAPACKE_dgeqp3_work, | |||
| LAPACKE_dgeqpf, | |||
| LAPACKE_dgeqpf_work, | |||
| LAPACKE_dgeqr2, | |||
| LAPACKE_dgeqr2_work, | |||
| LAPACKE_dgeqrf, | |||
| @@ -1244,10 +1368,6 @@ | |||
| LAPACKE_dggqrf_work, | |||
| LAPACKE_dggrqf, | |||
| LAPACKE_dggrqf_work, | |||
| LAPACKE_dggsvd, | |||
| LAPACKE_dggsvd_work, | |||
| LAPACKE_dggsvp, | |||
| LAPACKE_dggsvp_work, | |||
| LAPACKE_dgtcon, | |||
| LAPACKE_dgtcon_work, | |||
| LAPACKE_dgtrfs, | |||
| @@ -1676,8 +1796,6 @@ | |||
| LAPACKE_sgeqlf_work, | |||
| LAPACKE_sgeqp3, | |||
| LAPACKE_sgeqp3_work, | |||
| LAPACKE_sgeqpf, | |||
| LAPACKE_sgeqpf_work, | |||
| LAPACKE_sgeqr2, | |||
| LAPACKE_sgeqr2_work, | |||
| LAPACKE_sgeqrf, | |||
| @@ -1734,10 +1852,6 @@ | |||
| LAPACKE_sggqrf_work, | |||
| LAPACKE_sggrqf, | |||
| LAPACKE_sggrqf_work, | |||
| LAPACKE_sggsvd, | |||
| LAPACKE_sggsvd_work, | |||
| LAPACKE_sggsvp, | |||
| LAPACKE_sggsvp_work, | |||
| LAPACKE_sgtcon, | |||
| LAPACKE_sgtcon_work, | |||
| LAPACKE_sgtrfs, | |||
| @@ -2158,8 +2272,6 @@ | |||
| LAPACKE_zgeqlf_work, | |||
| LAPACKE_zgeqp3, | |||
| LAPACKE_zgeqp3_work, | |||
| LAPACKE_zgeqpf, | |||
| LAPACKE_zgeqpf_work, | |||
| LAPACKE_zgeqr2, | |||
| LAPACKE_zgeqr2_work, | |||
| LAPACKE_zgeqrf, | |||
| @@ -2214,10 +2326,6 @@ | |||
| LAPACKE_zggqrf_work, | |||
| LAPACKE_zggrqf, | |||
| LAPACKE_zggrqf_work, | |||
| LAPACKE_zggsvd, | |||
| LAPACKE_zggsvd_work, | |||
| LAPACKE_zggsvp, | |||
| LAPACKE_zggsvp_work, | |||
| LAPACKE_zgtcon, | |||
| LAPACKE_zgtcon_work, | |||
| LAPACKE_zgtrfs, | |||
| @@ -2707,6 +2815,134 @@ | |||
| LAPACKE_slagsy_work, | |||
| LAPACKE_zlagsy, | |||
| LAPACKE_zlagsy_work, | |||
| ## new function from lapack-3.6.0 | |||
| LAPACKE_cgejsv, | |||
| LAPACKE_cgejsv_work, | |||
| LAPACKE_cgesvdx, | |||
| LAPACKE_cgesvdx_work, | |||
| LAPACKE_cgesvj, | |||
| LAPACKE_cgesvj_work, | |||
| LAPACKE_cgetrf2, | |||
| LAPACKE_cgetrf2_work, | |||
| LAPACKE_cgges3, | |||
| LAPACKE_cgges3_work, | |||
| LAPACKE_cggev3, | |||
| LAPACKE_cggev3_work, | |||
| LAPACKE_cgghd3, | |||
| LAPACKE_cgghd3_work, | |||
| LAPACKE_cggsvd3, | |||
| LAPACKE_cggsvd3_work, | |||
| LAPACKE_cggsvp3, | |||
| LAPACKE_cggsvp3_work, | |||
| LAPACKE_chetrf_rook, | |||
| LAPACKE_chetrf_rook_work, | |||
| LAPACKE_chetrs_rook, | |||
| LAPACKE_chetrs_rook_work, | |||
| LAPACKE_clapmt, | |||
| LAPACKE_clapmt_work, | |||
| LAPACKE_clascl, | |||
| LAPACKE_clascl_work, | |||
| LAPACKE_cpotrf2, | |||
| LAPACKE_cpotrf2_work, | |||
| LAPACKE_csytrf_rook, | |||
| LAPACKE_csytrf_rook_work, | |||
| LAPACKE_csytrs_rook, | |||
| LAPACKE_csytrs_rook_work, | |||
| LAPACKE_cuncsd2by1, | |||
| LAPACKE_cuncsd2by1_work, | |||
| LAPACKE_dbdsvdx, | |||
| LAPACKE_dbdsvdx_work, | |||
| LAPACKE_dgesvdx, | |||
| LAPACKE_dgesvdx_work, | |||
| LAPACKE_dgetrf2, | |||
| LAPACKE_dgetrf2_work, | |||
| LAPACKE_dgges3, | |||
| LAPACKE_dgges3_work, | |||
| LAPACKE_dggev3, | |||
| LAPACKE_dggev3_work, | |||
| LAPACKE_dgghd3, | |||
| LAPACKE_dgghd3_work, | |||
| LAPACKE_dggsvd3, | |||
| LAPACKE_dggsvd3_work, | |||
| LAPACKE_dggsvp3, | |||
| LAPACKE_dggsvp3_work, | |||
| LAPACKE_dlapmt, | |||
| LAPACKE_dlapmt_work, | |||
| LAPACKE_dlascl, | |||
| LAPACKE_dlascl_work, | |||
| LAPACKE_dorcsd2by1, | |||
| LAPACKE_dorcsd2by1_work, | |||
| LAPACKE_dpotrf2, | |||
| LAPACKE_dpotrf2_work, | |||
| LAPACKE_dsytrf_rook, | |||
| LAPACKE_dsytrf_rook_work, | |||
| LAPACKE_dsytrs_rook, | |||
| LAPACKE_dsytrs_rook_work, | |||
| LAPACKE_sbdsvdx, | |||
| LAPACKE_sbdsvdx_work, | |||
| LAPACKE_sgesvdx, | |||
| LAPACKE_sgesvdx_work, | |||
| LAPACKE_sgetrf2, | |||
| LAPACKE_sgetrf2_work, | |||
| LAPACKE_sgges3, | |||
| LAPACKE_sgges3_work, | |||
| LAPACKE_sggev3, | |||
| LAPACKE_sggev3_work, | |||
| LAPACKE_sgghd3, | |||
| LAPACKE_sgghd3_work, | |||
| LAPACKE_sggsvd3, | |||
| LAPACKE_sggsvd3_work, | |||
| LAPACKE_sggsvp3, | |||
| LAPACKE_sggsvp3_work, | |||
| LAPACKE_slapmt, | |||
| LAPACKE_slapmt_work, | |||
| LAPACKE_slascl, | |||
| LAPACKE_slascl_work, | |||
| LAPACKE_sorcsd2by1, | |||
| LAPACKE_sorcsd2by1_work, | |||
| LAPACKE_spotrf2, | |||
| LAPACKE_spotrf2_work, | |||
| LAPACKE_ssytrf_rook, | |||
| LAPACKE_ssytrf_rook_work, | |||
| LAPACKE_ssytrs_rook, | |||
| LAPACKE_ssytrs_rook_work, | |||
| LAPACKE_stpqrt, | |||
| LAPACKE_stpqrt_work, | |||
| LAPACKE_zgejsv, | |||
| LAPACKE_zgejsv_work, | |||
| LAPACKE_zgesvdx, | |||
| LAPACKE_zgesvdx_work, | |||
| LAPACKE_zgesvj, | |||
| LAPACKE_zgesvj_work, | |||
| LAPACKE_zgetrf2, | |||
| LAPACKE_zgetrf2_work, | |||
| LAPACKE_zgges3, | |||
| LAPACKE_zgges3_work, | |||
| LAPACKE_zggev3, | |||
| LAPACKE_zggev3_work, | |||
| LAPACKE_zgghd3, | |||
| LAPACKE_zgghd3_work, | |||
| LAPACKE_zggsvd3, | |||
| LAPACKE_zggsvd3_work, | |||
| LAPACKE_zggsvp3, | |||
| LAPACKE_zggsvp3_work, | |||
| LAPACKE_zhetrf_rook, | |||
| LAPACKE_zhetrf_rook_work, | |||
| LAPACKE_zhetrs_rook, | |||
| LAPACKE_zhetrs_rook_work, | |||
| LAPACKE_zlapmt, | |||
| LAPACKE_zlapmt_work, | |||
| LAPACKE_zlascl, | |||
| LAPACKE_zlascl_work, | |||
| LAPACKE_zpotrf2, | |||
| LAPACKE_zpotrf2_work, | |||
| LAPACKE_zsytrf_rook, | |||
| LAPACKE_zsytrf_rook_work, | |||
| LAPACKE_zsytrs_rook, | |||
| LAPACKE_zsytrs_rook_work, | |||
| LAPACKE_zuncsd2by1, | |||
| LAPACKE_zuncsd2by1_work | |||
| ); | |||
| #These function may need 2 underscores. | |||
| @@ -5,8 +5,8 @@ DAMAXKERNEL = amax.S | |||
| CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| ISAMAXKERNEL = isamax.S | |||
| IDAMAXKERNEL = idamax.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| @@ -25,13 +25,13 @@ DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| DOTKERNEL = dot.S | |||
| SDOTKERNEL = dot.S | |||
| DDOTKERNEL = dot.S | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| SNRM2KERNEL = snrm2.S | |||
| DNRM2KERNEL = dnrm2.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| @@ -40,7 +40,7 @@ DROTKERNEL = rot.S | |||
| CROTKERNEL = zrot.S | |||
| ZROTKERNEL = zrot.S | |||
| SCALKERNEL = scal.S | |||
| SSCALKERNEL = scal.S | |||
| DSCALKERNEL = scal.S | |||
| CSCALKERNEL = zscal.S | |||
| ZSCALKERNEL = zscal.S | |||
| @@ -181,73 +181,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| fmul v16.4s, v0.4s, v8.4s[0] | |||
| OP_ii v16.4s, v1.4s, v9.4s[0] | |||
| fmul v17.4s, v0.4s, v9.4s[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v17.4s, v17.4s | |||
| eor v17.16b, v17.16b, v17.16b | |||
| fmls v17.4s, v0.4s, v9.4s[0] | |||
| #else | |||
| fmul v17.4s, v0.4s, v9.4s[0] | |||
| #endif | |||
| OP_ir v17.4s, v1.4s, v8.4s[0] | |||
| fmul v20.4s, v0.4s, v8.4s[1] | |||
| OP_ii v20.4s, v1.4s, v9.4s[1] | |||
| fmul v21.4s, v0.4s, v9.4s[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v21.4s, v21.4s | |||
| eor v21.16b, v21.16b, v21.16b | |||
| fmls v21.4s, v0.4s, v9.4s[1] | |||
| #else | |||
| fmul v21.4s, v0.4s, v9.4s[1] | |||
| #endif | |||
| OP_ir v21.4s, v1.4s, v8.4s[1] | |||
| fmul v24.4s, v0.4s, v8.4s[2] | |||
| OP_ii v24.4s, v1.4s, v9.4s[2] | |||
| fmul v25.4s, v0.4s, v9.4s[2] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v25.4s, v25.4s | |||
| eor v25.16b, v25.16b, v25.16b | |||
| fmls v25.4s, v0.4s, v9.4s[2] | |||
| #else | |||
| fmul v25.4s, v0.4s, v9.4s[2] | |||
| #endif | |||
| OP_ir v25.4s, v1.4s, v8.4s[2] | |||
| fmul v28.4s, v0.4s, v8.4s[3] | |||
| OP_ii v28.4s, v1.4s, v9.4s[3] | |||
| fmul v29.4s, v0.4s, v9.4s[3] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v29.4s, v29.4s | |||
| eor v29.16b, v29.16b, v29.16b | |||
| fmls v29.4s, v0.4s, v9.4s[3] | |||
| #else | |||
| fmul v29.4s, v0.4s, v9.4s[3] | |||
| #endif | |||
| OP_ir v29.4s, v1.4s, v8.4s[3] | |||
| fmul v18.4s, v2.4s, v8.4s[0] | |||
| OP_ii v18.4s, v3.4s, v9.4s[0] | |||
| fmul v19.4s, v2.4s, v9.4s[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v19.4s, v19.4s | |||
| eor v19.16b, v19.16b, v19.16b | |||
| fmls v19.4s, v2.4s, v9.4s[0] | |||
| #else | |||
| fmul v19.4s, v2.4s, v9.4s[0] | |||
| #endif | |||
| OP_ir v19.4s, v3.4s, v8.4s[0] | |||
| fmul v22.4s, v2.4s, v8.4s[1] | |||
| OP_ii v22.4s, v3.4s, v9.4s[1] | |||
| fmul v23.4s, v2.4s, v9.4s[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v23.4s, v23.4s | |||
| eor v23.16b, v23.16b, v23.16b | |||
| fmls v23.4s, v2.4s, v9.4s[1] | |||
| #else | |||
| fmul v23.4s, v2.4s, v9.4s[1] | |||
| #endif | |||
| OP_ir v23.4s, v3.4s, v8.4s[1] | |||
| fmul v26.4s, v2.4s, v8.4s[2] | |||
| OP_ii v26.4s, v3.4s, v9.4s[2] | |||
| fmul v27.4s, v2.4s, v9.4s[2] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v27.4s, v27.4s | |||
| eor v27.16b, v27.16b, v27.16b | |||
| fmls v27.4s, v2.4s, v9.4s[2] | |||
| #else | |||
| fmul v27.4s, v2.4s, v9.4s[2] | |||
| #endif | |||
| OP_ir v27.4s, v3.4s, v8.4s[2] | |||
| fmul v30.4s, v2.4s, v8.4s[3] | |||
| OP_ii v30.4s, v3.4s, v9.4s[3] | |||
| fmul v31.4s, v2.4s, v9.4s[3] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v31.4s, v31.4s | |||
| eor v31.16b, v31.16b, v31.16b | |||
| fmls v31.4s, v2.4s, v9.4s[3] | |||
| #else | |||
| fmul v31.4s, v2.4s, v9.4s[3] | |||
| #endif | |||
| OP_ir v31.4s, v3.4s, v8.4s[3] | |||
| @@ -172,37 +172,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| fmul v16.4s, v0.4s, v8.4s[0] | |||
| OP_ii v16.4s, v1.4s, v9.4s[0] | |||
| fmul v17.4s, v0.4s, v9.4s[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v17.4s, v17.4s | |||
| eor v17.16b, v17.16b, v17.16b | |||
| fmls v17.4s, v0.4s, v9.4s[0] | |||
| #else | |||
| fmul v17.4s, v0.4s, v9.4s[0] | |||
| #endif | |||
| OP_ir v17.4s, v1.4s, v8.4s[0] | |||
| fmul v20.4s, v0.4s, v8.4s[1] | |||
| OP_ii v20.4s, v1.4s, v9.4s[1] | |||
| fmul v21.4s, v0.4s, v9.4s[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v21.4s, v21.4s | |||
| eor v21.16b, v21.16b, v21.16b | |||
| fmls v21.4s, v0.4s, v9.4s[1] | |||
| #else | |||
| fmul v21.4s, v0.4s, v9.4s[1] | |||
| #endif | |||
| OP_ir v21.4s, v1.4s, v8.4s[1] | |||
| fmul v24.4s, v0.4s, v8.4s[2] | |||
| OP_ii v24.4s, v1.4s, v9.4s[2] | |||
| fmul v25.4s, v0.4s, v9.4s[2] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v25.4s, v25.4s | |||
| eor v25.16b, v25.16b, v25.16b | |||
| fmls v25.4s, v0.4s, v9.4s[2] | |||
| #else | |||
| fmul v25.4s, v0.4s, v9.4s[2] | |||
| #endif | |||
| OP_ir v25.4s, v1.4s, v8.4s[2] | |||
| fmul v28.4s, v0.4s, v8.4s[3] | |||
| OP_ii v28.4s, v1.4s, v9.4s[3] | |||
| fmul v29.4s, v0.4s, v9.4s[3] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v29.4s, v29.4s | |||
| eor v29.16b, v29.16b, v29.16b | |||
| fmls v29.4s, v0.4s, v9.4s[3] | |||
| #else | |||
| fmul v29.4s, v0.4s, v9.4s[3] | |||
| #endif | |||
| OP_ir v29.4s, v1.4s, v8.4s[3] | |||
| @@ -0,0 +1,135 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2015, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #define N x0 /* vector length */ | |||
| #define X x1 /* X vector address */ | |||
| #define INC_X x2 /* X stride */ | |||
| #define INDEX x3 /* index of max/min value */ | |||
| #define Z x4 /* vector index */ | |||
| #define I x5 /* loop variable */ | |||
| /******************************************************************************* | |||
| * Macro definitions | |||
| *******************************************************************************/ | |||
| #if defined(USE_MIN) | |||
| #define COND le | |||
| #else | |||
| #define COND ge | |||
| #endif | |||
| #if !defined(DOUBLE) | |||
| #define MAXF s0 | |||
| #define TMPF s1 | |||
| #define TMPVF {v1.s}[0] | |||
| #define SZ 4 | |||
| #else | |||
| #define MAXF d0 | |||
| #define TMPF d1 | |||
| #define TMPVF {v1.d}[0] | |||
| #define SZ 8 | |||
| #endif | |||
| /******************************************************************************/ | |||
| .macro INIT_S | |||
| #if !defined(DOUBLE) | |||
| lsl INC_X, INC_X, #2 | |||
| ld1 {v0.s}[0], [X], INC_X | |||
| #else | |||
| lsl INC_X, INC_X, #3 | |||
| ld1 {v0.d}[0], [X], INC_X | |||
| #endif | |||
| mov Z, #1 | |||
| mov INDEX, Z | |||
| fabs MAXF, MAXF | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| ld1 TMPVF, [X], INC_X | |||
| add Z, Z, #1 | |||
| fabs TMPF, TMPF | |||
| fcmp MAXF, TMPF | |||
| fcsel MAXF, MAXF, TMPF, COND | |||
| csel INDEX, INDEX, Z, COND | |||
| .endm | |||
| /******************************************************************************* | |||
| * End of macro definitions | |||
| *******************************************************************************/ | |||
| PROLOGUE | |||
| cmp N, xzr | |||
| ble iamax_kernel_zero | |||
| cmp INC_X, xzr | |||
| ble iamax_kernel_zero | |||
| INIT_S | |||
| subs N, N, #1 | |||
| ble iamax_kernel_L999 | |||
| asr I, N, #2 | |||
| cmp I, xzr | |||
| ble iamax_kernel_S1 | |||
| iamax_kernel_S4: | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_S4 | |||
| iamax_kernel_S1: | |||
| ands I, N, #3 | |||
| ble iamax_kernel_L999 | |||
| iamax_kernel_S10: | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_S10 | |||
| iamax_kernel_L999: | |||
| mov x0, INDEX | |||
| ret | |||
| iamax_kernel_zero: | |||
| mov x0, xzr | |||
| ret | |||
| EPILOGUE | |||
| @@ -1,124 +0,0 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2015, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #define N x0 /* vector length */ | |||
| #define X x1 /* X vector address */ | |||
| #define INC_X x2 /* X stride */ | |||
| #define INDEX x3 /* index of max/min value */ | |||
| #define Z x4 /* vector index */ | |||
| #define I x5 /* loop variable */ | |||
| /******************************************************************************* | |||
| * Macro definitions | |||
| *******************************************************************************/ | |||
| #if defined(USE_MIN) | |||
| #define COND le | |||
| #else | |||
| #define COND ge | |||
| #endif | |||
| #define MAXF d0 | |||
| #define TMPF d1 | |||
| #define TMPVF {v1.d}[0] | |||
| #define SZ 8 | |||
| /******************************************************************************/ | |||
| .macro INIT_S | |||
| lsl INC_X, INC_X, #3 | |||
| ld1 {v0.d}[0], [X], INC_X | |||
| mov Z, #1 | |||
| mov INDEX, Z | |||
| fabs MAXF, MAXF | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| ld1 TMPVF, [X], INC_X | |||
| add Z, Z, #1 | |||
| fabs TMPF, TMPF | |||
| fcmp MAXF, TMPF | |||
| fcsel MAXF, MAXF, TMPF, COND | |||
| csel INDEX, INDEX, Z, COND | |||
| .endm | |||
| /******************************************************************************* | |||
| * End of macro definitions | |||
| *******************************************************************************/ | |||
| PROLOGUE | |||
| cmp N, xzr | |||
| ble iamax_kernel_zero | |||
| cmp INC_X, xzr | |||
| ble iamax_kernel_zero | |||
| INIT_S | |||
| subs N, N, #1 | |||
| ble iamax_kernel_L999 | |||
| asr I, N, #2 | |||
| cmp I, xzr | |||
| ble iamax_kernel_S1 | |||
| iamax_kernel_S4: | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_S4 | |||
| iamax_kernel_S1: | |||
| ands I, N, #3 | |||
| ble iamax_kernel_L999 | |||
| iamax_kernel_S10: | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_S10 | |||
| iamax_kernel_L999: | |||
| mov x0, INDEX | |||
| ret | |||
| iamax_kernel_zero: | |||
| mov x0, xzr | |||
| ret | |||
| EPILOGUE | |||
| @@ -1,213 +0,0 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2015, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #define N x0 /* vector length */ | |||
| #define X x1 /* X vector address */ | |||
| #define INC_X x2 /* X stride */ | |||
| #define INDEX x3 /* index of max/min value */ | |||
| #define Z x4 /* vector index */ | |||
| #define I x5 /* loop variable */ | |||
| #define X_COPY x6 /* copy of X address */ | |||
| #define MAXF_Z x7 | |||
| /******************************************************************************* | |||
| * Macro definitions | |||
| *******************************************************************************/ | |||
| #define MAXF s5 | |||
| #define TMPF s6 | |||
| #define TMPVF {v6.s}[0] | |||
| #define SZ 4 | |||
| /******************************************************************************/ | |||
| .macro INIT_F1 | |||
| ldr MAXF, [X], #SZ | |||
| mov Z, #1 | |||
| mov INDEX, Z | |||
| fabs MAXF, MAXF | |||
| .endm | |||
| .macro KERNEL_F1 | |||
| ldr TMPF, [X], #SZ | |||
| add Z, Z, #1 | |||
| fabs TMPF, TMPF | |||
| fcmp TMPF, MAXF | |||
| fcsel MAXF, MAXF, TMPF, le | |||
| csel INDEX, INDEX, Z, le | |||
| .endm | |||
| .macro INIT_F4 | |||
| ld1 {v0.4s}, [X], #16 | |||
| fabs v0.4s, v0.4s | |||
| fmaxv MAXF, v0.4s | |||
| mov Z, #5 | |||
| mov MAXF_Z, #1 | |||
| .endm | |||
| .macro KERNEL_F4 | |||
| ld1 {v0.4s}, [X], #16 | |||
| fabs v0.4s, v0.4s | |||
| fmaxv TMPF, v0.4s | |||
| PRFM PLDL1KEEP, [X, #512] | |||
| fcmp TMPF, MAXF | |||
| fcsel MAXF, MAXF, TMPF, le | |||
| csel MAXF_Z, MAXF_Z, Z, le | |||
| add Z, Z, #4 | |||
| .endm | |||
| .macro KERNEL_F4_FINALIZE | |||
| mov INDEX, MAXF_Z | |||
| sub MAXF_Z, MAXF_Z, #1 | |||
| lsl MAXF_Z, MAXF_Z, #2 | |||
| add X_COPY, X_COPY, MAXF_Z | |||
| ldr TMPF, [X_COPY], #SZ | |||
| fabs TMPF, TMPF | |||
| fcmp TMPF, MAXF | |||
| beq KERNEL_F4_FINALIZE_DONE | |||
| add INDEX, INDEX, #1 | |||
| ldr TMPF, [X_COPY], #SZ | |||
| fabs TMPF, TMPF | |||
| fcmp TMPF, MAXF | |||
| beq KERNEL_F4_FINALIZE_DONE | |||
| add INDEX, INDEX, #1 | |||
| ldr TMPF, [X_COPY], #SZ | |||
| fabs TMPF, TMPF | |||
| fcmp TMPF, MAXF | |||
| beq KERNEL_F4_FINALIZE_DONE | |||
| add INDEX, INDEX, #1 | |||
| KERNEL_F4_FINALIZE_DONE: | |||
| .endm | |||
| .macro INIT_S | |||
| lsl INC_X, INC_X, #2 | |||
| ld1 TMPVF, [X], INC_X | |||
| mov Z, #1 | |||
| mov INDEX, Z | |||
| fabs MAXF, TMPF | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| ld1 TMPVF, [X], INC_X | |||
| add Z, Z, #1 | |||
| fabs TMPF, TMPF | |||
| fcmp TMPF, MAXF | |||
| fcsel MAXF, MAXF, TMPF, le | |||
| csel INDEX, INDEX, Z, le | |||
| .endm | |||
| /******************************************************************************* | |||
| * End of macro definitions | |||
| *******************************************************************************/ | |||
| PROLOGUE | |||
| cmp N, xzr | |||
| ble iamax_kernel_zero | |||
| cmp INC_X, xzr | |||
| ble iamax_kernel_zero | |||
| PRFM PLDL1KEEP, [X] | |||
| mov X_COPY, X | |||
| cmp INC_X, #1 | |||
| bne iamax_kernel_S_BEGIN | |||
| iamax_kernel_F_BEGIN: | |||
| asr I, N, #2 | |||
| cmp I, xzr | |||
| beq iamax_kernel_F1_INIT | |||
| INIT_F4 | |||
| subs I, I, #1 | |||
| beq iamax_kernel_F4_FINALIZE | |||
| iamax_kernel_F4: | |||
| KERNEL_F4 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_F4 | |||
| iamax_kernel_F4_FINALIZE: | |||
| KERNEL_F4_FINALIZE | |||
| iamax_kernel_F1: | |||
| ands I, N, #3 | |||
| ble iamax_kernel_L999 | |||
| iamax_kernel_F10: | |||
| KERNEL_F1 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_F10 | |||
| b iamax_kernel_L999 | |||
| iamax_kernel_F1_INIT: | |||
| INIT_F1 | |||
| subs N, N, #1 | |||
| b iamax_kernel_F1 | |||
| iamax_kernel_S_BEGIN: | |||
| INIT_S | |||
| subs N, N, #1 | |||
| ble iamax_kernel_L999 | |||
| asr I, N, #2 | |||
| cmp I, xzr | |||
| ble iamax_kernel_S1 | |||
| iamax_kernel_S4: | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_S4 | |||
| iamax_kernel_S1: | |||
| ands I, N, #3 | |||
| ble iamax_kernel_L999 | |||
| iamax_kernel_S10: | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne iamax_kernel_S10 | |||
| iamax_kernel_L999: | |||
| mov x0, INDEX | |||
| ret | |||
| iamax_kernel_zero: | |||
| mov x0, xzr | |||
| ret | |||
| EPILOGUE | |||
| @@ -0,0 +1,225 @@ | |||
| /******************************************************************************* | |||
| Copyright (c) 2015, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #define N x0 | |||
| #define X x1 | |||
| #define INC_X x2 | |||
| #define I x3 | |||
| #if !defined(DOUBLE) | |||
| #define SSQ s0 | |||
| #define SCALE s1 | |||
| #define REGZERO s5 | |||
| #define REGONE s6 | |||
| #else | |||
| #define SSQ d0 | |||
| #define SCALE d1 | |||
| #define REGZERO d5 | |||
| #define REGONE d6 | |||
| #endif | |||
| /******************************************************************************* | |||
| * Macro definitions | |||
| *******************************************************************************/ | |||
| .macro KERNEL_F1 | |||
| #if !defined(DOUBLE) | |||
| ldr s4, [X], #4 | |||
| fcmp s4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| fabs s4, s4 | |||
| fcmp SCALE, s4 | |||
| bge KERNEL_F1_SCALE_GE_X_\@ | |||
| fdiv s2, SCALE, s4 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_X_\@: | |||
| fdiv s2, s4, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| #else | |||
| ldr d4, [X], #8 | |||
| fcmp d4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| fabs d4, d4 | |||
| fcmp SCALE, d4 | |||
| bge KERNEL_F1_SCALE_GE_X_\@ | |||
| fdiv d2, SCALE, d4 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_X_\@: | |||
| fdiv d2, d4, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| #endif | |||
| KERNEL_F1_NEXT_\@: | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| #if !defined(DOUBLE) | |||
| ldr s4, [X] | |||
| fcmp s4, REGZERO | |||
| beq KERNEL_S1_NEXT | |||
| fabs s4, s4 | |||
| fcmp SCALE, s4 | |||
| bge KERNEL_S1_SCALE_GE_X | |||
| fdiv s2, SCALE, s4 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s4 | |||
| b KERNEL_S1_NEXT | |||
| KERNEL_S1_SCALE_GE_X: | |||
| fdiv s2, s4, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| #else | |||
| ldr d4, [X] | |||
| fcmp d4, REGZERO | |||
| beq KERNEL_S1_NEXT | |||
| fabs d4, d4 | |||
| fcmp SCALE, d4 | |||
| bge KERNEL_S1_SCALE_GE_X | |||
| fdiv d2, SCALE, d4 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d4 | |||
| b KERNEL_S1_NEXT | |||
| KERNEL_S1_SCALE_GE_X: | |||
| fdiv d2, d4, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| #endif | |||
| KERNEL_S1_NEXT: | |||
| add X, X, INC_X | |||
| .endm | |||
| .macro KERNEL_F8 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| .endm | |||
| .macro INIT_S | |||
| #if !defined(DOUBLE) | |||
| lsl INC_X, INC_X, #2 // INC_X * SIZE | |||
| #else | |||
| lsl INC_X, INC_X, #3 // INC_X * SIZE | |||
| #endif | |||
| .endm | |||
| .macro INIT | |||
| eor v1.16b, v1.16b, v1.16b // scale=0.0 | |||
| fmov SSQ, #1.0 | |||
| fmov REGONE, SSQ | |||
| fmov REGZERO, SCALE | |||
| .endm | |||
| /******************************************************************************* | |||
| * End of macro definitions | |||
| *******************************************************************************/ | |||
| PROLOGUE | |||
| .align 5 | |||
| INIT | |||
| cmp N, #0 | |||
| ble nrm2_kernel_L999 | |||
| cmp INC_X, #0 | |||
| beq nrm2_kernel_L999 | |||
| cmp INC_X, #1 | |||
| bne nrm2_kernel_S_BEGIN | |||
| nrm2_kernel_F_BEGIN: | |||
| asr I, N, #3 // I = N / 8 | |||
| cmp I, xzr | |||
| ble nrm2_kernel_F1 | |||
| nrm2_kernel_F8: | |||
| KERNEL_F8 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_F8 | |||
| nrm2_kernel_F1: | |||
| ands I, N, #7 | |||
| ble nrm2_kernel_L999 | |||
| nrm2_kernel_F10: | |||
| KERNEL_F1 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_F10 | |||
| b nrm2_kernel_L999 | |||
| nrm2_kernel_S_BEGIN: | |||
| INIT_S | |||
| mov I, N | |||
| .align 5 | |||
| nrm2_kernel_S10: | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_S10 | |||
| nrm2_kernel_L999: | |||
| fsqrt SSQ, SSQ | |||
| fmul SSQ, SCALE, SSQ | |||
| ret | |||
| EPILOGUE | |||
| @@ -59,10 +59,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT_F1 | |||
| #if !defined(DOUBLE) | |||
| fneg s2, S | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub s2, s2, S | |||
| ins v1.s[1], v2.s[0] // [-S, S] | |||
| #else | |||
| fneg d2, S | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub d2, d2, S | |||
| ins v1.d[1], v2.d[0] // [-S, S] | |||
| #endif | |||
| .endm | |||
| @@ -43,14 +43,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if !defined(DOUBLE) | |||
| #define DA_R s0 /* scale input value */ | |||
| #define DA_I s1 /* scale input value */ | |||
| #define TMPX v2.2s | |||
| #define TMPY v3.2s | |||
| #define SZ 4 | |||
| #else | |||
| #define DA_R d0 /* scale input value */ | |||
| #define DA_I d1 /* scale input value */ | |||
| #define TMPX v2.2d | |||
| #define TMPY v3.2d | |||
| #define SZ 8 | |||
| #endif | |||
| @@ -61,22 +57,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if !defined(CONJ) | |||
| #if !defined(DOUBLE) | |||
| ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R | |||
| fneg s2, DA_I | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub s2, s2, DA_I | |||
| ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I | |||
| ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I | |||
| #else | |||
| ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R | |||
| fneg d2, DA_I | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub d2, d2, DA_I | |||
| ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I | |||
| ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I | |||
| #endif | |||
| #else | |||
| #if !defined(DOUBLE) | |||
| fneg s2, DA_R | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub s2, s2, DA_R | |||
| ins v0.s[1], v2.s[0] // v0 = -DA_R, DA_R | |||
| ins v1.s[1], v1.s[0] // v1 = DA_I, DA_I | |||
| #else | |||
| fneg d2, DA_R | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub d2, d2, DA_R | |||
| ins v0.d[1], v2.d[0] // v0 = -DA_R, DA_R | |||
| ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I | |||
| #endif | |||
| @@ -111,9 +111,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro KERNEL_INIT_F4 | |||
| #if !defined(DOUBLE) | |||
| // Replicate the lower 2 floats into the upper 2 slots | |||
| ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R | |||
| ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I | |||
| ins v16.s[0], v0.s[0] | |||
| ins v16.s[1], v16.s[0] | |||
| ins v16.d[1], v16.d[0] | |||
| #if !defined(CONJ) | |||
| ins v17.s[0], v1.s[1] | |||
| #else | |||
| ins v17.s[0], v1.s[0] | |||
| #endif | |||
| ins v17.s[1], v17.s[0] | |||
| ins v17.d[1], v17.d[0] | |||
| #else //DOUBLE | |||
| ins v16.d[0], v0.d[0] | |||
| ins v16.d[1], v16.d[0] | |||
| #if !defined(CONJ) | |||
| ins v17.d[0], v1.d[1] | |||
| #else | |||
| ins v17.d[0], v1.d[0] | |||
| #endif | |||
| ins v17.d[1], v17.d[0] | |||
| #endif | |||
| .endm | |||
| @@ -121,55 +137,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro KERNEL_F4 | |||
| #if !defined(DOUBLE) | |||
| ld1 {v2.4s,v3.4s}, [X], #32 // V2 = X[3], X[2], X[1], X[0] | |||
| // V3 = X[7], X[6], X[5], X[4] | |||
| ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1] | |||
| ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1] | |||
| ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1] | |||
| ld2 {v2.4s, v3.4s}, [X], #32 | |||
| ld2 {v4.4s, v5.4s}, [Y_COPY], #32 | |||
| ld1 {v4.4s,v5.4s}, [Y] // V4 = Y[3], Y[2], Y[1], Y[0] | |||
| // V5 = Y[7], Y[6], Y[5], Y[4] | |||
| ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5] | |||
| ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5] | |||
| ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5] | |||
| fmla v4.4s, v2.4s, v16.4s | |||
| #if !defined(CONJ) | |||
| fmls v4.4s, v3.4s, v17.4s | |||
| #else | |||
| fmla v4.4s, v3.4s, v17.4s | |||
| #endif | |||
| fmla v4.4s, v0.4s, v2.4s // Y[iy] += DA_R * X[ix] | |||
| // Y[iy+1] += +-DA_R * X[ix+1] | |||
| fmla v4.4s, v1.4s, v6.4s // Y[iy] += +-DA_I * X[ix+1] | |||
| // Y[iy+1] += DA_I * X[ix] | |||
| st1 {v4.4s}, [Y], #16 | |||
| #if !defined(CONJ) | |||
| fmla v5.4s, v2.4s, v17.4s | |||
| #else | |||
| fmls v5.4s, v2.4s, v17.4s | |||
| #endif | |||
| fmla v5.4s, v3.4s, v16.4s | |||
| fmla v5.4s, v0.4s, v3.4s // Y[iy] += DA_R * X[ix] | |||
| fmla v5.4s, v1.4s, v7.4s // Y[iy] += +-DA_I * X[ix+1] | |||
| // Y[iy+1] += +-DA_R * X[ix+1] | |||
| // Y[iy+1] += DA_I * X[ix] | |||
| st1 {v5.4s}, [Y], #16 | |||
| st2 {v4.4s, v5.4s}, [Y], #32 | |||
| #else // DOUBLE | |||
| ld1 {v2.2d,v3.2d}, [X], #32 // CX0, CX1, CX2, CX3 | |||
| ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1] | |||
| ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1] | |||
| ld2 {v2.2d, v3.2d}, [X], #32 | |||
| ld2 {v4.2d, v5.2d}, [Y_COPY], #32 | |||
| ld1 {v4.2d,v5.2d}, [X], #32 // CX0, CX1, CX2, CX3 | |||
| ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1] | |||
| ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1] | |||
| ld1 {v16.2d,v17.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3 | |||
| fmla v4.2d, v2.2d, v16.2d | |||
| #if !defined(CONJ) | |||
| fmls v4.2d, v3.2d, v17.2d | |||
| #else | |||
| fmla v4.2d, v3.2d, v17.2d | |||
| #endif | |||
| #if !defined(CONJ) | |||
| fmla v5.2d, v2.2d, v17.2d | |||
| #else | |||
| fmls v5.2d, v2.2d, v17.2d | |||
| #endif | |||
| fmla v5.2d, v3.2d, v16.2d | |||
| fmla v16.2d, v0.2d, v2.2d | |||
| fmla v17.2d, v0.2d, v3.2d | |||
| st2 {v4.2d, v5.2d}, [Y], #32 | |||
| ld1 {v18.2d,v19.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3 | |||
| ld2 {v18.2d, v19.2d}, [X], #32 | |||
| ld2 {v20.2d, v21.2d}, [Y_COPY], #32 | |||
| fmla v16.2d, v1.2d, v20.2d | |||
| fmla v17.2d, v1.2d, v21.2d | |||
| st1 {v16.2d,v17.2d}, [Y], #32 | |||
| fmla v20.2d, v18.2d, v16.2d | |||
| #if !defined(CONJ) | |||
| fmls v20.2d, v19.2d, v17.2d | |||
| #else | |||
| fmla v20.2d, v19.2d, v17.2d | |||
| #endif | |||
| #if !defined(CONJ) | |||
| fmla v21.2d, v18.2d, v17.2d | |||
| #else | |||
| fmls v21.2d, v18.2d, v17.2d | |||
| #endif | |||
| fmla v21.2d, v19.2d, v16.2d | |||
| fmla v18.2d, v0.2d, v4.2d | |||
| fmla v19.2d, v0.2d, v5.2d | |||
| fmla v18.2d, v1.2d, v22.2d | |||
| fmla v19.2d, v1.2d, v23.2d | |||
| st1 {v18.2d,v19.2d}, [Y], #32 | |||
| st2 {v20.2d, v21.2d}, [Y], #32 | |||
| #endif | |||
| PRFM PLDL1KEEP, [X, #512] | |||
| PRFM PLDL1KEEP, [Y, #512] | |||
| @@ -184,73 +184,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| fmul v16.2d, v0.2d, v8.2d[0] | |||
| OP_ii v16.2d, v1.2d, v9.2d[0] | |||
| fmul v17.2d, v0.2d, v9.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v17.2d, v17.2d | |||
| eor v17.16b, v17.16b, v17.16b | |||
| fmls v17.2d, v0.2d, v9.2d[0] | |||
| #else | |||
| fmul v17.2d, v0.2d, v9.2d[0] | |||
| #endif | |||
| OP_ir v17.2d, v1.2d, v8.2d[0] | |||
| fmul v18.2d, v2.2d, v8.2d[0] | |||
| OP_ii v18.2d, v3.2d, v9.2d[0] | |||
| fmul v19.2d, v2.2d, v9.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v19.2d, v19.2d | |||
| eor v19.16b, v19.16b, v19.16b | |||
| fmls v19.2d, v2.2d, v9.2d[0] | |||
| #else | |||
| fmul v19.2d, v2.2d, v9.2d[0] | |||
| #endif | |||
| OP_ir v19.2d, v3.2d, v8.2d[0] | |||
| fmul v20.2d, v0.2d, v8.2d[1] | |||
| OP_ii v20.2d, v1.2d, v9.2d[1] | |||
| fmul v21.2d, v0.2d, v9.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v21.2d, v21.2d | |||
| eor v21.16b, v21.16b, v21.16b | |||
| fmls v21.2d, v0.2d, v9.2d[1] | |||
| #else | |||
| fmul v21.2d, v0.2d, v9.2d[1] | |||
| #endif | |||
| OP_ir v21.2d, v1.2d, v8.2d[1] | |||
| fmul v22.2d, v2.2d, v8.2d[1] | |||
| OP_ii v22.2d, v3.2d, v9.2d[1] | |||
| fmul v23.2d, v2.2d, v9.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v23.2d, v23.2d | |||
| eor v23.16b, v23.16b, v23.16b | |||
| fmls v23.2d, v2.2d, v9.2d[1] | |||
| #else | |||
| fmul v23.2d, v2.2d, v9.2d[1] | |||
| #endif | |||
| OP_ir v23.2d, v3.2d, v8.2d[1] | |||
| fmul v24.2d, v0.2d, v10.2d[0] | |||
| OP_ii v24.2d, v1.2d, v11.2d[0] | |||
| fmul v25.2d, v0.2d, v11.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v25.2d, v25.2d | |||
| eor v25.16b, v25.16b, v25.16b | |||
| fmls v25.2d, v0.2d, v11.2d[0] | |||
| #else | |||
| fmul v25.2d, v0.2d, v11.2d[0] | |||
| #endif | |||
| OP_ir v25.2d, v1.2d, v10.2d[0] | |||
| fmul v26.2d, v2.2d, v10.2d[0] | |||
| OP_ii v26.2d, v3.2d, v11.2d[0] | |||
| fmul v27.2d, v2.2d, v11.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v27.2d, v27.2d | |||
| eor v27.16b, v27.16b, v27.16b | |||
| fmls v27.2d, v2.2d, v11.2d[0] | |||
| #else | |||
| fmul v27.2d, v2.2d, v11.2d[0] | |||
| #endif | |||
| OP_ir v27.2d, v3.2d, v10.2d[0] | |||
| fmul v28.2d, v0.2d, v10.2d[1] | |||
| OP_ii v28.2d, v1.2d, v11.2d[1] | |||
| fmul v29.2d, v0.2d, v11.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v29.2d, v29.2d | |||
| eor v29.16b, v29.16b, v29.16b | |||
| fmls v29.2d, v0.2d, v11.2d[1] | |||
| #else | |||
| fmul v29.2d, v0.2d, v11.2d[1] | |||
| #endif | |||
| OP_ir v29.2d, v1.2d, v10.2d[1] | |||
| fmul v30.2d, v2.2d, v10.2d[1] | |||
| OP_ii v30.2d, v3.2d, v11.2d[1] | |||
| fmul v31.2d, v2.2d, v11.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v31.2d, v31.2d | |||
| eor v31.16b, v31.16b, v31.16b | |||
| fmls v31.2d, v2.2d, v11.2d[1] | |||
| #else | |||
| fmul v31.2d, v2.2d, v11.2d[1] | |||
| #endif | |||
| OP_ir v31.2d, v3.2d, v10.2d[1] | |||
| @@ -110,15 +110,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /******* INIT FOR F1 AND S1 LOOP ******/ | |||
| #if !defined(DOUBLE) | |||
| ins v0.s[1], v0.s[0] // R(ALPHA), R(ALPHA) | |||
| fneg s2, ALPHA_I | |||
| ins v0.s[1], v0.s[0] // R(ALPHA), R(ALPHA) | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub s2, s2, ALPHA_I | |||
| ins v1.s[1], v2.s[0] // -I(ALPHA), I(ALPHA) | |||
| #if !defined(XCONJ) | |||
| ext v1.8b, v1.8b, v1.8b, #4 // I(ALPHA), -I(ALPHA) | |||
| #endif | |||
| #else | |||
| ins v0.d[1], v0.d[0] // R(ALPHA), R(ALPHA) | |||
| fneg d2, ALPHA_I | |||
| ins v0.d[1], v0.d[0] // R(ALPHA), R(ALPHA) | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub d2, d2, ALPHA_I | |||
| ins v1.d[1], v2.d[0] // -I(ALPHA), I(ALPHA) | |||
| #if !defined(XCONJ) | |||
| ext v1.16b, v1.16b, v1.16b, #8 // I(ALPHA), -I(ALPHA) | |||
| @@ -156,8 +158,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| fmul v11.4s, v9.4s, v7.4s // [+ R(X) * R(ALPHA)] | |||
| fmls v11.4s, v10.4s, v8.4s // [- I(X) * I(ALPHA)] | |||
| fmul v12.4s, v9.4s, v8.4s // [R(X) * I(ALPHA)] | |||
| fneg v12.4s, v12.4s // [- R(X) * I(ALPHA)] | |||
| eor v12.16b, v12.16b, v12.16b | |||
| fmls v12.4s, v9.4s, v8.4s // [- R(X) * I(ALPHA)] | |||
| fmla v12.4s, v10.4s, v7.4s // [- I(X) * R(ALPHA)] | |||
| #endif | |||
| #endif // CONJ | |||
| @@ -170,24 +172,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ins v3.s[0], v2.s[1] | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| fneg s4, s3 | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub s4, s4, s3 | |||
| ins v3.s[1], v4.s[0] | |||
| ext v3.8b, v3.8b, v3.8b, #4 // [I(TEMP), -I(TEMP)] | |||
| ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)] | |||
| #else | |||
| fneg s4, s3 | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub s4, s4, s3 | |||
| ins v3.s[1], v4.s[0] // [-I(TEMP), I(TEMP)] | |||
| ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)] | |||
| #endif | |||
| #else // CONJ | |||
| #if !defined(XCONJ) | |||
| ins v3.s[1], v3.s[0] // [I(TEMP), I(TEMP)] | |||
| fneg s4, s2 | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub s4, s4, s2 | |||
| ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)] | |||
| #else | |||
| fneg s3, s3 | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub s3, s4, s3 | |||
| ins v3.s[1], v3.s[0] // [-I(TEMP), -I(TEMP)] | |||
| fneg s4, s2 | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub s4, s4, s2 | |||
| ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)] | |||
| #endif | |||
| #endif // CONJ | |||
| @@ -220,8 +227,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| fmul v11.2d, v9.2d, v7.2d // [+ R(X) * R(ALPHA)] | |||
| fmls v11.2d, v10.2d, v8.2d // [- I(X) * I(ALPHA)] | |||
| fmul v12.2d, v9.2d, v8.2d // [R(X) * I(ALPHA)] | |||
| fneg v12.2d, v12.2d // [- R(X) * I(ALPHA)] | |||
| eor v12.16b, v12.16b, v12.16b | |||
| fmls v12.2d, v9.2d, v8.2d // [- R(X) * I(ALPHA)] | |||
| fmla v12.2d, v10.2d, v7.2d // [- I(X) * R(ALPHA)] | |||
| #endif | |||
| #endif // CONJ | |||
| @@ -234,24 +241,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ins v3.d[0], v2.d[1] // I(TEMP) | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| fneg d4, d3 // -I(TEMP) | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub d4, d4, d3 | |||
| ins v3.d[1], v4.d[0] | |||
| ext v3.16b, v3.16b, v3.16b, #8 // [I(TEMP), -I(TEMP)] | |||
| ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)] | |||
| #else | |||
| fneg d4, d3 // -I(TEMP) | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub d4, d4, d3 | |||
| ins v3.d[1], v4.d[0] // [-I(TEMP), I(TEMP)] | |||
| ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)] | |||
| #endif | |||
| #else // CONJ | |||
| #if !defined(XCONJ) | |||
| ins v3.d[1], v3.d[0] // [I(TEMP), I(TEMP)] | |||
| fneg d4, d2 // -R(TEMP) | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub d4, d4, d2 | |||
| ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)] | |||
| #else | |||
| fneg d3, d3 // -I(TEMP) | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub d3, d4, d3 | |||
| ins v3.d[1], v3.d[0] // [-I(TEMP), -I(TEMP)] | |||
| fneg d4, d2 // -R(TEMP) | |||
| eor v4.16b, v4.16b, v4.16b | |||
| fsub d4, d4, d2 | |||
| ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)] | |||
| #endif | |||
| #endif // CONJ | |||
| @@ -96,22 +96,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if !defined(XCONJ) | |||
| #if !defined(DOUBLE) | |||
| ins v0.s[1], v0.s[0] // v0 = ALPHA_R, ALPHA_R | |||
| fneg s2, ALPHA_I | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub s2, s2, ALPHA_I | |||
| ins v1.s[1], v2.s[0] | |||
| ext v1.8b, v1.8b, v1.8b, #4 // v1 = ALPHA_I, -ALPHA_I | |||
| #else | |||
| ins v0.d[1], v0.d[0] // v0 = ALPHA_R, ALPHA_R | |||
| fneg d2, ALPHA_I | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub d2, d2, ALPHA_I | |||
| ins v1.d[1], v2.d[0] | |||
| ext v1.16b, v1.16b, v1.16b, #8 // v1 = ALPHA_I, -ALPHA_I | |||
| #endif | |||
| #else // XCONJ | |||
| #if !defined(DOUBLE) | |||
| fneg s2, ALPHA_R | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub s2, s2, ALPHA_R | |||
| ins v0.s[1], v2.s[0] // v0 = -ALPHA_R, ALPHA_R | |||
| ins v1.s[1], v1.s[0] // v1 = ALPHA_I, ALPHA_I | |||
| #else | |||
| fneg d2, ALPHA_R | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub d2, d2, ALPHA_R | |||
| ins v0.d[1], v2.d[0] // v0 = -ALPHA_R, ALPHA_R | |||
| ins v1.d[1], v1.d[0] // v1 = ALPHA_I, ALPHA_I | |||
| #endif | |||
| @@ -136,89 +140,51 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ld2 {v11.4s, v12.4s}, [X_PTR], #32 | |||
| ld2 {v13.4s, v14.4s}, [A_PTR], #32 | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| #if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ)) | |||
| fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R] | |||
| fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I] | |||
| fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I] | |||
| fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R] | |||
| #else | |||
| fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R] | |||
| fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I] | |||
| fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I] | |||
| fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R] | |||
| #endif | |||
| #else // CONJ | |||
| #if !defined(XCONJ) | |||
| fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R] | |||
| fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I] | |||
| fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I] | |||
| fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R] | |||
| #else | |||
| fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R] | |||
| fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I] | |||
| fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I] | |||
| fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R] | |||
| #endif | |||
| #endif // CONJ | |||
| #else // DOUBLE | |||
| ld2 {v11.2d, v12.2d}, [X_PTR], #32 | |||
| ld2 {v13.2d, v14.2d}, [A_PTR], #32 | |||
| prfm PLDL1STRM, [X_PTR, #512] | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| #if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ)) | |||
| fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R] | |||
| fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I] | |||
| fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I] | |||
| fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R] | |||
| #else | |||
| fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R] | |||
| fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I] | |||
| fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I] | |||
| fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R] | |||
| #endif | |||
| #else // CONJ | |||
| #if !defined(XCONJ) | |||
| fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R] | |||
| fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I] | |||
| fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I] | |||
| fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R] | |||
| #else | |||
| fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R] | |||
| fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I] | |||
| fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I] | |||
| fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R] | |||
| #endif | |||
| #endif // CONJ | |||
| ld2 {v17.2d, v18.2d}, [X_PTR], #32 | |||
| ld2 {v19.2d, v20.2d}, [A_PTR], #32 | |||
| prfm PLDL1STRM, [A_PTR, #512] | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| #if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ)) | |||
| fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R] | |||
| fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I] | |||
| fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I] | |||
| fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R] | |||
| #else | |||
| fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R] | |||
| fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I] | |||
| fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I] | |||
| fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R] | |||
| #endif | |||
| #else // CONJ | |||
| #if !defined(XCONJ) | |||
| fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R] | |||
| fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I] | |||
| fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I] | |||
| fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R] | |||
| #else | |||
| fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R] | |||
| fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I] | |||
| fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I] | |||
| fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R] | |||
| #endif | |||
| #endif // CONJ | |||
| #endif //DOUBLE | |||
| .endm | |||
| @@ -252,7 +218,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ld1r {v4.2s}, [A_PTR], #4 // [A0, A0] | |||
| ld1 {v5.s}[0], [A_PTR], #4 // A1 | |||
| ld1 {v6.2s}, [X_PTR], #8 // [X1, X0] | |||
| fneg s16, s5 | |||
| eor v16.16b, v16.16b, v16.16b | |||
| fsub s16, s16, s5 | |||
| ins v5.s[1], v16.s[0] // [-A1, A1] | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1] | |||
| @@ -264,7 +231,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ld1r {v4.2d}, [A_PTR], #8 // [A0, A0] | |||
| ld1 {v5.d}[0], [A_PTR], #8 // A1 | |||
| ld1 {v6.2d}, [X_PTR], #16 // [X1, X0] | |||
| fneg d16, d5 | |||
| eor v16.16b, v16.16b, v16.16b | |||
| fsub d16, d16, d5 | |||
| ins v5.d[1], v16.d[0] // [-A1, A1] | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1] | |||
| @@ -284,7 +252,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ld1r {v4.2s}, [A_PTR], #4 // [A0, A0] | |||
| ld1 {v5.s}[0], [A_PTR], #4 // A1 | |||
| ld1 {v6.2s}, [X_PTR], INC_X // [X1, X0] | |||
| fneg s16, s5 | |||
| eor v16.16b, v16.16b, v16.16b | |||
| fsub s16, s16, s5 | |||
| ins v5.s[1], v16.s[0] // [-A1, A1] | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1] | |||
| @@ -296,7 +265,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| ld1r {v4.2d}, [A_PTR], #8 // [A0, A0] | |||
| ld1 {v5.d}[0], [A_PTR], #8 // A1 | |||
| ld1 {v6.2d}, [X_PTR], INC_X // [X1, X0] | |||
| fneg d16, d5 | |||
| eor v16.16b, v16.16b, v16.16b | |||
| fsub d16, d16, d5 | |||
| ins v5.d[1], v16.d[0] // [-A1, A1] | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1] | |||
| @@ -28,201 +28,261 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #define N x0 /* vector length */ | |||
| #define X x1 /* X vector address */ | |||
| #define INC_X x2 /* X stride */ | |||
| #define I x5 /* loop variable */ | |||
| #define N x0 | |||
| #define X x1 | |||
| #define INC_X x2 | |||
| /******************************************************************************* | |||
| * Macro definitions | |||
| *******************************************************************************/ | |||
| #define I x3 | |||
| #if !defined(DOUBLE) | |||
| #define TMPF s6 | |||
| #define SSQ s0 | |||
| #define TMPVF {v6.s}[0] | |||
| #define SZ 4 | |||
| #define SSQ s0 | |||
| #define SCALE s1 | |||
| #define REGZERO s6 | |||
| #define REGONE s7 | |||
| #else | |||
| #define TMPF d6 | |||
| #define SSQ d0 | |||
| #define TMPVF {v6.d}[0] | |||
| #define SZ 8 | |||
| #define SSQ d0 | |||
| #define SCALE d1 | |||
| #define REGZERO d6 | |||
| #define REGONE d7 | |||
| #endif | |||
| /******************************************************************************/ | |||
| /************************************************************************************** | |||
| * Macro definitions | |||
| **************************************************************************************/ | |||
| .macro KERNEL_F1 | |||
| #if !defined(DOUBLE) | |||
| ld1 {v1.2s}, [X], #8 | |||
| fmul v1.2s, v1.2s, v1.2s | |||
| faddp TMPF, v1.2s | |||
| fadd SSQ, SSQ, TMPF | |||
| ldr s4, [X], #4 | |||
| fcmp s4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| fabs s4, s4 | |||
| fcmp SCALE, s4 | |||
| bge KERNEL_F1_SCALE_GE_XR_\@ | |||
| fdiv s2, SCALE, s4 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_XR_\@: | |||
| fdiv s2, s4, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| KERNEL_F1_NEXT_\@: | |||
| ldr s5, [X], #4 | |||
| fcmp s5, REGZERO | |||
| beq KERNEL_F1_END_\@ | |||
| fabs s5, s5 | |||
| fcmp SCALE, s5 | |||
| bge KERNEL_F1_SCALE_GE_XI_\@ | |||
| fdiv s2, SCALE, s5 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s5 | |||
| b KERNEL_F1_END_\@ | |||
| KERNEL_F1_SCALE_GE_XI_\@: | |||
| fdiv s2, s5, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| #else | |||
| ld1 {v1.2d}, [X], #16 | |||
| fmul v1.2d, v1.2d, v1.2d | |||
| faddp TMPF, v1.2d | |||
| fadd SSQ, SSQ, TMPF | |||
| ldr d4, [X], #8 | |||
| fcmp d4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| fabs d4, d4 | |||
| fcmp SCALE, d4 | |||
| bge KERNEL_F1_SCALE_GE_XR_\@ | |||
| fdiv d2, SCALE, d4 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_XR_\@: | |||
| fdiv d2, d4, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| KERNEL_F1_NEXT_\@: | |||
| ldr d5, [X], #8 | |||
| fcmp d5, REGZERO | |||
| beq KERNEL_F1_END_\@ | |||
| fabs d5, d5 | |||
| fcmp SCALE, d5 | |||
| bge KERNEL_F1_SCALE_GE_XI_\@ | |||
| fdiv d2, SCALE, d5 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d5 | |||
| b KERNEL_F1_END_\@ | |||
| KERNEL_F1_SCALE_GE_XI_\@: | |||
| fdiv d2, d5, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| #endif | |||
| KERNEL_F1_END_\@: | |||
| .endm | |||
| .macro KERNEL_F8 | |||
| .macro KERNEL_S1 | |||
| #if !defined(DOUBLE) | |||
| ld1 {v1.4s, v2.4s}, [X], #32 | |||
| fmla v0.4s, v1.4s, v1.4s | |||
| fmla v5.4s, v2.4s, v2.4s | |||
| ld1 {v3.4s,v4.4s}, [X], #32 | |||
| fmla v0.4s, v3.4s, v3.4s | |||
| fmla v5.4s, v4.4s, v4.4s | |||
| PRFM PLDL1KEEP, [X, #1024] | |||
| #else // DOUBLE | |||
| ld1 {v1.2d, v2.2d}, [X], #32 | |||
| fmla v0.2d, v1.2d, v1.2d | |||
| fmla v5.2d, v2.2d, v2.2d | |||
| ld1 {v3.2d, v4.2d}, [X], #32 | |||
| fmla v0.2d, v3.2d, v3.2d | |||
| fmla v5.2d, v4.2d, v4.2d | |||
| ld1 {v16.2d, v17.2d}, [X], #32 | |||
| fmla v0.2d, v16.2d, v16.2d | |||
| fmla v5.2d, v17.2d, v17.2d | |||
| ld1 {v18.2d, v19.2d}, [X], #32 | |||
| fmla v0.2d, v18.2d, v18.2d | |||
| fmla v5.2d, v19.2d, v19.2d | |||
| ldr s4, [X] | |||
| fcmp s4, REGZERO | |||
| beq KERNEL_S1_NEXT_\@ | |||
| fabs s4, s4 | |||
| fcmp SCALE, s4 | |||
| bge KERNEL_S1_SCALE_GE_XR_\@ | |||
| fdiv s2, SCALE, s4 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s4 | |||
| b KERNEL_S1_NEXT_\@ | |||
| KERNEL_S1_SCALE_GE_XR_\@: | |||
| fdiv s2, s4, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| KERNEL_S1_NEXT_\@: | |||
| ldr s5, [X, #4] | |||
| fcmp s5, REGZERO | |||
| beq KERNEL_S1_END_\@ | |||
| fabs s5, s5 | |||
| fcmp SCALE, s5 | |||
| bge KERNEL_S1_SCALE_GE_XI_\@ | |||
| fdiv s2, SCALE, s5 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s5 | |||
| b KERNEL_S1_END_\@ | |||
| KERNEL_S1_SCALE_GE_XI_\@: | |||
| fdiv s2, s5, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| #else | |||
| ldr d4, [X] | |||
| fcmp d4, REGZERO | |||
| beq KERNEL_S1_NEXT_\@ | |||
| fabs d4, d4 | |||
| fcmp SCALE, d4 | |||
| bge KERNEL_S1_SCALE_GE_XR_\@ | |||
| fdiv d2, SCALE, d4 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d4 | |||
| b KERNEL_S1_NEXT_\@ | |||
| KERNEL_S1_SCALE_GE_XR_\@: | |||
| fdiv d2, d4, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| KERNEL_S1_NEXT_\@: | |||
| ldr d5, [X, #8] | |||
| fcmp d5, REGZERO | |||
| beq KERNEL_S1_END_\@ | |||
| fabs d5, d5 | |||
| fcmp SCALE, d5 | |||
| bge KERNEL_S1_SCALE_GE_XI_\@ | |||
| fdiv d2, SCALE, d5 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d5 | |||
| b KERNEL_S1_END_\@ | |||
| KERNEL_S1_SCALE_GE_XI_\@: | |||
| fdiv d2, d5, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| #endif | |||
| KERNEL_S1_END_\@: | |||
| add X, X, INC_X | |||
| .endm | |||
| .macro nrm2_kernel_F8_FINALIZE | |||
| #if !defined(DOUBLE) | |||
| fadd v0.4s, v0.4s, v5.4s | |||
| ext v1.16b, v0.16b, v0.16b, #8 | |||
| fadd v0.2s, v0.2s, v1.2s | |||
| faddp SSQ, v0.2s | |||
| #else | |||
| fadd v0.2d, v0.2d, v5.2d | |||
| faddp SSQ, v0.2d | |||
| #endif | |||
| .macro KERNEL_F8 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| KERNEL_F1 | |||
| .endm | |||
| .macro INIT_S | |||
| #if !defined(DOUBLE) | |||
| lsl INC_X, INC_X, #3 | |||
| ld1 {v1.2s}, [X], INC_X | |||
| fmul v1.2s, v1.2s, v1.2s | |||
| faddp SSQ, v1.2s | |||
| lsl INC_X, INC_X, #3 // INC_X * SIZE | |||
| #else | |||
| lsl INC_X, INC_X, #4 | |||
| ld1 {v1.2d}, [X], INC_X | |||
| fmul v1.2d, v1.2d, v1.2d | |||
| faddp SSQ, v1.2d | |||
| lsl INC_X, INC_X, #4 // INC_X * SIZE | |||
| #endif | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| #if !defined(DOUBLE) | |||
| ld1 {v1.2s}, [X], INC_X | |||
| fmul v1.2s, v1.2s, v1.2s | |||
| faddp TMPF, v1.2s | |||
| fadd SSQ, SSQ, TMPF | |||
| #else | |||
| ld1 {v1.2d}, [X], INC_X | |||
| fmul v1.2d, v1.2d, v1.2d | |||
| faddp TMPF, v1.2d | |||
| fadd SSQ, SSQ, TMPF | |||
| #endif | |||
| .macro INIT | |||
| eor v1.16b, v1.16b, v1.16b // scale=0.0 | |||
| fmov SSQ, #1.0 | |||
| fmov REGONE, SSQ | |||
| fmov REGZERO, SCALE | |||
| .endm | |||
| /******************************************************************************* | |||
| /************************************************************************************** | |||
| * End of macro definitions | |||
| *******************************************************************************/ | |||
| **************************************************************************************/ | |||
| PROLOGUE | |||
| #if !defined(DOUBLE) | |||
| fmov SSQ, wzr | |||
| fmov s5, SSQ | |||
| #else | |||
| fmov SSQ, xzr | |||
| fmov d5, SSQ | |||
| #endif | |||
| .align 5 | |||
| INIT | |||
| cmp N, #0 | |||
| ble nrm2_kernel_L999 | |||
| cmp INC_X, #0 | |||
| beq nrm2_kernel_L999 | |||
| cmp N, xzr | |||
| ble nrm2_kernel_zero | |||
| cmp INC_X, xzr | |||
| ble nrm2_kernel_zero | |||
| cmp INC_X, #1 | |||
| bne nrm2_kernel_S_BEGIN | |||
| nrm2_kernel_F_BEGIN: | |||
| asr I, N, #3 | |||
| asr I, N, #3 // I = N / 8 | |||
| cmp I, xzr | |||
| beq nrm2_kernel_F1_INIT | |||
| ble nrm2_kernel_F1 | |||
| nrm2_kernel_F8: | |||
| KERNEL_F8 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_F8 | |||
| nrm2_kernel_F8_FINALIZE | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_F8 | |||
| nrm2_kernel_F1: | |||
| ands I, N, #7 | |||
| ble nrm2_kernel_L999 | |||
| nrm2_kernel_F10: | |||
| KERNEL_F1 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_F10 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_F10 | |||
| b nrm2_kernel_L999 | |||
| nrm2_kernel_F1_INIT: | |||
| b nrm2_kernel_F1 | |||
| nrm2_kernel_S_BEGIN: | |||
| INIT_S | |||
| subs N, N, #1 | |||
| ble nrm2_kernel_L999 | |||
| mov I, N | |||
| asr I, N, #2 | |||
| cmp I, xzr | |||
| ble nrm2_kernel_S1 | |||
| .align 5 | |||
| nrm2_kernel_S4: | |||
| nrm2_kernel_S10: | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_S4 | |||
| nrm2_kernel_S1: | |||
| ands I, N, #3 | |||
| ble nrm2_kernel_L999 | |||
| bne nrm2_kernel_S10 | |||
| nrm2_kernel_S10: | |||
| KERNEL_S1 | |||
| subs I, I, #1 | |||
| bne nrm2_kernel_S10 | |||
| nrm2_kernel_L999: | |||
| fsqrt SSQ, SSQ | |||
| ret | |||
| fmul SSQ, SCALE, SSQ | |||
| nrm2_kernel_zero: | |||
| ret | |||
| EPILOGUE | |||
| @@ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define X x3 /* X vector address */ | |||
| #define INC_X x4 /* X stride */ | |||
| #define I x5 /* loop variable */ | |||
| #define X_COPY x6 /* Copy of X */ | |||
| /******************************************************************************* | |||
| * Macro definitions | |||
| @@ -50,43 +51,55 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT | |||
| #if !defined(DOUBLE) | |||
| ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R | |||
| fneg s2, DA_I | |||
| ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I | |||
| ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I | |||
| ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R | |||
| #else | |||
| ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R | |||
| fneg d2, DA_I | |||
| ins v1.d[1], v2.d[0] // v1 = DA_I, DA_I | |||
| ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I | |||
| #endif | |||
| .endm | |||
| .macro KERNEL_F1 | |||
| #if !defined(DOUBLE) | |||
| ld1 {v2.2s}, [X] // X1, X0 | |||
| ext v3.8b, v2.8b, v2.8b, #4 // X0, X1 | |||
| fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0 | |||
| fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v2.2s}, [X], #8 | |||
| fmul s3, DA_R, v2.s[0] // DA_R*X0 | |||
| fmul s5, DA_I, v2.s[1] // DA_I*X1 | |||
| fsub s3, s3, s5 // DA_R*X0-DA_I*X1 | |||
| fmul s4, DA_I, v2.s[0] // DA_I*X0 | |||
| fmul s5, DA_R, v2.s[1] // DA_R*X1 | |||
| fadd s4, s4, s5 // DA_I*X0+DA_R*X1 | |||
| ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v3.2s}, [X], #8 | |||
| #else | |||
| ld1 {v2.2d}, [X] // X1, X0 | |||
| ext v3.16b, v2.16b, v2.16b, #8 // X0, X1 | |||
| fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0 | |||
| fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v2.2d}, [X], #16 | |||
| #endif | |||
| fmul d3, DA_R, v2.d[0] // DA_R*X0 | |||
| fmul d5, DA_I, v2.d[1] // DA_I*X1 | |||
| fsub d3, d3, d5 // DA_R*X0-DA_I*X1 | |||
| fmul d4, DA_I, v2.d[0] // DA_I*X0 | |||
| fmul d5, DA_R, v2.d[1] // DA_R*X1 | |||
| fadd d4, d4, d5 // DA_I*X0+DA_R*X1 | |||
| ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v3.2d}, [X], #16 | |||
| #endif | |||
| .endm | |||
| .macro KERNEL_INIT_F4 | |||
| #if !defined(DOUBLE) | |||
| // Replicate the lower 2 floats into the upper 2 slots | |||
| ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R | |||
| ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I | |||
| ins v16.s[0], v0.s[0] | |||
| ins v16.s[1], v16.s[0] | |||
| ins v16.d[1], v16.d[0] | |||
| ins v17.s[0], v1.s[0] | |||
| ins v17.s[1], v17.s[0] | |||
| ins v17.d[1], v17.d[0] | |||
| #else //DOUBLE | |||
| ins v16.d[0], v0.d[0] | |||
| ins v16.d[1], v16.d[0] | |||
| ins v17.d[0], v1.d[0] | |||
| ins v17.d[1], v17.d[0] | |||
| #endif | |||
| .endm | |||
| @@ -94,46 +107,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro KERNEL_F4 | |||
| #if !defined(DOUBLE) | |||
| ld1 {v2.4s,v3.4s}, [X] // V2 = X[3], X[2], X[1], X[0] | |||
| // V3 = X[7], X[6], X[5], X[4] | |||
| ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1] | |||
| ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1] | |||
| ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1] | |||
| fmul v2.4s, v0.4s, v2.4s // X'[ix] += DA_R * X[ix] | |||
| // X'[ix+1] += DA_R * X[ix+1] | |||
| fmla v2.4s, v1.4s, v6.4s // X'[ix] += -DA_I * X[ix+1] | |||
| // X'[ix+1] += DA_I * X[ix] | |||
| ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5] | |||
| ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5] | |||
| ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5] | |||
| fmul v3.4s, v0.4s, v3.4s // X'[ix] += DA_R * X[ix] | |||
| // X'[ix+1] += DA_R * X[ix+1] | |||
| fmla v3.4s, v1.4s, v7.4s // X'[ix] += -DA_I * X[ix+1] | |||
| // X'[ix+1] += DA_I * X[ix] | |||
| st1 {v2.4s,v3.4s}, [X], #32 | |||
| ld2 {v2.4s, v3.4s}, [X], #32 | |||
| fmul v4.4s, v2.4s, v16.4s | |||
| fmul v6.4s, v3.4s, v17.4s | |||
| fsub v4.4s, v4.4s, v6.4s | |||
| fmul v5.4s, v2.4s, v17.4s | |||
| fmul v6.4s, v3.4s, v16.4s | |||
| fadd v5.4s, v5.4s, v6.4s | |||
| st2 {v4.4s, v5.4s}, [X_COPY], #32 | |||
| #else // DOUBLE | |||
| ld1 {v2.2d,v3.2d,v4.2d,v5.2d}, [X] // CX0, CX1, CX2, CX3 | |||
| ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1] | |||
| ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1] | |||
| ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1] | |||
| ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1] | |||
| ld2 {v2.2d, v3.2d}, [X], #32 | |||
| fmul v2.2d, v0.2d, v2.2d | |||
| fmla v2.2d, v1.2d, v20.2d | |||
| fmul v4.2d, v2.2d, v16.2d | |||
| fmul v6.2d, v3.2d, v17.2d | |||
| fsub v4.2d, v4.2d, v6.2d | |||
| fmul v5.2d, v2.2d, v17.2d | |||
| fmul v6.2d, v3.2d, v16.2d | |||
| fadd v5.2d, v5.2d, v6.2d | |||
| fmul v3.2d, v0.2d, v3.2d | |||
| fmla v3.2d, v1.2d, v21.2d | |||
| st1 {v2.2d,v3.2d}, [X], #32 | |||
| st2 {v4.2d, v5.2d}, [X_COPY], #32 | |||
| fmul v4.2d, v0.2d, v4.2d | |||
| fmla v4.2d, v1.2d, v22.2d | |||
| ld2 {v18.2d, v19.2d}, [X], #32 | |||
| fmul v5.2d, v0.2d, v5.2d | |||
| fmla v5.2d, v1.2d, v23.2d | |||
| st1 {v4.2d,v5.2d}, [X], #32 | |||
| fmul v20.2d, v18.2d, v16.2d | |||
| fmul v6.2d, v19.2d, v17.2d | |||
| fsub v20.2d, v20.2d, v6.2d | |||
| fmul v21.2d, v18.2d, v17.2d | |||
| fmul v6.2d, v19.2d, v16.2d | |||
| fadd v21.2d, v21.2d, v6.2d | |||
| st2 {v20.2d, v21.2d}, [X_COPY], #32 | |||
| #endif | |||
| PRFM PLDL1KEEP, [X, #1024] | |||
| .endm | |||
| @@ -149,21 +155,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| #if !defined(DOUBLE) | |||
| ld1 {v2.2s}, [X] // X1, X0 | |||
| ext v3.8b, v2.8b, v2.8b, #4 // X0, X1 | |||
| fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0 | |||
| fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v2.2s}, [X], INC_X | |||
| fmul s3, DA_R, v2.s[0] // DA_R*X0 | |||
| fmul s5, DA_I, v2.s[1] // DA_I*X1 | |||
| fsub s3, s3, s5 // DA_R*X0-DA_I*X1 | |||
| fmul s4, DA_I, v2.s[0] // DA_I*X0 | |||
| fmul s5, DA_R, v2.s[1] // DA_R*X1 | |||
| fadd s4, s4, s5 // DA_I*X0+DA_R*X1 | |||
| ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v3.2s}, [X], INC_X | |||
| #else | |||
| ld1 {v2.2d}, [X] // X1, X0 | |||
| ext v3.16b, v2.16b, v2.16b, #8 // X0, X1 | |||
| fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0 | |||
| fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v2.2d}, [X], INC_X | |||
| #endif | |||
| fmul d3, DA_R, v2.d[0] // DA_R*X0 | |||
| fmul d5, DA_I, v2.d[1] // DA_I*X1 | |||
| fsub d3, d3, d5 // DA_R*X0-DA_I*X1 | |||
| fmul d4, DA_I, v2.d[0] // DA_I*X0 | |||
| fmul d5, DA_R, v2.d[1] // DA_R*X1 | |||
| fadd d4, d4, d5 // DA_I*X0+DA_R*X1 | |||
| ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1 | |||
| st1 {v3.2d}, [X], INC_X | |||
| #endif | |||
| .endm | |||
| /******************************************************************************* | |||
| @@ -171,21 +187,54 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *******************************************************************************/ | |||
| PROLOGUE | |||
| b zscal_begin | |||
| data_ar: | |||
| .word 0x3e44fae6 | |||
| data_ai: | |||
| .word 0x3d320fa2 | |||
| data_xr: | |||
| .word 0x3f4baff1 | |||
| data_xi: | |||
| .word 0xbe8ef0bd | |||
| zscal_begin: | |||
| ldr s20, data_ar | |||
| ldr s21, data_ai | |||
| ldr s22, data_xr | |||
| ldr s23, data_xi | |||
| fmul s24, s22, s21 | |||
| fmla s24, s23, v20.s[0] | |||
| fmul s25, s22, s21 | |||
| fmul s26, s23, s20 | |||
| fadd s25, s25, s26 | |||
| mov X_COPY, X | |||
| cmp N, xzr | |||
| ble zscal_kernel_L999 | |||
| fcmp DA_R, #0.0 | |||
| bne zscal_kernel_1 | |||
| bne zscal_kernel_R_non_zero | |||
| fcmp DA_I, #0.0 | |||
| beq zscal_kernel_zero | |||
| beq zscal_kernel_RI_zero | |||
| // TODO: special case DA_R == 0 && DA_I != 0 | |||
| b zscal_kernel_R_zero | |||
| zscal_kernel_1: | |||
| zscal_kernel_R_non_zero: | |||
| // TODO: special case DA_R != 0 && DA_I == 0 | |||
| fcmp DA_I, #0.0 | |||
| beq zscal_kernel_I_zero | |||
| /******************************************************************************* | |||
| * A_R != 0 && A_I != 0 | |||
| *******************************************************************************/ | |||
| zscal_kernel_RI_non_zero: | |||
| INIT | |||
| @@ -257,16 +306,85 @@ zscal_kernel_L999: | |||
| mov w0, wzr | |||
| ret | |||
| zscal_kernel_zero: | |||
| /******************************************************************************* | |||
| * A_R == 0 && A_I != 0 | |||
| *******************************************************************************/ | |||
| zscal_kernel_R_zero: | |||
| INIT_S | |||
| #if !defined(DOUBLE) | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub s2, s2, DA_I | |||
| ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I | |||
| #else | |||
| eor v2.16b, v2.16b, v2.16b | |||
| fsub d2, d2, DA_I | |||
| ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I | |||
| #endif | |||
| zscal_kernel_R_zero_1: | |||
| #if !defined(DOUBLE) | |||
| ld1 {v2.2s}, [X] // X1, X0 | |||
| fmul v2.2s, v2.2s, v1.2s // -DA_I*X1, DA_I*X0 | |||
| ext v2.8b, v2.8b, v2.8b, #4 // DA_I*X0, -DA_I*X1 | |||
| st1 {v2.2s}, [X] | |||
| #else | |||
| ld1 {v2.2d}, [X] // X1, X0 | |||
| fmul v2.2d, v2.2d, v1.2d // -DA_I*X1, DA_I*X0 | |||
| ext v2.16b, v2.16b, v2.16b, #8 // DA_I*X0, -DA_I*X1 | |||
| st1 {v2.2d}, [X] | |||
| #endif | |||
| add X, X, INC_X | |||
| subs N, N, #1 | |||
| bne zscal_kernel_R_zero_1 | |||
| mov w0, wzr | |||
| ret | |||
| /******************************************************************************* | |||
| * A_R != 0 && A_I == 0 | |||
| *******************************************************************************/ | |||
| zscal_kernel_I_zero: | |||
| INIT_S | |||
| #if !defined(DOUBLE) | |||
| ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R | |||
| #else | |||
| ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R | |||
| #endif | |||
| zscal_kernel_I_zero_1: | |||
| #if !defined(DOUBLE) | |||
| ld1 {v2.2s}, [X] // X1, X0 | |||
| fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0 | |||
| st1 {v2.2s}, [X] | |||
| #else | |||
| ld1 {v2.2d}, [X] // X1, X0 | |||
| fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0 | |||
| st1 {v2.2d}, [X] | |||
| #endif | |||
| add X, X, INC_X | |||
| subs N, N, #1 | |||
| bne zscal_kernel_I_zero_1 | |||
| mov w0, wzr | |||
| ret | |||
| /******************************************************************************* | |||
| * A_R == 0 && A_I == 0 | |||
| *******************************************************************************/ | |||
| zscal_kernel_RI_zero: | |||
| INIT_S | |||
| zscal_kernel_Z1: | |||
| zscal_kernel_RI_zero_1: | |||
| stp DA_R, DA_I, [X] | |||
| add X, X, INC_X | |||
| subs N, N, #1 | |||
| bne zscal_kernel_Z1 | |||
| subs N, N, #1 | |||
| bne zscal_kernel_RI_zero_1 | |||
| mov w0, wzr | |||
| ret | |||
| @@ -187,73 +187,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| fmul v16.2d, v0.2d, v8.2d[0] | |||
| OP_ii v16.2d, v1.2d, v9.2d[0] | |||
| fmul v17.2d, v0.2d, v9.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v17.2d, v17.2d | |||
| eor v17.16b, v17.16b, v17.16b | |||
| fmls v17.2d, v0.2d, v9.2d[0] | |||
| #else | |||
| fmul v17.2d, v0.2d, v9.2d[0] | |||
| #endif | |||
| OP_ir v17.2d, v1.2d, v8.2d[0] | |||
| fmul v18.2d, v2.2d, v8.2d[0] | |||
| OP_ii v18.2d, v3.2d, v9.2d[0] | |||
| fmul v19.2d, v2.2d, v9.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v19.2d, v19.2d | |||
| eor v19.16b, v19.16b, v19.16b | |||
| fmls v19.2d, v2.2d, v9.2d[0] | |||
| #else | |||
| fmul v19.2d, v2.2d, v9.2d[0] | |||
| #endif | |||
| OP_ir v19.2d, v3.2d, v8.2d[0] | |||
| fmul v20.2d, v0.2d, v8.2d[1] | |||
| OP_ii v20.2d, v1.2d, v9.2d[1] | |||
| fmul v21.2d, v0.2d, v9.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v21.2d, v21.2d | |||
| eor v21.16b, v21.16b, v21.16b | |||
| fmls v21.2d, v0.2d, v9.2d[1] | |||
| #else | |||
| fmul v21.2d, v0.2d, v9.2d[1] | |||
| #endif | |||
| OP_ir v21.2d, v1.2d, v8.2d[1] | |||
| fmul v22.2d, v2.2d, v8.2d[1] | |||
| OP_ii v22.2d, v3.2d, v9.2d[1] | |||
| fmul v23.2d, v2.2d, v9.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v23.2d, v23.2d | |||
| eor v23.16b, v23.16b, v23.16b | |||
| fmls v23.2d, v2.2d, v9.2d[1] | |||
| #else | |||
| fmul v23.2d, v2.2d, v9.2d[1] | |||
| #endif | |||
| OP_ir v23.2d, v3.2d, v8.2d[1] | |||
| fmul v24.2d, v0.2d, v10.2d[0] | |||
| OP_ii v24.2d, v1.2d, v11.2d[0] | |||
| fmul v25.2d, v0.2d, v11.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v25.2d, v25.2d | |||
| eor v25.16b, v25.16b, v25.16b | |||
| fmls v25.2d, v0.2d, v11.2d[0] | |||
| #else | |||
| fmul v25.2d, v0.2d, v11.2d[0] | |||
| #endif | |||
| OP_ir v25.2d, v1.2d, v10.2d[0] | |||
| fmul v26.2d, v2.2d, v10.2d[0] | |||
| OP_ii v26.2d, v3.2d, v11.2d[0] | |||
| fmul v27.2d, v2.2d, v11.2d[0] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v27.2d, v27.2d | |||
| eor v27.16b, v27.16b, v27.16b | |||
| fmls v27.2d, v2.2d, v11.2d[0] | |||
| #else | |||
| fmul v27.2d, v2.2d, v11.2d[0] | |||
| #endif | |||
| OP_ir v27.2d, v3.2d, v10.2d[0] | |||
| fmul v28.2d, v0.2d, v10.2d[1] | |||
| OP_ii v28.2d, v1.2d, v11.2d[1] | |||
| fmul v29.2d, v0.2d, v11.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v29.2d, v29.2d | |||
| eor v29.16b, v29.16b, v29.16b | |||
| fmls v29.2d, v0.2d, v11.2d[1] | |||
| #else | |||
| fmul v29.2d, v0.2d, v11.2d[1] | |||
| #endif | |||
| OP_ir v29.2d, v1.2d, v10.2d[1] | |||
| fmul v30.2d, v2.2d, v10.2d[1] | |||
| OP_ii v30.2d, v3.2d, v11.2d[1] | |||
| fmul v31.2d, v2.2d, v11.2d[1] | |||
| #if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \ | |||
| defined(RR) || defined(RC) || defined(CR) || defined(CC) | |||
| fneg v31.2d, v31.2d | |||
| eor v31.16b, v31.16b, v31.16b | |||
| fmls v31.2d, v2.2d, v11.2d[1] | |||
| #else | |||
| fmul v31.2d, v2.2d, v11.2d[1] | |||
| #endif | |||
| OP_ir v31.2d, v3.2d, v10.2d[1] | |||
| @@ -137,8 +137,13 @@ endif() | |||
| add_library(blas ${ALLOBJ}) | |||
| if(UNIX) | |||
| target_link_libraries(blas m) | |||
| endif() | |||
| #if(UNIX) | |||
| # target_link_libraries(blas m) | |||
| #endif() | |||
| set_target_properties( | |||
| blas PROPERTIES | |||
| VERSION ${LAPACK_VERSION} | |||
| SOVERSION ${LAPACK_MAJOR_VERSION} | |||
| ) | |||
| target_link_libraries(blas) | |||
| lapack_install_library(blas) | |||
| @@ -23,8 +23,9 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> forms the dot product of two vectors, conjugating the first | |||
| *> vector. | |||
| *> CDOTC forms the dot product of two complex vectors | |||
| *> CDOTC = X^H * Y | |||
| *> | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -35,7 +36,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex_blas_level1 | |||
| * | |||
| @@ -51,10 +52,10 @@ | |||
| * ===================================================================== | |||
| COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,INCY,N | |||
| @@ -23,7 +23,9 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> CDOTU forms the dot product of two vectors. | |||
| *> CDOTU forms the dot product of two complex vectors | |||
| *> CDOTU = X^T * Y | |||
| *> | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +36,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex_blas_level1 | |||
| * | |||
| @@ -50,10 +52,10 @@ | |||
| * ===================================================================== | |||
| COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,INCY,N | |||
| @@ -165,7 +165,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex_blas_level2 | |||
| * | |||
| @@ -187,10 +187,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE CGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX ALPHA,BETA | |||
| @@ -319,26 +319,22 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| IF (J.GT.KU) KY = KY + INCY | |||
| 80 CONTINUE | |||
| @@ -166,7 +166,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex_blas_level3 | |||
| * | |||
| @@ -187,10 +187,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | |||
| * | |||
| * -- Reference BLAS level3 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level3 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX ALPHA,BETA | |||
| @@ -317,12 +317,10 @@ | |||
| 60 CONTINUE | |||
| END IF | |||
| DO 80 L = 1,K | |||
| IF (B(L,J).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| 80 CONTINUE | |||
| 90 CONTINUE | |||
| ELSE IF (CONJA) THEN | |||
| @@ -376,17 +374,15 @@ | |||
| 170 CONTINUE | |||
| END IF | |||
| DO 190 L = 1,K | |||
| IF (B(J,L).NE.ZERO) THEN | |||
| TEMP = ALPHA*CONJG(B(J,L)) | |||
| DO 180 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 180 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*CONJG(B(J,L)) | |||
| DO 180 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 180 CONTINUE | |||
| 190 CONTINUE | |||
| 200 CONTINUE | |||
| ELSE | |||
| * | |||
| * Form C := alpha*A*B**T + beta*C | |||
| * Form C := alpha*A*B**T + beta*C | |||
| * | |||
| DO 250 J = 1,N | |||
| IF (BETA.EQ.ZERO) THEN | |||
| @@ -399,12 +395,10 @@ | |||
| 220 CONTINUE | |||
| END IF | |||
| DO 240 L = 1,K | |||
| IF (B(J,L).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 230 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 230 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 230 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 230 CONTINUE | |||
| 240 CONTINUE | |||
| 250 CONTINUE | |||
| END IF | |||
| @@ -136,7 +136,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex_blas_level2 | |||
| * | |||
| @@ -158,10 +158,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX ALPHA,BETA | |||
| @@ -285,24 +285,20 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| 80 CONTINUE | |||
| END IF | |||
| @@ -21,7 +21,7 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> DCABS1 computes absolute value of a double complex number | |||
| *> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -32,17 +32,17 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup double_blas_level1 | |||
| * | |||
| * ===================================================================== | |||
| DOUBLE PRECISION FUNCTION DCABS1(Z) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX*16 Z | |||
| @@ -163,7 +163,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup double_blas_level2 | |||
| * | |||
| @@ -185,10 +185,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| DOUBLE PRECISION ALPHA,BETA | |||
| @@ -312,26 +312,22 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| IF (J.GT.KU) KY = KY + INCY | |||
| 80 CONTINUE | |||
| @@ -166,7 +166,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup double_blas_level3 | |||
| * | |||
| @@ -187,10 +187,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | |||
| * | |||
| * -- Reference BLAS level3 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level3 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| DOUBLE PRECISION ALPHA,BETA | |||
| @@ -311,12 +311,10 @@ | |||
| 60 CONTINUE | |||
| END IF | |||
| DO 80 L = 1,K | |||
| IF (B(L,J).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| 80 CONTINUE | |||
| 90 CONTINUE | |||
| ELSE | |||
| @@ -353,12 +351,10 @@ | |||
| 140 CONTINUE | |||
| END IF | |||
| DO 160 L = 1,K | |||
| IF (B(J,L).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 150 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 150 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 150 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 150 CONTINUE | |||
| 160 CONTINUE | |||
| 170 CONTINUE | |||
| ELSE | |||
| @@ -134,7 +134,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup double_blas_level2 | |||
| * | |||
| @@ -156,10 +156,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| DOUBLE PRECISION ALPHA,BETA | |||
| @@ -278,24 +278,20 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| 80 CONTINUE | |||
| END IF | |||
| @@ -23,7 +23,8 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> DZASUM takes the sum of the absolute values. | |||
| *> DZASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and | |||
| *> returns a single precision result. | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +35,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup double_blas_level1 | |||
| * | |||
| @@ -51,10 +52,10 @@ | |||
| * ===================================================================== | |||
| DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,N | |||
| @@ -23,7 +23,7 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> ICAMAX finds the index of element having max. absolute value. | |||
| *> ICAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)| | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +34,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup aux_blas | |||
| * | |||
| @@ -51,10 +51,10 @@ | |||
| * ===================================================================== | |||
| INTEGER FUNCTION ICAMAX(N,CX,INCX) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,N | |||
| @@ -23,7 +23,7 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> IDAMAX finds the index of element having max. absolute value. | |||
| *> IDAMAX finds the index of the first element having maximum absolute value. | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +34,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup aux_blas | |||
| * | |||
| @@ -51,10 +51,10 @@ | |||
| * ===================================================================== | |||
| INTEGER FUNCTION IDAMAX(N,DX,INCX) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,N | |||
| @@ -23,7 +23,7 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> ISAMAX finds the index of element having max. absolute value. | |||
| *> ISAMAX finds the index of the first element having maximum absolute value. | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +34,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup aux_blas | |||
| * | |||
| @@ -51,10 +51,10 @@ | |||
| * ===================================================================== | |||
| INTEGER FUNCTION ISAMAX(N,SX,INCX) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,N | |||
| @@ -23,7 +23,7 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> IZAMAX finds the index of element having max. absolute value. | |||
| *> IZAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)| | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +34,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup aux_blas | |||
| * | |||
| @@ -51,10 +51,10 @@ | |||
| * ===================================================================== | |||
| INTEGER FUNCTION IZAMAX(N,ZX,INCX) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,N | |||
| @@ -20,7 +20,7 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> SCABS1 computes absolute value of a complex number | |||
| *> SCABS1 computes |Re(.)| + |Im(.)| of a complex number | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -31,17 +31,17 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup single_blas_level1 | |||
| * | |||
| * ===================================================================== | |||
| REAL FUNCTION SCABS1(Z) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX Z | |||
| @@ -23,7 +23,7 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> SCASUM takes the sum of the absolute values of a complex vector and | |||
| *> SCASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and | |||
| *> returns a single precision result. | |||
| *> \endverbatim | |||
| * | |||
| @@ -35,7 +35,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup single_blas_level1 | |||
| * | |||
| @@ -52,10 +52,10 @@ | |||
| * ===================================================================== | |||
| REAL FUNCTION SCASUM(N,CX,INCX) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,N | |||
| @@ -163,7 +163,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup single_blas_level2 | |||
| * | |||
| @@ -185,10 +185,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE SGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| REAL ALPHA,BETA | |||
| @@ -312,26 +312,22 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| IF (J.GT.KU) KY = KY + INCY | |||
| 80 CONTINUE | |||
| @@ -166,7 +166,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup single_blas_level3 | |||
| * | |||
| @@ -187,10 +187,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | |||
| * | |||
| * -- Reference BLAS level3 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level3 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| REAL ALPHA,BETA | |||
| @@ -311,12 +311,10 @@ | |||
| 60 CONTINUE | |||
| END IF | |||
| DO 80 L = 1,K | |||
| IF (B(L,J).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| 80 CONTINUE | |||
| 90 CONTINUE | |||
| ELSE | |||
| @@ -353,12 +351,10 @@ | |||
| 140 CONTINUE | |||
| END IF | |||
| DO 160 L = 1,K | |||
| IF (B(J,L).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 150 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 150 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 150 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 150 CONTINUE | |||
| 160 CONTINUE | |||
| 170 CONTINUE | |||
| ELSE | |||
| @@ -134,7 +134,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup single_blas_level2 | |||
| * | |||
| @@ -156,10 +156,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| REAL ALPHA,BETA | |||
| @@ -278,24 +278,20 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| 80 CONTINUE | |||
| END IF | |||
| @@ -23,7 +23,9 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> ZDOTC forms the dot product of a vector. | |||
| *> ZDOTC forms the dot product of two complex vectors | |||
| *> ZDOTC = X^H * Y | |||
| *> | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +36,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex16_blas_level1 | |||
| * | |||
| @@ -50,10 +52,10 @@ | |||
| * ===================================================================== | |||
| COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,INCY,N | |||
| @@ -23,7 +23,9 @@ | |||
| *> | |||
| *> \verbatim | |||
| *> | |||
| *> ZDOTU forms the dot product of two vectors. | |||
| *> ZDOTU forms the dot product of two complex vectors | |||
| *> ZDOTU = X^T * Y | |||
| *> | |||
| *> \endverbatim | |||
| * | |||
| * Authors: | |||
| @@ -34,7 +36,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex16_blas_level1 | |||
| * | |||
| @@ -50,10 +52,10 @@ | |||
| * ===================================================================== | |||
| COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY) | |||
| * | |||
| * -- Reference BLAS level1 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level1 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| INTEGER INCX,INCY,N | |||
| @@ -165,7 +165,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex16_blas_level2 | |||
| * | |||
| @@ -187,10 +187,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE ZGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX*16 ALPHA,BETA | |||
| @@ -319,26 +319,22 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| K = KUP1 - J | |||
| DO 50 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(I) = Y(I) + TEMP*A(K+I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| K = KUP1 - J | |||
| DO 70 I = MAX(1,J-KU),MIN(M,J+KL) | |||
| Y(IY) = Y(IY) + TEMP*A(K+I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| IF (J.GT.KU) KY = KY + INCY | |||
| 80 CONTINUE | |||
| @@ -166,7 +166,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex16_blas_level3 | |||
| * | |||
| @@ -187,10 +187,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | |||
| * | |||
| * -- Reference BLAS level3 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level3 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX*16 ALPHA,BETA | |||
| @@ -317,12 +317,10 @@ | |||
| 60 CONTINUE | |||
| END IF | |||
| DO 80 L = 1,K | |||
| IF (B(L,J).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(L,J) | |||
| DO 70 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 70 CONTINUE | |||
| 80 CONTINUE | |||
| 90 CONTINUE | |||
| ELSE IF (CONJA) THEN | |||
| @@ -376,17 +374,15 @@ | |||
| 170 CONTINUE | |||
| END IF | |||
| DO 190 L = 1,K | |||
| IF (B(J,L).NE.ZERO) THEN | |||
| TEMP = ALPHA*DCONJG(B(J,L)) | |||
| DO 180 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 180 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*DCONJG(B(J,L)) | |||
| DO 180 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 180 CONTINUE | |||
| 190 CONTINUE | |||
| 200 CONTINUE | |||
| ELSE | |||
| * | |||
| * Form C := alpha*A*B**T + beta*C | |||
| * Form C := alpha*A*B**T + beta*C | |||
| * | |||
| DO 250 J = 1,N | |||
| IF (BETA.EQ.ZERO) THEN | |||
| @@ -399,12 +395,10 @@ | |||
| 220 CONTINUE | |||
| END IF | |||
| DO 240 L = 1,K | |||
| IF (B(J,L).NE.ZERO) THEN | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 230 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 230 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*B(J,L) | |||
| DO 230 I = 1,M | |||
| C(I,J) = C(I,J) + TEMP*A(I,L) | |||
| 230 CONTINUE | |||
| 240 CONTINUE | |||
| 250 CONTINUE | |||
| END IF | |||
| @@ -136,7 +136,7 @@ | |||
| *> \author Univ. of Colorado Denver | |||
| *> \author NAG Ltd. | |||
| * | |||
| *> \date November 2011 | |||
| *> \date November 2015 | |||
| * | |||
| *> \ingroup complex16_blas_level2 | |||
| * | |||
| @@ -158,10 +158,10 @@ | |||
| * ===================================================================== | |||
| SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) | |||
| * | |||
| * -- Reference BLAS level2 routine (version 3.4.0) -- | |||
| * -- Reference BLAS level2 routine (version 3.6.0) -- | |||
| * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * November 2011 | |||
| * November 2015 | |||
| * | |||
| * .. Scalar Arguments .. | |||
| COMPLEX*16 ALPHA,BETA | |||
| @@ -285,24 +285,20 @@ | |||
| JX = KX | |||
| IF (INCY.EQ.1) THEN | |||
| DO 60 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| DO 50 I = 1,M | |||
| Y(I) = Y(I) + TEMP*A(I,J) | |||
| 50 CONTINUE | |||
| JX = JX + INCX | |||
| 60 CONTINUE | |||
| ELSE | |||
| DO 80 J = 1,N | |||
| IF (X(JX).NE.ZERO) THEN | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| END IF | |||
| TEMP = ALPHA*X(JX) | |||
| IY = KY | |||
| DO 70 I = 1,M | |||
| Y(IY) = Y(IY) + TEMP*A(I,J) | |||
| IY = IY + INCY | |||
| 70 CONTINUE | |||
| JX = JX + INCX | |||
| 80 CONTINUE | |||
| END IF | |||
| @@ -30,17 +30,16 @@ macro(add_blas_test name src) | |||
| get_filename_component(baseNAME ${src} NAME_WE) | |||
| set(TEST_INPUT "${LAPACK_SOURCE_DIR}/BLAS/${baseNAME}.in") | |||
| add_executable(${name} ${src}) | |||
| get_target_property(TEST_LOC ${name} LOCATION) | |||
| target_link_libraries(${name} blas) | |||
| if(EXISTS "${TEST_INPUT}") | |||
| add_test(BLAS-${name} "${CMAKE_COMMAND}" | |||
| -DTEST=${TEST_LOC} | |||
| add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}" | |||
| -DTEST=$<TARGET_FILE:${name}> | |||
| -DINPUT=${TEST_INPUT} | |||
| -DINTDIR=${CMAKE_CFG_INTDIR} | |||
| -P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake") | |||
| else() | |||
| add_test(BLAS-${name} "${CMAKE_COMMAND}" | |||
| -DTEST=${TEST_LOC} | |||
| add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}" | |||
| -DTEST=$<TARGET_FILE:${name}> | |||
| -DINTDIR=${CMAKE_CFG_INTDIR} | |||
| -P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake") | |||
| endif() | |||
| @@ -120,7 +120,7 @@ | |||
| REAL RZERO | |||
| PARAMETER ( RZERO = 0.0 ) | |||
| INTEGER NMAX, INCMAX | |||
| PARAMETER ( NMAX = 128, INCMAX = 2 ) | |||
| PARAMETER ( NMAX = 65, INCMAX = 2 ) | |||
| INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, | |||
| $ NALMAX = 7, NBEMAX = 7 ) | |||
| @@ -102,7 +102,7 @@ | |||
| REAL RZERO | |||
| PARAMETER ( RZERO = 0.0 ) | |||
| INTEGER NMAX | |||
| PARAMETER ( NMAX = 128 ) | |||
| PARAMETER ( NMAX = 65 ) | |||
| INTEGER NIDMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) | |||
| * .. Local Scalars .. | |||
| @@ -117,7 +117,7 @@ | |||
| DOUBLE PRECISION ZERO, ONE | |||
| PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) | |||
| INTEGER NMAX, INCMAX | |||
| PARAMETER ( NMAX = 128, INCMAX = 2 ) | |||
| PARAMETER ( NMAX = 65, INCMAX = 2 ) | |||
| INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, | |||
| $ NALMAX = 7, NBEMAX = 7 ) | |||
| @@ -97,7 +97,7 @@ | |||
| DOUBLE PRECISION ZERO, ONE | |||
| PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) | |||
| INTEGER NMAX | |||
| PARAMETER ( NMAX = 128 ) | |||
| PARAMETER ( NMAX = 65 ) | |||
| INTEGER NIDMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) | |||
| * .. Local Scalars .. | |||
| @@ -117,7 +117,7 @@ | |||
| REAL ZERO, ONE | |||
| PARAMETER ( ZERO = 0.0, ONE = 1.0 ) | |||
| INTEGER NMAX, INCMAX | |||
| PARAMETER ( NMAX = 128, INCMAX = 2 ) | |||
| PARAMETER ( NMAX = 65, INCMAX = 2 ) | |||
| INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, | |||
| $ NALMAX = 7, NBEMAX = 7 ) | |||
| @@ -97,7 +97,7 @@ | |||
| REAL ZERO, ONE | |||
| PARAMETER ( ZERO = 0.0, ONE = 1.0 ) | |||
| INTEGER NMAX | |||
| PARAMETER ( NMAX = 128 ) | |||
| PARAMETER ( NMAX = 65 ) | |||
| INTEGER NIDMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) | |||
| * .. Local Scalars .. | |||
| @@ -121,7 +121,7 @@ | |||
| DOUBLE PRECISION RZERO | |||
| PARAMETER ( RZERO = 0.0D0 ) | |||
| INTEGER NMAX, INCMAX | |||
| PARAMETER ( NMAX = 128, INCMAX = 2 ) | |||
| PARAMETER ( NMAX = 65, INCMAX = 2 ) | |||
| INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, | |||
| $ NALMAX = 7, NBEMAX = 7 ) | |||
| @@ -104,7 +104,7 @@ | |||
| DOUBLE PRECISION RZERO | |||
| PARAMETER ( RZERO = 0.0D0 ) | |||
| INTEGER NMAX | |||
| PARAMETER ( NMAX = 128 ) | |||
| PARAMETER ( NMAX = 65 ) | |||
| INTEGER NIDMAX, NALMAX, NBEMAX | |||
| PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) | |||
| * .. Local Scalars .. | |||
| @@ -0,0 +1,90 @@ | |||
| message(STATUS "CBLAS enable") | |||
| enable_language(C) | |||
| set(LAPACK_INSTALL_EXPORT_NAME cblas-targets) | |||
| # Create a header file cblas.h for the routines called in my C programs | |||
| include(FortranCInterface) | |||
| FortranCInterface_HEADER( ${CMAKE_CURRENT_SOURCE_DIR}/include/cblas_mangling.h | |||
| MACRO_NAMESPACE "F77_" | |||
| SYMBOL_NAMESPACE "F77_" ) | |||
| # Old way to detect mangling | |||
| #include(FortranMangling) | |||
| #FORTRAN_MANGLING(CDEFS) | |||
| #set(CDEFS ${CDEFS} CACHE STRING "Fortran Mangling" FORCE) | |||
| #MESSAGE(STATUS "=========") | |||
| # -------------------------------------------------- | |||
| # Compiler Flags | |||
| #ADD_DEFINITIONS( "-D${CDEFS}") | |||
| include_directories( include ) | |||
| add_subdirectory(include) | |||
| add_subdirectory(src) | |||
| macro(append_subdir_files variable dirname) | |||
| get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable}) | |||
| foreach(depfile ${holder}) | |||
| list(APPEND ${variable} "${dirname}/${depfile}") | |||
| endforeach() | |||
| endmacro() | |||
| append_subdir_files(CBLAS_INCLUDE "include") | |||
| INSTALL( FILES ${CBLAS_INCLUDE} DESTINATION include ) | |||
| # -------------------------------------------------- | |||
| if(BUILD_TESTING) | |||
| add_subdirectory(testing) | |||
| add_subdirectory(examples) | |||
| endif(BUILD_TESTING) | |||
| if(NOT BLAS_FOUND) | |||
| set(ALL_TARGETS ${ALL_TARGETS} blas) | |||
| endif(NOT BLAS_FOUND) | |||
| # Export cblas targets from the | |||
| # install tree, if any. | |||
| set(_cblas_config_install_guard_target "") | |||
| if(ALL_TARGETS) | |||
| install(EXPORT cblas-targets | |||
| DESTINATION lib/cmake/cblas-${LAPACK_VERSION}) | |||
| # Choose one of the cblas targets to use as a guard for | |||
| # cblas-config.cmake to load targets from the install tree. | |||
| list(GET ALL_TARGETS 0 _cblas_config_install_guard_target) | |||
| endif() | |||
| # Export cblas targets from the build tree, if any. | |||
| set(_cblas_config_build_guard_target "") | |||
| if(ALL_TARGETS) | |||
| export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake) | |||
| # Choose one of the cblas targets to use as a guard | |||
| # for cblas-config.cmake to load targets from the build tree. | |||
| list(GET ALL_TARGETS 0 _cblas_config_build_guard_target) | |||
| endif() | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-version.cmake.in | |||
| ${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-build.cmake.in | |||
| ${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| ) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake | |||
| ${LAPACK_BINARY_DIR}/cblas-config-version.cmake | |||
| DESTINATION lib/cmake/cblas-${LAPACK_VERSION} | |||
| ) | |||
| #install(EXPORT cblas-targets | |||
| # DESTINATION lib/cmake/cblas-${LAPACK_VERSION}) | |||
| @@ -0,0 +1,27 @@ | |||
| include ../make.inc | |||
| all: | |||
| cd include && cp cblas_mangling_with_flags.h cblas_mangling.h | |||
| cd src && $(MAKE) all | |||
| clean: cleanlib | |||
| cleanlib: | |||
| cd src && $(MAKE) clean | |||
| cleanexe: | |||
| cd testing && $(MAKE) cleanexe | |||
| cleanall: clean cleanexe | |||
| rm -f $(CBLASLIB) | |||
| cd examples && rm -f *.o cblas_ex1 cblas_ex2 | |||
| cblas_testing: | |||
| cd testing && $(MAKE) all | |||
| runtst: | |||
| cd testing && $(MAKE) run | |||
| example: all | |||
| cd examples && make all | |||
| @@ -0,0 +1,49 @@ | |||
| # | |||
| # Makefile.LINUX | |||
| # | |||
| # | |||
| # If you compile, change the name to Makefile.in. | |||
| # | |||
| # | |||
| #----------------------------------------------------------------------------- | |||
| # Shell | |||
| #----------------------------------------------------------------------------- | |||
| SHELL = /bin/sh | |||
| #----------------------------------------------------------------------------- | |||
| # Platform | |||
| #----------------------------------------------------------------------------- | |||
| PLAT = LINUX | |||
| #----------------------------------------------------------------------------- | |||
| # Libraries and includes | |||
| #----------------------------------------------------------------------------- | |||
| BLLIB = $(home)/lib/librefblas.a | |||
| CBLIB = ../lib/libcblas.a | |||
| #----------------------------------------------------------------------------- | |||
| # Compilers | |||
| #----------------------------------------------------------------------------- | |||
| CC = gcc | |||
| FC = gfortran | |||
| LOADER = $(FC) | |||
| #----------------------------------------------------------------------------- | |||
| # Flags for Compilers | |||
| #----------------------------------------------------------------------------- | |||
| CFLAGS = -O3 -DADD_ | |||
| FFLAGS = -O3 | |||
| #----------------------------------------------------------------------------- | |||
| # Archive programs and flags | |||
| #----------------------------------------------------------------------------- | |||
| ARCH = ar | |||
| ARCHFLAGS = cr | |||
| RANLIB = ranlib | |||
| @@ -0,0 +1,59 @@ | |||
| INSTALLATION | |||
| Make sure to set these variables appropriately in your Make.inc in the LAPACK folder: | |||
| CBLASLIB is your CBLAS library | |||
| BLASLIB is your Legacy BLAS library (by default the Reference BLAS shipped within LAPACK) | |||
| Then type: | |||
| prompt> make | |||
| which will create the CBLAS library. | |||
| CREATING THE TESTERS | |||
| type: | |||
| prompt> make cblas_testing | |||
| This will create the BLAS library if necessary, then compile the CBLAS testings. | |||
| EXECUTING THE TESTERS | |||
| type: | |||
| prompt> make runtst | |||
| _______________________________________________________________________________ | |||
| This package contains C interface to Legacy BLAS. | |||
| Written by Keita Teranishi (5/20/98) | |||
| _______________________________________________________________________________ | |||
| This release updates an inconsistency between the BLAST document and | |||
| the interface. According to the document, the enumerated types for | |||
| the C interface to the BLAS are not typedef'ed. | |||
| It also updates the Level 2 and 3 testers which check for correct | |||
| exiting of routines when called with bad arguments. This is done by | |||
| overriding the Legacy BLAS library's implementation of xerbla(). If | |||
| this cannot be done ( for instance one cannot override some calls | |||
| to xerbla() in Sun's Performance library), then correct error | |||
| exiting cannot be checked. | |||
| Updated by Jeff Horner (3/15/99) | |||
| _______________________________________________________________________________ | |||
| Updated by R. Clint Whaley (2/23/03): | |||
| Fixed the i?amax error that I reported three years ago: standard dictates | |||
| IAMAX return vals in range 0 <= iamax < N, but reference was mistakenly | |||
| returning like F77: 0 < iamax <= N. | |||
| _______________________________________________________________________________ | |||
| Updated by Julie Langou (08/22/2014): | |||
| Integrate CBLAS package into LAPACK | |||
| Improve headers for mangling | |||
| @@ -0,0 +1,9 @@ | |||
| prefix=@prefix@ | |||
| libdir=@libdir@ | |||
| Name: lapacke | |||
| Description: C Standard Interface to BLAS Linear Algebra PACKage | |||
| Version: @LAPACK_VERSION@ | |||
| URL: http://www.netlib.org/lapack/ | |||
| Libs: -L${libdir} -lcblas | |||
| Requires: blas | |||
| @@ -0,0 +1,14 @@ | |||
| # Load the LAPACK package with which we were built. | |||
| set(LAPACK_DIR "@LAPACK_BINARY_DIR@") | |||
| find_package(LAPACK NO_MODULE) | |||
| # Load lapack targets from the build tree, including lapacke targets. | |||
| if(NOT TARGET lapacke) | |||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
| endif() | |||
| # Report lapacke header search locations. | |||
| set(CBLAS_INCLUDE_DIRS "@LAPACK_SOURCE_DIR@/cblas/include") | |||
| # Report lapacke libraries. | |||
| set(CBLAS_LIBRARIES cblas) | |||
| @@ -0,0 +1,23 @@ | |||
| # Compute locations from <prefix>/lib/cmake/lapacke-<v>/<self>.cmake | |||
| get_filename_component(_CBLAS_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) | |||
| get_filename_component(_CBLAS_PREFIX "${_CBLAS_SELF_DIR}" PATH) | |||
| get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | |||
| get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | |||
| # Load the LAPACK package with which we were built. | |||
| set(LAPACK_DIR "${_CBLAS_PREFIX}/lib/cmake/lapack-@LAPACK_VERSION@") | |||
| find_package(LAPACK NO_MODULE) | |||
| # Load lapacke targets from the install tree. | |||
| if(NOT TARGET cblas) | |||
| include(${_CBLAS_SELF_DIR}/cblas-targets.cmake) | |||
| endif() | |||
| # Report lapacke header search locations. | |||
| set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include) | |||
| # Report lapacke libraries. | |||
| set(CBLAS_LIBRARIES cblas) | |||
| unset(_CBLAS_PREFIX) | |||
| unset(_CBLAS_SELF_DIR) | |||
| @@ -0,0 +1,8 @@ | |||
| add_executable(xexample1_CBLAS cblas_example1.c ) | |||
| add_executable(xexample2_CBLAS cblas_example2.c ) | |||
| target_link_libraries(xexample1_CBLAS cblas ${BLAS_LIBRARIES}) | |||
| target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES}) | |||
| add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS) | |||
| add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS) | |||
| @@ -0,0 +1,14 @@ | |||
| include ../../make.inc | |||
| all: example1 example2 | |||
| example1: | |||
| $(CC) -c $(CFLAGS) -I../include cblas_example1.c | |||
| $(LOADER) -o cblas_ex1 cblas_example1.o $(CBLASLIB) $(BLASLIB) | |||
| example2: | |||
| $(CC) -c $(CFLAGS) -I../include cblas_example2.c | |||
| $(LOADER) -o cblas_ex2 cblas_example2.o $(CBLASLIB) $(BLASLIB) | |||
| cleanall: | |||
| rm -f *.o cblas_ex1 cblas_ex2 | |||
| @@ -0,0 +1,69 @@ | |||
| /* cblas_example.c */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| int main ( ) | |||
| { | |||
| CBLAS_LAYOUT Layout; | |||
| CBLAS_TRANSPOSE transa; | |||
| double *a, *x, *y; | |||
| double alpha, beta; | |||
| int m, n, lda, incx, incy, i; | |||
| Layout = CblasColMajor; | |||
| transa = CblasNoTrans; | |||
| m = 4; /* Size of Column ( the number of rows ) */ | |||
| n = 4; /* Size of Row ( the number of columns ) */ | |||
| lda = 4; /* Leading dimension of 5 * 4 matrix is 5 */ | |||
| incx = 1; | |||
| incy = 1; | |||
| alpha = 1; | |||
| beta = 0; | |||
| a = (double *)malloc(sizeof(double)*m*n); | |||
| x = (double *)malloc(sizeof(double)*n); | |||
| y = (double *)malloc(sizeof(double)*n); | |||
| /* The elements of the first column */ | |||
| a[0] = 1; | |||
| a[1] = 2; | |||
| a[2] = 3; | |||
| a[3] = 4; | |||
| /* The elements of the second column */ | |||
| a[m] = 1; | |||
| a[m+1] = 1; | |||
| a[m+2] = 1; | |||
| a[m+3] = 1; | |||
| /* The elements of the third column */ | |||
| a[m*2] = 3; | |||
| a[m*2+1] = 4; | |||
| a[m*2+2] = 5; | |||
| a[m*2+3] = 6; | |||
| /* The elements of the fourth column */ | |||
| a[m*3] = 5; | |||
| a[m*3+1] = 6; | |||
| a[m*3+2] = 7; | |||
| a[m*3+3] = 8; | |||
| /* The elemetns of x and y */ | |||
| x[0] = 1; | |||
| x[1] = 2; | |||
| x[2] = 1; | |||
| x[3] = 1; | |||
| y[0] = 0; | |||
| y[1] = 0; | |||
| y[2] = 0; | |||
| y[3] = 0; | |||
| cblas_dgemv( Layout, transa, m, n, alpha, a, lda, x, incx, beta, | |||
| y, incy ); | |||
| /* Print y */ | |||
| for( i = 0; i < n; i++ ) | |||
| printf(" y%d = %f\n", i, y[i]); | |||
| free(a); | |||
| free(x); | |||
| free(y); | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,72 @@ | |||
| /* cblas_example2.c */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| #define INVALID -1 | |||
| int main (int argc, char **argv ) | |||
| { | |||
| int rout=-1,info=0,m,n,k,lda,ldb,ldc; | |||
| double A[2] = {0.0,0.0}, | |||
| B[2] = {0.0,0.0}, | |||
| C[2] = {0.0,0.0}, | |||
| ALPHA=0.0, BETA=0.0; | |||
| if (argc > 2){ | |||
| rout = atoi(argv[1]); | |||
| info = atoi(argv[2]); | |||
| } | |||
| if (rout == 1) { | |||
| if (info==0) { | |||
| printf("Checking if cblas_dgemm fails on parameter 4\n"); | |||
| cblas_dgemm( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0, | |||
| ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||
| } | |||
| if (info==1) { | |||
| printf("Checking if cblas_dgemm fails on parameter 5\n"); | |||
| cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0, | |||
| ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||
| } | |||
| if (info==2) { | |||
| printf("Checking if cblas_dgemm fails on parameter 9\n"); | |||
| cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2, | |||
| ALPHA, A, 1, B, 1, BETA, C, 2 ); | |||
| } | |||
| if (info==3) { | |||
| printf("Checking if cblas_dgemm fails on parameter 11\n"); | |||
| cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 2, | |||
| ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||
| } | |||
| } else { | |||
| if (info==0) { | |||
| printf("Checking if F77_dgemm fails on parameter 3\n"); | |||
| m=INVALID; n=0; k=0; lda=1; ldb=1; ldc=1; | |||
| F77_dgemm( "T", "N", &m, &n, &k, | |||
| &ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc ); | |||
| } | |||
| if (info==1) { | |||
| m=0; n=INVALID; k=0; lda=1; ldb=1; ldc=1; | |||
| printf("Checking if F77_dgemm fails on parameter 4\n"); | |||
| F77_dgemm( "N", "T", &m, &n, &k, | |||
| &ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc ); | |||
| } | |||
| if (info==2) { | |||
| printf("Checking if F77_dgemm fails on parameter 8\n"); | |||
| m=2; n=0; k=0; lda=1; ldb=1; ldc=2; | |||
| F77_dgemm( "N", "N" , &m, &n, &k, | |||
| &ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc ); | |||
| } | |||
| if (info==3) { | |||
| printf("Checking if F77_dgemm fails on parameter 10\n"); | |||
| m=0; n=0; k=2; lda=1; ldb=1; ldc=1; | |||
| F77_dgemm( "N", "N" , &m, &n, &k, | |||
| &ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc ); | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,3 @@ | |||
| SET (CBLAS_INCLUDE cblas.h cblas_f77.h cblas_test.h cblas_mangling.h) | |||
| file(COPY ${CBLAS_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include) | |||
| @@ -0,0 +1,588 @@ | |||
| #ifndef CBLAS_H | |||
| #define CBLAS_H | |||
| #include <stddef.h> | |||
| #ifdef __cplusplus | |||
| extern "C" { /* Assume C declarations for C++ */ | |||
| #endif /* __cplusplus */ | |||
| /* | |||
| * Enumerated and derived types | |||
| */ | |||
| #ifdef WeirdNEC | |||
| #define CBLAS_INDEX long | |||
| #else | |||
| #define CBLAS_INDEX int | |||
| #endif | |||
| typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; | |||
| typedef enum {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113} CBLAS_TRANSPOSE; | |||
| typedef enum {CblasUpper=121, CblasLower=122} CBLAS_UPLO; | |||
| typedef enum {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; | |||
| typedef enum {CblasLeft=141, CblasRight=142} CBLAS_SIDE; | |||
| typedef CBLAS_LAYOUT CBLAS_ORDER; /* this for backward compatibility with CBLAS_ORDER */ | |||
| #include "cblas_mangling.h" | |||
| /* | |||
| * =========================================================================== | |||
| * Prototypes for level 1 BLAS functions (complex are recast as routines) | |||
| * =========================================================================== | |||
| */ | |||
| double cblas_dcabs1(const void *z); | |||
| float cblas_scabs1(const void *c); | |||
| float cblas_sdsdot(const int N, const float alpha, const float *X, | |||
| const int incX, const float *Y, const int incY); | |||
| double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, | |||
| const int incY); | |||
| float cblas_sdot(const int N, const float *X, const int incX, | |||
| const float *Y, const int incY); | |||
| double cblas_ddot(const int N, const double *X, const int incX, | |||
| const double *Y, const int incY); | |||
| /* | |||
| * Functions having prefixes Z and C only | |||
| */ | |||
| void cblas_cdotu_sub(const int N, const void *X, const int incX, | |||
| const void *Y, const int incY, void *dotu); | |||
| void cblas_cdotc_sub(const int N, const void *X, const int incX, | |||
| const void *Y, const int incY, void *dotc); | |||
| void cblas_zdotu_sub(const int N, const void *X, const int incX, | |||
| const void *Y, const int incY, void *dotu); | |||
| void cblas_zdotc_sub(const int N, const void *X, const int incX, | |||
| const void *Y, const int incY, void *dotc); | |||
| /* | |||
| * Functions having prefixes S D SC DZ | |||
| */ | |||
| float cblas_snrm2(const int N, const float *X, const int incX); | |||
| float cblas_sasum(const int N, const float *X, const int incX); | |||
| double cblas_dnrm2(const int N, const double *X, const int incX); | |||
| double cblas_dasum(const int N, const double *X, const int incX); | |||
| float cblas_scnrm2(const int N, const void *X, const int incX); | |||
| float cblas_scasum(const int N, const void *X, const int incX); | |||
| double cblas_dznrm2(const int N, const void *X, const int incX); | |||
| double cblas_dzasum(const int N, const void *X, const int incX); | |||
| /* | |||
| * Functions having standard 4 prefixes (S D C Z) | |||
| */ | |||
| CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); | |||
| CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); | |||
| CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); | |||
| CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); | |||
| /* | |||
| * =========================================================================== | |||
| * Prototypes for level 1 BLAS routines | |||
| * =========================================================================== | |||
| */ | |||
| /* | |||
| * Routines with standard 4 prefixes (s, d, c, z) | |||
| */ | |||
| void cblas_sswap(const int N, float *X, const int incX, | |||
| float *Y, const int incY); | |||
| void cblas_scopy(const int N, const float *X, const int incX, | |||
| float *Y, const int incY); | |||
| void cblas_saxpy(const int N, const float alpha, const float *X, | |||
| const int incX, float *Y, const int incY); | |||
| void cblas_dswap(const int N, double *X, const int incX, | |||
| double *Y, const int incY); | |||
| void cblas_dcopy(const int N, const double *X, const int incX, | |||
| double *Y, const int incY); | |||
| void cblas_daxpy(const int N, const double alpha, const double *X, | |||
| const int incX, double *Y, const int incY); | |||
| void cblas_cswap(const int N, void *X, const int incX, | |||
| void *Y, const int incY); | |||
| void cblas_ccopy(const int N, const void *X, const int incX, | |||
| void *Y, const int incY); | |||
| void cblas_caxpy(const int N, const void *alpha, const void *X, | |||
| const int incX, void *Y, const int incY); | |||
| void cblas_zswap(const int N, void *X, const int incX, | |||
| void *Y, const int incY); | |||
| void cblas_zcopy(const int N, const void *X, const int incX, | |||
| void *Y, const int incY); | |||
| void cblas_zaxpy(const int N, const void *alpha, const void *X, | |||
| const int incX, void *Y, const int incY); | |||
| /* | |||
| * Routines with S and D prefix only | |||
| */ | |||
| void cblas_srotg(float *a, float *b, float *c, float *s); | |||
| void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); | |||
| void cblas_srot(const int N, float *X, const int incX, | |||
| float *Y, const int incY, const float c, const float s); | |||
| void cblas_srotm(const int N, float *X, const int incX, | |||
| float *Y, const int incY, const float *P); | |||
| void cblas_drotg(double *a, double *b, double *c, double *s); | |||
| void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); | |||
| void cblas_drot(const int N, double *X, const int incX, | |||
| double *Y, const int incY, const double c, const double s); | |||
| void cblas_drotm(const int N, double *X, const int incX, | |||
| double *Y, const int incY, const double *P); | |||
| /* | |||
| * Routines with S D C Z CS and ZD prefixes | |||
| */ | |||
| void cblas_sscal(const int N, const float alpha, float *X, const int incX); | |||
| void cblas_dscal(const int N, const double alpha, double *X, const int incX); | |||
| void cblas_cscal(const int N, const void *alpha, void *X, const int incX); | |||
| void cblas_zscal(const int N, const void *alpha, void *X, const int incX); | |||
| void cblas_csscal(const int N, const float alpha, void *X, const int incX); | |||
| void cblas_zdscal(const int N, const double alpha, void *X, const int incX); | |||
| /* | |||
| * =========================================================================== | |||
| * Prototypes for level 2 BLAS | |||
| * =========================================================================== | |||
| */ | |||
| /* | |||
| * Routines with standard 4 prefixes (S, D, C, Z) | |||
| */ | |||
| void cblas_sgemv(const CBLAS_LAYOUT layout, | |||
| const CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const float alpha, const float *A, const int lda, | |||
| const float *X, const int incX, const float beta, | |||
| float *Y, const int incY); | |||
| void cblas_sgbmv(CBLAS_LAYOUT layout, | |||
| CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const int KL, const int KU, const float alpha, | |||
| const float *A, const int lda, const float *X, | |||
| const int incX, const float beta, float *Y, const int incY); | |||
| void cblas_strmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const float *A, const int lda, | |||
| float *X, const int incX); | |||
| void cblas_stbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const float *A, const int lda, | |||
| float *X, const int incX); | |||
| void cblas_stpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const float *Ap, float *X, const int incX); | |||
| void cblas_strsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const float *A, const int lda, float *X, | |||
| const int incX); | |||
| void cblas_stbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const float *A, const int lda, | |||
| float *X, const int incX); | |||
| void cblas_stpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const float *Ap, float *X, const int incX); | |||
| void cblas_dgemv(CBLAS_LAYOUT layout, | |||
| CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const double alpha, const double *A, const int lda, | |||
| const double *X, const int incX, const double beta, | |||
| double *Y, const int incY); | |||
| void cblas_dgbmv(CBLAS_LAYOUT layout, | |||
| CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const int KL, const int KU, const double alpha, | |||
| const double *A, const int lda, const double *X, | |||
| const int incX, const double beta, double *Y, const int incY); | |||
| void cblas_dtrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const double *A, const int lda, | |||
| double *X, const int incX); | |||
| void cblas_dtbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const double *A, const int lda, | |||
| double *X, const int incX); | |||
| void cblas_dtpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const double *Ap, double *X, const int incX); | |||
| void cblas_dtrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const double *A, const int lda, double *X, | |||
| const int incX); | |||
| void cblas_dtbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const double *A, const int lda, | |||
| double *X, const int incX); | |||
| void cblas_dtpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const double *Ap, double *X, const int incX); | |||
| void cblas_cgemv(CBLAS_LAYOUT layout, | |||
| CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *X, const int incX, const void *beta, | |||
| void *Y, const int incY); | |||
| void cblas_cgbmv(CBLAS_LAYOUT layout, | |||
| CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const int KL, const int KU, const void *alpha, | |||
| const void *A, const int lda, const void *X, | |||
| const int incX, const void *beta, void *Y, const int incY); | |||
| void cblas_ctrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *A, const int lda, | |||
| void *X, const int incX); | |||
| void cblas_ctbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const void *A, const int lda, | |||
| void *X, const int incX); | |||
| void cblas_ctpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *Ap, void *X, const int incX); | |||
| void cblas_ctrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *A, const int lda, void *X, | |||
| const int incX); | |||
| void cblas_ctbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const void *A, const int lda, | |||
| void *X, const int incX); | |||
| void cblas_ctpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *Ap, void *X, const int incX); | |||
| void cblas_zgemv(CBLAS_LAYOUT layout, | |||
| CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *X, const int incX, const void *beta, | |||
| void *Y, const int incY); | |||
| void cblas_zgbmv(CBLAS_LAYOUT layout, | |||
| CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const int KL, const int KU, const void *alpha, | |||
| const void *A, const int lda, const void *X, | |||
| const int incX, const void *beta, void *Y, const int incY); | |||
| void cblas_ztrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *A, const int lda, | |||
| void *X, const int incX); | |||
| void cblas_ztbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const void *A, const int lda, | |||
| void *X, const int incX); | |||
| void cblas_ztpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *Ap, void *X, const int incX); | |||
| void cblas_ztrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *A, const int lda, void *X, | |||
| const int incX); | |||
| void cblas_ztbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const int K, const void *A, const int lda, | |||
| void *X, const int incX); | |||
| void cblas_ztpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, | |||
| const int N, const void *Ap, void *X, const int incX); | |||
| /* | |||
| * Routines with S and D prefixes only | |||
| */ | |||
| void cblas_ssymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const float *A, | |||
| const int lda, const float *X, const int incX, | |||
| const float beta, float *Y, const int incY); | |||
| void cblas_ssbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const int K, const float alpha, const float *A, | |||
| const int lda, const float *X, const int incX, | |||
| const float beta, float *Y, const int incY); | |||
| void cblas_sspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const float *Ap, | |||
| const float *X, const int incX, | |||
| const float beta, float *Y, const int incY); | |||
| void cblas_sger(CBLAS_LAYOUT layout, const int M, const int N, | |||
| const float alpha, const float *X, const int incX, | |||
| const float *Y, const int incY, float *A, const int lda); | |||
| void cblas_ssyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const float *X, | |||
| const int incX, float *A, const int lda); | |||
| void cblas_sspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const float *X, | |||
| const int incX, float *Ap); | |||
| void cblas_ssyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const float *X, | |||
| const int incX, const float *Y, const int incY, float *A, | |||
| const int lda); | |||
| void cblas_sspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const float *X, | |||
| const int incX, const float *Y, const int incY, float *A); | |||
| void cblas_dsymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const double *A, | |||
| const int lda, const double *X, const int incX, | |||
| const double beta, double *Y, const int incY); | |||
| void cblas_dsbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const int K, const double alpha, const double *A, | |||
| const int lda, const double *X, const int incX, | |||
| const double beta, double *Y, const int incY); | |||
| void cblas_dspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const double *Ap, | |||
| const double *X, const int incX, | |||
| const double beta, double *Y, const int incY); | |||
| void cblas_dger(CBLAS_LAYOUT layout, const int M, const int N, | |||
| const double alpha, const double *X, const int incX, | |||
| const double *Y, const int incY, double *A, const int lda); | |||
| void cblas_dsyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const double *X, | |||
| const int incX, double *A, const int lda); | |||
| void cblas_dspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const double *X, | |||
| const int incX, double *Ap); | |||
| void cblas_dsyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const double *X, | |||
| const int incX, const double *Y, const int incY, double *A, | |||
| const int lda); | |||
| void cblas_dspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const double *X, | |||
| const int incX, const double *Y, const int incY, double *A); | |||
| /* | |||
| * Routines with C and Z prefixes only | |||
| */ | |||
| void cblas_chemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const void *alpha, const void *A, | |||
| const int lda, const void *X, const int incX, | |||
| const void *beta, void *Y, const int incY); | |||
| void cblas_chbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const int K, const void *alpha, const void *A, | |||
| const int lda, const void *X, const int incX, | |||
| const void *beta, void *Y, const int incY); | |||
| void cblas_chpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const void *alpha, const void *Ap, | |||
| const void *X, const int incX, | |||
| const void *beta, void *Y, const int incY); | |||
| void cblas_cgeru(CBLAS_LAYOUT layout, const int M, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda); | |||
| void cblas_cgerc(CBLAS_LAYOUT layout, const int M, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda); | |||
| void cblas_cher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const void *X, const int incX, | |||
| void *A, const int lda); | |||
| void cblas_chpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const void *X, | |||
| const int incX, void *A); | |||
| void cblas_cher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda); | |||
| void cblas_chpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *Ap); | |||
| void cblas_zhemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const void *alpha, const void *A, | |||
| const int lda, const void *X, const int incX, | |||
| const void *beta, void *Y, const int incY); | |||
| void cblas_zhbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const int K, const void *alpha, const void *A, | |||
| const int lda, const void *X, const int incX, | |||
| const void *beta, void *Y, const int incY); | |||
| void cblas_zhpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const void *alpha, const void *Ap, | |||
| const void *X, const int incX, | |||
| const void *beta, void *Y, const int incY); | |||
| void cblas_zgeru(CBLAS_LAYOUT layout, const int M, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda); | |||
| void cblas_zgerc(CBLAS_LAYOUT layout, const int M, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda); | |||
| void cblas_zher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const void *X, const int incX, | |||
| void *A, const int lda); | |||
| void cblas_zhpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| const int N, const double alpha, const void *X, | |||
| const int incX, void *A); | |||
| void cblas_zher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda); | |||
| void cblas_zhpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *Ap); | |||
| /* | |||
| * =========================================================================== | |||
| * Prototypes for level 3 BLAS | |||
| * =========================================================================== | |||
| */ | |||
| /* | |||
| * Routines with standard 4 prefixes (S, D, C, Z) | |||
| */ | |||
| void cblas_sgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_TRANSPOSE TransB, const int M, const int N, | |||
| const int K, const float alpha, const float *A, | |||
| const int lda, const float *B, const int ldb, | |||
| const float beta, float *C, const int ldc); | |||
| void cblas_ssymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, const int M, const int N, | |||
| const float alpha, const float *A, const int lda, | |||
| const float *B, const int ldb, const float beta, | |||
| float *C, const int ldc); | |||
| void cblas_ssyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const float alpha, const float *A, const int lda, | |||
| const float beta, float *C, const int ldc); | |||
| void cblas_ssyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const float alpha, const float *A, const int lda, | |||
| const float *B, const int ldb, const float beta, | |||
| float *C, const int ldc); | |||
| void cblas_strmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const float alpha, const float *A, const int lda, | |||
| float *B, const int ldb); | |||
| void cblas_strsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const float alpha, const float *A, const int lda, | |||
| float *B, const int ldb); | |||
| void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_TRANSPOSE TransB, const int M, const int N, | |||
| const int K, const double alpha, const double *A, | |||
| const int lda, const double *B, const int ldb, | |||
| const double beta, double *C, const int ldc); | |||
| void cblas_dsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, const int M, const int N, | |||
| const double alpha, const double *A, const int lda, | |||
| const double *B, const int ldb, const double beta, | |||
| double *C, const int ldc); | |||
| void cblas_dsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const double alpha, const double *A, const int lda, | |||
| const double beta, double *C, const int ldc); | |||
| void cblas_dsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const double alpha, const double *A, const int lda, | |||
| const double *B, const int ldb, const double beta, | |||
| double *C, const int ldc); | |||
| void cblas_dtrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const double alpha, const double *A, const int lda, | |||
| double *B, const int ldb); | |||
| void cblas_dtrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const double alpha, const double *A, const int lda, | |||
| double *B, const int ldb); | |||
| void cblas_cgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_TRANSPOSE TransB, const int M, const int N, | |||
| const int K, const void *alpha, const void *A, | |||
| const int lda, const void *B, const int ldb, | |||
| const void *beta, void *C, const int ldc); | |||
| void cblas_csymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc); | |||
| void cblas_csyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *beta, void *C, const int ldc); | |||
| void cblas_csyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc); | |||
| void cblas_ctrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| void *B, const int ldb); | |||
| void cblas_ctrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| void *B, const int ldb); | |||
| void cblas_zgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_TRANSPOSE TransB, const int M, const int N, | |||
| const int K, const void *alpha, const void *A, | |||
| const int lda, const void *B, const int ldb, | |||
| const void *beta, void *C, const int ldc); | |||
| void cblas_zsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc); | |||
| void cblas_zsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *beta, void *C, const int ldc); | |||
| void cblas_zsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc); | |||
| void cblas_ztrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| void *B, const int ldb); | |||
| void cblas_ztrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, | |||
| CBLAS_DIAG Diag, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| void *B, const int ldb); | |||
| /* | |||
| * Routines with prefixes C and Z only | |||
| */ | |||
| void cblas_chemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc); | |||
| void cblas_cherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const float alpha, const void *A, const int lda, | |||
| const float beta, void *C, const int ldc); | |||
| void cblas_cher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const float beta, | |||
| void *C, const int ldc); | |||
| void cblas_zhemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side, | |||
| CBLAS_UPLO Uplo, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc); | |||
| void cblas_zherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const double alpha, const void *A, const int lda, | |||
| const double beta, void *C, const int ldc); | |||
| void cblas_zher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, | |||
| CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const double beta, | |||
| void *C, const int ldc); | |||
| void cblas_xerbla(int p, const char *rout, const char *form, ...); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1,394 @@ | |||
| /* | |||
| * cblas_f77.h | |||
| * Written by Keita Teranishi | |||
| * | |||
| * Updated by Jeff Horner | |||
| * Merged cblas_f77.h and cblas_fortran_header.h | |||
| */ | |||
| #ifndef CBLAS_F77_H | |||
| #define CBLAS_F77_H | |||
| #ifdef CRAY | |||
| #include <fortran.h> | |||
| #define F77_CHAR _fcd | |||
| #define C2F_CHAR(a) ( _cptofcd( (a), 1 ) ) | |||
| #define C2F_STR(a, i) ( _cptofcd( (a), (i) ) ) | |||
| #define F77_STRLEN(a) (_fcdlen) | |||
| #endif | |||
| #ifdef WeirdNEC | |||
| #define F77_INT long | |||
| #endif | |||
| #ifdef F77_CHAR | |||
| #define FCHAR F77_CHAR | |||
| #else | |||
| #define FCHAR char * | |||
| #endif | |||
| #ifdef F77_INT | |||
| #define FINT const F77_INT * | |||
| #define FINT2 F77_INT * | |||
| #else | |||
| #define FINT const int * | |||
| #define FINT2 int * | |||
| #endif | |||
| /* | |||
| * Level 1 BLAS | |||
| */ | |||
| #define F77_xerbla F77_GLOBAL(xerbla,XERBLA) | |||
| #define F77_srotg F77_GLOBAL(srotg,SROTG) | |||
| #define F77_srotmg F77_GLOBAL(srotmg,SROTMG) | |||
| #define F77_srot F77_GLOBAL(srot,SROT) | |||
| #define F77_srotm F77_GLOBAL(srotm,SROTM) | |||
| #define F77_drotg F77_GLOBAL(drotg,DROTG) | |||
| #define F77_drotmg F77_GLOBAL(drotmg,DROTMG) | |||
| #define F77_drot F77_GLOBAL(drot,DROT) | |||
| #define F77_drotm F77_GLOBAL(drotm,DROTM) | |||
| #define F77_sswap F77_GLOBAL(sswap,SSWAP) | |||
| #define F77_scopy F77_GLOBAL(scopy,SCOPY) | |||
| #define F77_saxpy F77_GLOBAL(saxpy,SAXPY) | |||
| #define F77_isamax_sub F77_GLOBAL(isamaxsub,ISAMAXSUB) | |||
| #define F77_dswap F77_GLOBAL(dswap,DSWAP) | |||
| #define F77_dcopy F77_GLOBAL(dcopy,DCOPY) | |||
| #define F77_daxpy F77_GLOBAL(daxpy,DAXPY) | |||
| #define F77_idamax_sub F77_GLOBAL(idamaxsub,IDAMAXSUB) | |||
| #define F77_cswap F77_GLOBAL(cswap,CSWAP) | |||
| #define F77_ccopy F77_GLOBAL(ccopy,CCOPY) | |||
| #define F77_caxpy F77_GLOBAL(caxpy,CAXPY) | |||
| #define F77_icamax_sub F77_GLOBAL(icamaxsub,ICAMAXSUB) | |||
| #define F77_zswap F77_GLOBAL(zswap,ZSWAP) | |||
| #define F77_zcopy F77_GLOBAL(zcopy,ZCOPY) | |||
| #define F77_zaxpy F77_GLOBAL(zaxpy,ZAXPY) | |||
| #define F77_izamax_sub F77_GLOBAL(izamaxsub,IZAMAXSUB) | |||
| #define F77_sdot_sub F77_GLOBAL(sdotsub,SDOTSUB) | |||
| #define F77_ddot_sub F77_GLOBAL(ddotsub,DDOTSUB) | |||
| #define F77_dsdot_sub F77_GLOBAL(dsdotsub,DSDOTSUB) | |||
| #define F77_sscal F77_GLOBAL(sscal,SSCAL) | |||
| #define F77_dscal F77_GLOBAL(dscal,DSCAL) | |||
| #define F77_cscal F77_GLOBAL(cscal,CSCAL) | |||
| #define F77_zscal F77_GLOBAL(zscal,ZSCAL) | |||
| #define F77_csscal F77_GLOBAL(csscal,CSSCAL) | |||
| #define F77_zdscal F77_GLOBAL(zdscal,ZDSCAL) | |||
| #define F77_cdotu_sub F77_GLOBAL(cdotusub,CDOTUSUB) | |||
| #define F77_cdotc_sub F77_GLOBAL(cdotcsub,CDOTCSUB) | |||
| #define F77_zdotu_sub F77_GLOBAL(zdotusub,ZDOTUSUB) | |||
| #define F77_zdotc_sub F77_GLOBAL(zdotcsub,ZDOTCSUB) | |||
| #define F77_snrm2_sub F77_GLOBAL(snrm2sub,SNRM2SUB) | |||
| #define F77_sasum_sub F77_GLOBAL(sasumsub,SASUMSUB) | |||
| #define F77_dnrm2_sub F77_GLOBAL(dnrm2sub,DNRM2SUB) | |||
| #define F77_dasum_sub F77_GLOBAL(dasumsub,DASUMSUB) | |||
| #define F77_scnrm2_sub F77_GLOBAL(scnrm2sub,SCNRM2SUB) | |||
| #define F77_scasum_sub F77_GLOBAL(scasumsub,SCASUMSUB) | |||
| #define F77_dznrm2_sub F77_GLOBAL(dznrm2sub,DZNRM2SUB) | |||
| #define F77_dzasum_sub F77_GLOBAL(dzasumsub,DZASUMSUB) | |||
| #define F77_sdsdot_sub F77_GLOBAL(sdsdotsub,SDSDOTSUB) | |||
| /* | |||
| * Level 2 BLAS | |||
| */ | |||
| #define F77_ssymv F77_GLOBAL(ssymv,SSYMY) | |||
| #define F77_ssbmv F77_GLOBAL(ssbmv,SSMBV) | |||
| #define F77_sspmv F77_GLOBAL(sspmv,SSPMV) | |||
| #define F77_sger F77_GLOBAL(sger,SGER) | |||
| #define F77_ssyr F77_GLOBAL(ssyr,SSYR) | |||
| #define F77_sspr F77_GLOBAL(sspr,SSPR) | |||
| #define F77_ssyr2 F77_GLOBAL(ssyr2,SSYR2) | |||
| #define F77_sspr2 F77_GLOBAL(sspr2,SSPR2) | |||
| #define F77_dsymv F77_GLOBAL(dsymv,DSYMV) | |||
| #define F77_dsbmv F77_GLOBAL(dsbmv,DSBMV) | |||
| #define F77_dspmv F77_GLOBAL(dspmv,DSPMV) | |||
| #define F77_dger F77_GLOBAL(dger,DGER) | |||
| #define F77_dsyr F77_GLOBAL(dsyr,DSYR) | |||
| #define F77_dspr F77_GLOBAL(dspr,DSPR) | |||
| #define F77_dsyr2 F77_GLOBAL(dsyr2,DSYR2) | |||
| #define F77_dspr2 F77_GLOBAL(dspr2,DSPR2) | |||
| #define F77_chemv F77_GLOBAL(chemv,CHEMV) | |||
| #define F77_chbmv F77_GLOBAL(chbmv,CHBMV) | |||
| #define F77_chpmv F77_GLOBAL(chpmv,CHPMV) | |||
| #define F77_cgeru F77_GLOBAL(cgeru,CGERU) | |||
| #define F77_cgerc F77_GLOBAL(cgerc,CGERC) | |||
| #define F77_cher F77_GLOBAL(cher,CHER) | |||
| #define F77_chpr F77_GLOBAL(chpr,CHPR) | |||
| #define F77_cher2 F77_GLOBAL(cher2,CHER2) | |||
| #define F77_chpr2 F77_GLOBAL(chpr2,CHPR2) | |||
| #define F77_zhemv F77_GLOBAL(zhemv,ZHEMV) | |||
| #define F77_zhbmv F77_GLOBAL(zhbmv,ZHBMV) | |||
| #define F77_zhpmv F77_GLOBAL(zhpmv,ZHPMV) | |||
| #define F77_zgeru F77_GLOBAL(zgeru,ZGERU) | |||
| #define F77_zgerc F77_GLOBAL(zgerc,ZGERC) | |||
| #define F77_zher F77_GLOBAL(zher,ZHER) | |||
| #define F77_zhpr F77_GLOBAL(zhpr,ZHPR) | |||
| #define F77_zher2 F77_GLOBAL(zher2,ZHER2) | |||
| #define F77_zhpr2 F77_GLOBAL(zhpr2,ZHPR2) | |||
| #define F77_sgemv F77_GLOBAL(sgemv,SGEMV) | |||
| #define F77_sgbmv F77_GLOBAL(sgbmv,SGBMV) | |||
| #define F77_strmv F77_GLOBAL(strmv,STRMV) | |||
| #define F77_stbmv F77_GLOBAL(stbmv,STBMV) | |||
| #define F77_stpmv F77_GLOBAL(stpmv,STPMV) | |||
| #define F77_strsv F77_GLOBAL(strsv,STRSV) | |||
| #define F77_stbsv F77_GLOBAL(stbsv,STBSV) | |||
| #define F77_stpsv F77_GLOBAL(stpsv,STPSV) | |||
| #define F77_dgemv F77_GLOBAL(dgemv,DGEMV) | |||
| #define F77_dgbmv F77_GLOBAL(dgbmv,DGBMV) | |||
| #define F77_dtrmv F77_GLOBAL(dtrmv,DTRMV) | |||
| #define F77_dtbmv F77_GLOBAL(dtbmv,DTBMV) | |||
| #define F77_dtpmv F77_GLOBAL(dtpmv,DTRMV) | |||
| #define F77_dtrsv F77_GLOBAL(dtrsv,DTRSV) | |||
| #define F77_dtbsv F77_GLOBAL(dtbsv,DTBSV) | |||
| #define F77_dtpsv F77_GLOBAL(dtpsv,DTPSV) | |||
| #define F77_cgemv F77_GLOBAL(cgemv,CGEMV) | |||
| #define F77_cgbmv F77_GLOBAL(cgbmv,CGBMV) | |||
| #define F77_ctrmv F77_GLOBAL(ctrmv,CTRMV) | |||
| #define F77_ctbmv F77_GLOBAL(ctbmv,CTBMV) | |||
| #define F77_ctpmv F77_GLOBAL(ctpmv,CTPMV) | |||
| #define F77_ctrsv F77_GLOBAL(ctrsv,CTRSV) | |||
| #define F77_ctbsv F77_GLOBAL(ctbsv,CTBSV) | |||
| #define F77_ctpsv F77_GLOBAL(ctpsv,CTPSV) | |||
| #define F77_zgemv F77_GLOBAL(zgemv,ZGEMV) | |||
| #define F77_zgbmv F77_GLOBAL(zgbmv,ZGBMV) | |||
| #define F77_ztrmv F77_GLOBAL(ztrmv,ZTRMV) | |||
| #define F77_ztbmv F77_GLOBAL(ztbmv,ZTBMV) | |||
| #define F77_ztpmv F77_GLOBAL(ztpmv,ZTPMV) | |||
| #define F77_ztrsv F77_GLOBAL(ztrsv,ZTRSV) | |||
| #define F77_ztbsv F77_GLOBAL(ztbsv,ZTBSV) | |||
| #define F77_ztpsv F77_GLOBAL(ztpsv,ZTPSV) | |||
| /* | |||
| * Level 3 BLAS | |||
| */ | |||
| #define F77_chemm F77_GLOBAL(chemm,CHEMM) | |||
| #define F77_cherk F77_GLOBAL(cherk,CHERK) | |||
| #define F77_cher2k F77_GLOBAL(cher2k,CHER2K) | |||
| #define F77_zhemm F77_GLOBAL(zhemm,ZHEMM) | |||
| #define F77_zherk F77_GLOBAL(zherk,ZHERK) | |||
| #define F77_zher2k F77_GLOBAL(zher2k,ZHER2K) | |||
| #define F77_sgemm F77_GLOBAL(sgemm,SGEMM) | |||
| #define F77_ssymm F77_GLOBAL(ssymm,SSYMM) | |||
| #define F77_ssyrk F77_GLOBAL(ssyrk,SSYRK) | |||
| #define F77_ssyr2k F77_GLOBAL(ssyr2k,SSYR2K) | |||
| #define F77_strmm F77_GLOBAL(strmm,STRMM) | |||
| #define F77_strsm F77_GLOBAL(strsm,STRSM) | |||
| #define F77_dgemm F77_GLOBAL(dgemm,DGEMM) | |||
| #define F77_dsymm F77_GLOBAL(dsymm,DSYMM) | |||
| #define F77_dsyrk F77_GLOBAL(dsyrk,DSYRK) | |||
| #define F77_dsyr2k F77_GLOBAL(dsyr2k,DSYR2K) | |||
| #define F77_dtrmm F77_GLOBAL(dtrmm,DTRMM) | |||
| #define F77_dtrsm F77_GLOBAL(dtrsm,DTRSM) | |||
| #define F77_cgemm F77_GLOBAL(cgemm,CGEMM) | |||
| #define F77_csymm F77_GLOBAL(csymm,CSYMM) | |||
| #define F77_csyrk F77_GLOBAL(csyrk,CSYRK) | |||
| #define F77_csyr2k F77_GLOBAL(csyr2k,CSYR2K) | |||
| #define F77_ctrmm F77_GLOBAL(ctrmm,CTRMM) | |||
| #define F77_ctrsm F77_GLOBAL(ctrsm,CTRSM) | |||
| #define F77_zgemm F77_GLOBAL(zgemm,ZGEMM) | |||
| #define F77_zsymm F77_GLOBAL(zsymm,ZSYMM) | |||
| #define F77_zsyrk F77_GLOBAL(zsyrk,ZSYRK) | |||
| #define F77_zsyr2k F77_GLOBAL(zsyr2k,ZSYR2K) | |||
| #define F77_ztrmm F77_GLOBAL(ztrmm,ZTRMM) | |||
| #define F77_ztrsm F77_GLOBAL(ztrsm,ZTRSM) | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void F77_xerbla(FCHAR, void *); | |||
| /* | |||
| * Level 1 Fortran Prototypes | |||
| */ | |||
| /* Single Precision */ | |||
| void F77_srot(FINT, float *, FINT, float *, FINT, const float *, const float *); | |||
| void F77_srotg(float *,float *,float *,float *); | |||
| void F77_srotm( FINT, float *, FINT, float *, FINT, const float *); | |||
| void F77_srotmg(float *,float *,float *,const float *, float *); | |||
| void F77_sswap( FINT, float *, FINT, float *, FINT); | |||
| void F77_scopy( FINT, const float *, FINT, float *, FINT); | |||
| void F77_saxpy( FINT, const float *, const float *, FINT, float *, FINT); | |||
| void F77_sdot_sub(FINT, const float *, FINT, const float *, FINT, float *); | |||
| void F77_sdsdot_sub( FINT, const float *, const float *, FINT, const float *, FINT, float *); | |||
| void F77_sscal( FINT, const float *, float *, FINT); | |||
| void F77_snrm2_sub( FINT, const float *, FINT, float *); | |||
| void F77_sasum_sub( FINT, const float *, FINT, float *); | |||
| void F77_isamax_sub( FINT, const float * , FINT, FINT2); | |||
| /* Double Precision */ | |||
| void F77_drot(FINT, double *, FINT, double *, FINT, const double *, const double *); | |||
| void F77_drotg(double *,double *,double *,double *); | |||
| void F77_drotm( FINT, double *, FINT, double *, FINT, const double *); | |||
| void F77_drotmg(double *,double *,double *,const double *, double *); | |||
| void F77_dswap( FINT, double *, FINT, double *, FINT); | |||
| void F77_dcopy( FINT, const double *, FINT, double *, FINT); | |||
| void F77_daxpy( FINT, const double *, const double *, FINT, double *, FINT); | |||
| void F77_dswap( FINT, double *, FINT, double *, FINT); | |||
| void F77_dsdot_sub(FINT, const float *, FINT, const float *, FINT, double *); | |||
| void F77_ddot_sub( FINT, const double *, FINT, const double *, FINT, double *); | |||
| void F77_dscal( FINT, const double *, double *, FINT); | |||
| void F77_dnrm2_sub( FINT, const double *, FINT, double *); | |||
| void F77_dasum_sub( FINT, const double *, FINT, double *); | |||
| void F77_idamax_sub( FINT, const double * , FINT, FINT2); | |||
| /* Single Complex Precision */ | |||
| void F77_cswap( FINT, void *, FINT, void *, FINT); | |||
| void F77_ccopy( FINT, const void *, FINT, void *, FINT); | |||
| void F77_caxpy( FINT, const void *, const void *, FINT, void *, FINT); | |||
| void F77_cswap( FINT, void *, FINT, void *, FINT); | |||
| void F77_cdotc_sub( FINT, const void *, FINT, const void *, FINT, void *); | |||
| void F77_cdotu_sub( FINT, const void *, FINT, const void *, FINT, void *); | |||
| void F77_cscal( FINT, const void *, void *, FINT); | |||
| void F77_icamax_sub( FINT, const void *, FINT, FINT2); | |||
| void F77_csscal( FINT, const float *, void *, FINT); | |||
| void F77_scnrm2_sub( FINT, const void *, FINT, float *); | |||
| void F77_scasum_sub( FINT, const void *, FINT, float *); | |||
| /* Double Complex Precision */ | |||
| void F77_zswap( FINT, void *, FINT, void *, FINT); | |||
| void F77_zcopy( FINT, const void *, FINT, void *, FINT); | |||
| void F77_zaxpy( FINT, const void *, const void *, FINT, void *, FINT); | |||
| void F77_zswap( FINT, void *, FINT, void *, FINT); | |||
| void F77_zdotc_sub( FINT, const void *, FINT, const void *, FINT, void *); | |||
| void F77_zdotu_sub( FINT, const void *, FINT, const void *, FINT, void *); | |||
| void F77_zdscal( FINT, const double *, void *, FINT); | |||
| void F77_zscal( FINT, const void *, void *, FINT); | |||
| void F77_dznrm2_sub( FINT, const void *, FINT, double *); | |||
| void F77_dzasum_sub( FINT, const void *, FINT, double *); | |||
| void F77_izamax_sub( FINT, const void *, FINT, FINT2); | |||
| /* | |||
| * Level 2 Fortran Prototypes | |||
| */ | |||
| /* Single Precision */ | |||
| void F77_sgemv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_sgbmv(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_ssymv(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_ssbmv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_sspmv(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT); | |||
| void F77_strmv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT); | |||
| void F77_stbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT); | |||
| void F77_strsv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT); | |||
| void F77_stbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT); | |||
| void F77_stpmv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT); | |||
| void F77_stpsv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT); | |||
| void F77_sger( FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); | |||
| void F77_ssyr(FCHAR, FINT, const float *, const float *, FINT, float *, FINT); | |||
| void F77_sspr(FCHAR, FINT, const float *, const float *, FINT, float *); | |||
| void F77_sspr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *); | |||
| void F77_ssyr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); | |||
| /* Double Precision */ | |||
| void F77_dgemv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dgbmv(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dsymv(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dsbmv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dspmv(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dtrmv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT); | |||
| void F77_dtbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT); | |||
| void F77_dtrsv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT); | |||
| void F77_dtbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT); | |||
| void F77_dtpmv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT); | |||
| void F77_dtpsv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT); | |||
| void F77_dger( FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); | |||
| void F77_dsyr(FCHAR, FINT, const double *, const double *, FINT, double *, FINT); | |||
| void F77_dspr(FCHAR, FINT, const double *, const double *, FINT, double *); | |||
| void F77_dspr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *); | |||
| void F77_dsyr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); | |||
| /* Single Complex Precision */ | |||
| void F77_cgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_cgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_chemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_chbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_chpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT); | |||
| void F77_ctrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ctbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ctpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT); | |||
| void F77_ctrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ctbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ctpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT); | |||
| void F77_cgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); | |||
| void F77_cgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); | |||
| void F77_cher(FCHAR, FINT, const float *, const void *, FINT, void *, FINT); | |||
| void F77_cher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); | |||
| void F77_chpr(FCHAR, FINT, const float *, const void *, FINT, void *); | |||
| void F77_chpr2(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void *); | |||
| /* Double Complex Precision */ | |||
| void F77_zgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_zgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_zhemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_zhbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); | |||
| void F77_zhpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT); | |||
| void F77_ztrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ztbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ztpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT); | |||
| void F77_ztrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ztbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); | |||
| void F77_ztpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT); | |||
| void F77_zgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); | |||
| void F77_zgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); | |||
| void F77_zher(FCHAR, FINT, const double *, const void *, FINT, void *, FINT); | |||
| void F77_zher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); | |||
| void F77_zhpr(FCHAR, FINT, const double *, const void *, FINT, void *); | |||
| void F77_zhpr2(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void *); | |||
| /* | |||
| * Level 3 Fortran Prototypes | |||
| */ | |||
| /* Single Precision */ | |||
| void F77_sgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_ssymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_ssyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT); | |||
| void F77_ssyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_strmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); | |||
| void F77_strsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); | |||
| /* Double Precision */ | |||
| void F77_dgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_dtrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); | |||
| void F77_dtrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); | |||
| /* Single Complex Precision */ | |||
| void F77_cgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_csymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_chemm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_csyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT); | |||
| void F77_cherk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT); | |||
| void F77_csyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_cher2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); | |||
| void F77_ctrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); | |||
| void F77_ctrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); | |||
| /* Double Complex Precision */ | |||
| void F77_zgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_zsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_zhemm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_zsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT); | |||
| void F77_zherk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT); | |||
| void F77_zsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_zher2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); | |||
| void F77_ztrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); | |||
| void F77_ztrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif /* CBLAS_F77_H */ | |||
| @@ -0,0 +1,17 @@ | |||
| #ifndef F77_HEADER_INCLUDED | |||
| #define F77_HEADER_INCLUDED | |||
| #ifndef F77_GLOBAL | |||
| #if defined(F77_GLOBAL_PATTERN_LC) || defined(ADD_) | |||
| #define F77_GLOBAL(lcname,UCNAME) lcname##_ | |||
| #elif defined(F77_GLOBAL_PATTERN_UC) || defined(UPPER) | |||
| #define F77_GLOBAL(lcname,UCNAME) UCNAME | |||
| #elif defined(F77_GLOBAL_PATTERN_MC) || defined(NOCHANGE) | |||
| #define F77_GLOBAL(lcname,UCNAME) lcname | |||
| #else | |||
| #define F77_GLOBAL(lcname,UCNAME) lcname##_ | |||
| #endif | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1,190 @@ | |||
| /* | |||
| * cblas_test.h | |||
| * Written by Keita Teranishi | |||
| */ | |||
| #ifndef CBLAS_TEST_H | |||
| #define CBLAS_TEST_H | |||
| #include "cblas.h" | |||
| #include "cblas_mangling.h" | |||
| #define TRUE 1 | |||
| #define PASSED 1 | |||
| #define TEST_ROW_MJR 1 | |||
| #define FALSE 0 | |||
| #define FAILED 0 | |||
| #define TEST_COL_MJR 0 | |||
| #define INVALID -1 | |||
| #define UNDEFINED -1 | |||
| typedef struct { float real; float imag; } CBLAS_TEST_COMPLEX; | |||
| typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX; | |||
| #define F77_xerbla F77_GLOBAL(xerbla,XERBLA) | |||
| /* | |||
| * Level 1 BLAS | |||
| */ | |||
| #define F77_srotg F77_GLOBAL(srotgtest,SROTGTEST) | |||
| #define F77_srotmg F77_GLOBAL(srotmgtest,SROTMGTEST) | |||
| #define F77_srot F77_GLOBAL(srottest,SROTTEST) | |||
| #define F77_srotm F77_GLOBAL(srotmtest,SROTMTEST) | |||
| #define F77_drotg F77_GLOBAL(drotgtest,DROTGTEST) | |||
| #define F77_drotmg F77_GLOBAL(drotmgtest,DROTMGTEST) | |||
| #define F77_drot F77_GLOBAL(drottest,DROTTEST) | |||
| #define F77_drotm F77_GLOBAL(drotmtest,DROTMTEST) | |||
| #define F77_sswap F77_GLOBAL(sswaptest,SSWAPTEST) | |||
| #define F77_scopy F77_GLOBAL(scopytest,SCOPYTEST) | |||
| #define F77_saxpy F77_GLOBAL(saxpytest,SAXPYTEST) | |||
| #define F77_isamax F77_GLOBAL(isamaxtest,ISAMAXTEST) | |||
| #define F77_dswap F77_GLOBAL(dswaptest,DSWAPTEST) | |||
| #define F77_dcopy F77_GLOBAL(dcopytest,DCOPYTEST) | |||
| #define F77_daxpy F77_GLOBAL(daxpytest,DAXPYTEST) | |||
| #define F77_idamax F77_GLOBAL(idamaxtest,IDAMAXTEST) | |||
| #define F77_cswap F77_GLOBAL(cswaptest,CSWAPTEST) | |||
| #define F77_ccopy F77_GLOBAL(ccopytest,CCOPYTEST) | |||
| #define F77_caxpy F77_GLOBAL(caxpytest,CAXPYTEST) | |||
| #define F77_icamax F77_GLOBAL(icamaxtest,ICAMAXTEST) | |||
| #define F77_zswap F77_GLOBAL(zswaptest,ZSWAPTEST) | |||
| #define F77_zcopy F77_GLOBAL(zcopytest,ZCOPYTEST) | |||
| #define F77_zaxpy F77_GLOBAL(zaxpytest,ZAXPYTEST) | |||
| #define F77_izamax F77_GLOBAL(izamaxtest,IZAMAXTEST) | |||
| #define F77_sdot F77_GLOBAL(sdottest,SDOTTEST) | |||
| #define F77_ddot F77_GLOBAL(ddottest,DDOTTEST) | |||
| #define F77_dsdot F77_GLOBAL(dsdottest,DSDOTTEST) | |||
| #define F77_sscal F77_GLOBAL(sscaltest,SSCALTEST) | |||
| #define F77_dscal F77_GLOBAL(dscaltest,DSCALTEST) | |||
| #define F77_cscal F77_GLOBAL(cscaltest,CSCALTEST) | |||
| #define F77_zscal F77_GLOBAL(zscaltest,ZSCALTEST) | |||
| #define F77_csscal F77_GLOBAL(csscaltest,CSSCALTEST) | |||
| #define F77_zdscal F77_GLOBAL(zdscaltest,ZDSCALTEST) | |||
| #define F77_cdotu F77_GLOBAL(cdotutest,CDOTUTEST) | |||
| #define F77_cdotc F77_GLOBAL(cdotctest,CDOTCTEST) | |||
| #define F77_zdotu F77_GLOBAL(zdotutest,ZDOTUTEST) | |||
| #define F77_zdotc F77_GLOBAL(zdotctest,ZDOTCTEST) | |||
| #define F77_snrm2 F77_GLOBAL(snrm2test,SNRM2TEST) | |||
| #define F77_sasum F77_GLOBAL(sasumtest,SASUMTEST) | |||
| #define F77_dnrm2 F77_GLOBAL(dnrm2test,DNRM2TEST) | |||
| #define F77_dasum F77_GLOBAL(dasumtest,DASUMTEST) | |||
| #define F77_scnrm2 F77_GLOBAL(scnrm2test,SCNRM2TEST) | |||
| #define F77_scasum F77_GLOBAL(scasumtest,SCASUMTEST) | |||
| #define F77_dznrm2 F77_GLOBAL(dznrm2test,DZNRM2TEST) | |||
| #define F77_dzasum F77_GLOBAL(dzasumtest,DZASUMTEST) | |||
| #define F77_sdsdot F77_GLOBAL(sdsdottest, SDSDOTTEST) | |||
| /* | |||
| * Level 2 BLAS | |||
| */ | |||
| #define F77_s2chke F77_GLOBAL(cs2chke,CS2CHKE) | |||
| #define F77_d2chke F77_GLOBAL(cd2chke,CD2CHKE) | |||
| #define F77_c2chke F77_GLOBAL(cc2chke,CC2CHKE) | |||
| #define F77_z2chke F77_GLOBAL(cz2chke,CZ2CHKE) | |||
| #define F77_ssymv F77_GLOBAL(cssymv,CSSYMV) | |||
| #define F77_ssbmv F77_GLOBAL(cssbmv,CSSBMV) | |||
| #define F77_sspmv F77_GLOBAL(csspmv,CSSPMV) | |||
| #define F77_sger F77_GLOBAL(csger,CSGER) | |||
| #define F77_ssyr F77_GLOBAL(cssyr,CSSYR) | |||
| #define F77_sspr F77_GLOBAL(csspr,CSSPR) | |||
| #define F77_ssyr2 F77_GLOBAL(cssyr2,CSSYR2) | |||
| #define F77_sspr2 F77_GLOBAL(csspr2,CSSPR2) | |||
| #define F77_dsymv F77_GLOBAL(cdsymv,CDSYMV) | |||
| #define F77_dsbmv F77_GLOBAL(cdsbmv,CDSBMV) | |||
| #define F77_dspmv F77_GLOBAL(cdspmv,CDSPMV) | |||
| #define F77_dger F77_GLOBAL(cdger,CDGER) | |||
| #define F77_dsyr F77_GLOBAL(cdsyr,CDSYR) | |||
| #define F77_dspr F77_GLOBAL(cdspr,CDSPR) | |||
| #define F77_dsyr2 F77_GLOBAL(cdsyr2,CDSYR2) | |||
| #define F77_dspr2 F77_GLOBAL(cdspr2,CDSPR2) | |||
| #define F77_chemv F77_GLOBAL(cchemv,CCHEMV) | |||
| #define F77_chbmv F77_GLOBAL(cchbmv,CCHBMV) | |||
| #define F77_chpmv F77_GLOBAL(cchpmv,CCHPMV) | |||
| #define F77_cgeru F77_GLOBAL(ccgeru,CCGERU) | |||
| #define F77_cgerc F77_GLOBAL(ccgerc,CCGERC) | |||
| #define F77_cher F77_GLOBAL(ccher,CCHER) | |||
| #define F77_chpr F77_GLOBAL(cchpr,CCHPR) | |||
| #define F77_cher2 F77_GLOBAL(ccher2,CCHER2) | |||
| #define F77_chpr2 F77_GLOBAL(cchpr2,CCHPR2) | |||
| #define F77_zhemv F77_GLOBAL(czhemv,CZHEMV) | |||
| #define F77_zhbmv F77_GLOBAL(czhbmv,CZHBMV) | |||
| #define F77_zhpmv F77_GLOBAL(czhpmv,CZHPMV) | |||
| #define F77_zgeru F77_GLOBAL(czgeru,CZGERU) | |||
| #define F77_zgerc F77_GLOBAL(czgerc,CZGERC) | |||
| #define F77_zher F77_GLOBAL(czher,CZHER) | |||
| #define F77_zhpr F77_GLOBAL(czhpr,CZHPR) | |||
| #define F77_zher2 F77_GLOBAL(czher2,CZHER2) | |||
| #define F77_zhpr2 F77_GLOBAL(czhpr2,CZHPR2) | |||
| #define F77_sgemv F77_GLOBAL(csgemv,CSGEMV) | |||
| #define F77_sgbmv F77_GLOBAL(csgbmv,CSGBMV) | |||
| #define F77_strmv F77_GLOBAL(cstrmv,CSTRMV) | |||
| #define F77_stbmv F77_GLOBAL(cstbmv,CSTBMV) | |||
| #define F77_stpmv F77_GLOBAL(cstpmv,CSTPMV) | |||
| #define F77_strsv F77_GLOBAL(cstrsv,CSTRSV) | |||
| #define F77_stbsv F77_GLOBAL(cstbsv,CSTBSV) | |||
| #define F77_stpsv F77_GLOBAL(cstpsv,CSTPSV) | |||
| #define F77_dgemv F77_GLOBAL(cdgemv,CDGEMV) | |||
| #define F77_dgbmv F77_GLOBAL(cdgbmv,CDGBMV) | |||
| #define F77_dtrmv F77_GLOBAL(cdtrmv,CDTRMV) | |||
| #define F77_dtbmv F77_GLOBAL(cdtbmv,CDTBMV) | |||
| #define F77_dtpmv F77_GLOBAL(cdtpmv,CDTPMV) | |||
| #define F77_dtrsv F77_GLOBAL(cdtrsv,CDTRSV) | |||
| #define F77_dtbsv F77_GLOBAL(cdtbsv,CDTBSV) | |||
| #define F77_dtpsv F77_GLOBAL(cdtpsv,CDTPSV) | |||
| #define F77_cgemv F77_GLOBAL(ccgemv,CCGEMV) | |||
| #define F77_cgbmv F77_GLOBAL(ccgbmv,CCGBMV) | |||
| #define F77_ctrmv F77_GLOBAL(cctrmv,CCTRMV) | |||
| #define F77_ctbmv F77_GLOBAL(cctbmv,CCTPMV) | |||
| #define F77_ctpmv F77_GLOBAL(cctpmv,CCTPMV) | |||
| #define F77_ctrsv F77_GLOBAL(cctrsv,CCTRSV) | |||
| #define F77_ctbsv F77_GLOBAL(cctbsv,CCTBSV) | |||
| #define F77_ctpsv F77_GLOBAL(cctpsv,CCTPSV) | |||
| #define F77_zgemv F77_GLOBAL(czgemv,CZGEMV) | |||
| #define F77_zgbmv F77_GLOBAL(czgbmv,CZGBMV) | |||
| #define F77_ztrmv F77_GLOBAL(cztrmv,CZTRMV) | |||
| #define F77_ztbmv F77_GLOBAL(cztbmv,CZTBMV) | |||
| #define F77_ztpmv F77_GLOBAL(cztpmv,CZTPMV) | |||
| #define F77_ztrsv F77_GLOBAL(cztrsv,CZTRSV) | |||
| #define F77_ztbsv F77_GLOBAL(cztbsv,CZTBSV) | |||
| #define F77_ztpsv F77_GLOBAL(cztpsv,CZTPSV) | |||
| /* | |||
| * Level 3 BLAS | |||
| */ | |||
| #define F77_s3chke F77_GLOBAL(cs3chke,CS3CHKE) | |||
| #define F77_d3chke F77_GLOBAL(cd3chke,CD3CHKE) | |||
| #define F77_c3chke F77_GLOBAL(cc3chke,CC3CHKE) | |||
| #define F77_z3chke F77_GLOBAL(cz3chke,CZ3CHKE) | |||
| #define F77_chemm F77_GLOBAL(cchemm,CCHEMM) | |||
| #define F77_cherk F77_GLOBAL(ccherk,CCHERK) | |||
| #define F77_cher2k F77_GLOBAL(ccher2k,CCHER2K) | |||
| #define F77_zhemm F77_GLOBAL(czhemm,CZHEMM) | |||
| #define F77_zherk F77_GLOBAL(czherk,CZHERK) | |||
| #define F77_zher2k F77_GLOBAL(czher2k,CZHER2K) | |||
| #define F77_sgemm F77_GLOBAL(csgemm,CSGEMM) | |||
| #define F77_ssymm F77_GLOBAL(cssymm,CSSYMM) | |||
| #define F77_ssyrk F77_GLOBAL(cssyrk,CSSYRK) | |||
| #define F77_ssyr2k F77_GLOBAL(cssyr2k,CSSYR2K) | |||
| #define F77_strmm F77_GLOBAL(cstrmm,CSTRMM) | |||
| #define F77_strsm F77_GLOBAL(cstrsm,CSTRSM) | |||
| #define F77_dgemm F77_GLOBAL(cdgemm,CDGEMM) | |||
| #define F77_dsymm F77_GLOBAL(cdsymm,CDSYMM) | |||
| #define F77_dsyrk F77_GLOBAL(cdsyrk,CDSYRK) | |||
| #define F77_dsyr2k F77_GLOBAL(cdsyr2k,CDSYR2K) | |||
| #define F77_dtrmm F77_GLOBAL(cdtrmm,CDTRMM) | |||
| #define F77_dtrsm F77_GLOBAL(cdtrsm,CDTRSM) | |||
| #define F77_cgemm F77_GLOBAL(ccgemm,CCGEMM) | |||
| #define F77_csymm F77_GLOBAL(ccsymm,CCSYMM) | |||
| #define F77_csyrk F77_GLOBAL(ccsyrk,CCSYRK) | |||
| #define F77_csyr2k F77_GLOBAL(ccsyr2k,CCSYR2K) | |||
| #define F77_ctrmm F77_GLOBAL(cctrmm,CCTRMM) | |||
| #define F77_ctrsm F77_GLOBAL(cctrsm,CCTRSM) | |||
| #define F77_zgemm F77_GLOBAL(czgemm,CZGEMM) | |||
| #define F77_zsymm F77_GLOBAL(czsymm,CZSYMM) | |||
| #define F77_zsyrk F77_GLOBAL(czsyrk,CZSYRK) | |||
| #define F77_zsyr2k F77_GLOBAL(czsyr2k,CZSYR2K) | |||
| #define F77_ztrmm F77_GLOBAL(cztrmm,CZTRMM) | |||
| #define F77_ztrsm F77_GLOBAL(cztrsm, CZTRSM) | |||
| void get_transpose_type(char *type, CBLAS_TRANSPOSE *trans); | |||
| void get_uplo_type(char *type, CBLAS_UPLO *uplo); | |||
| void get_diag_type(char *type, CBLAS_DIAG *diag); | |||
| void get_side_type(char *type, CBLAS_SIDE *side); | |||
| #endif /* CBLAS_TEST_H */ | |||
| @@ -0,0 +1,168 @@ | |||
| # This Makefile compiles the CBLAS routines | |||
| # | |||
| # Error handling routines for level 2 & 3 | |||
| set (ERRHAND cblas_globals.c cblas_xerbla.c xerbla.c) | |||
| # | |||
| # | |||
| # CBLAS routines | |||
| # | |||
| # Level 1 | |||
| # | |||
| # | |||
| # | |||
| # All object files for single real precision | |||
| # | |||
| set (SLEV1 cblas_srotg.c cblas_srotmg.c cblas_srot.c cblas_srotm.c | |||
| cblas_sswap.c cblas_sscal.c cblas_scopy.c cblas_saxpy.c | |||
| cblas_sdot.c cblas_sdsdot.c cblas_snrm2.c cblas_sasum.c | |||
| cblas_isamax.c sdotsub.f sdsdotsub.f snrm2sub.f sasumsub.f | |||
| isamaxsub.f) | |||
| # | |||
| # All object files for double real precision | |||
| # | |||
| set (DLEV1 cblas_drotg.c cblas_drotmg.c cblas_drot.c cblas_drotm.c | |||
| cblas_dswap.c cblas_dscal.c cblas_dcopy.c cblas_daxpy.c | |||
| cblas_ddot.c cblas_dsdot.c cblas_dnrm2.c cblas_dasum.c | |||
| cblas_idamax.c ddotsub.f dsdotsub.f dnrm2sub.f | |||
| dasumsub.f idamaxsub.f) | |||
| # | |||
| # All object files for single complex precision | |||
| # | |||
| set (CLEV1 cblas_cswap.c cblas_cscal.c cblas_csscal.c cblas_ccopy.c | |||
| cblas_caxpy.c cblas_cdotu_sub.c cblas_cdotc_sub.c | |||
| cblas_icamax.c cdotcsub.f cdotusub.f icamaxsub.f) | |||
| # | |||
| # All object files for double complex precision | |||
| # | |||
| set (ZLEV1 cblas_zswap.c cblas_zscal.c cblas_zdscal.c cblas_zcopy.c | |||
| cblas_zaxpy.c cblas_zdotu_sub.c cblas_zdotc_sub.c cblas_dznrm2.c | |||
| cblas_dzasum.c cblas_izamax.c zdotcsub.f zdotusub.f | |||
| dzasumsub.f dznrm2sub.f izamaxsub.f) | |||
| # | |||
| # Common files for single complex precision | |||
| # | |||
| set (SCLEV1 cblas_scasum.c scasumsub.f cblas_scnrm2.c scnrm2sub.f) | |||
| # | |||
| # All object files | |||
| # | |||
| set (ALEV1 ${slev1} ${dlev1} ${clev1} ${zlev1} ${sclev1}) | |||
| # | |||
| # | |||
| # CBLAS routines | |||
| # | |||
| # Level 2 | |||
| # | |||
| # | |||
| # | |||
| # All object files for single real precision | |||
| # | |||
| set (SLEV2 cblas_sgemv.c cblas_sgbmv.c cblas_sger.c cblas_ssbmv.c cblas_sspmv.c | |||
| cblas_sspr.c cblas_sspr2.c cblas_ssymv.c cblas_ssyr.c cblas_ssyr2.c | |||
| cblas_stbmv.c cblas_stbsv.c cblas_stpmv.c cblas_stpsv.c cblas_strmv.c | |||
| cblas_strsv.c) | |||
| # | |||
| # All object files for double real precision | |||
| # | |||
| set (DLEV2 cblas_dgemv.c cblas_dgbmv.c cblas_dger.c cblas_dsbmv.c cblas_dspmv.c | |||
| cblas_dspr.c cblas_dspr2.c cblas_dsymv.c cblas_dsyr.c cblas_dsyr2.c | |||
| cblas_dtbmv.c cblas_dtbsv.c cblas_dtpmv.c cblas_dtpsv.c cblas_dtrmv.c | |||
| cblas_dtrsv.c) | |||
| # | |||
| # All object files for single complex precision | |||
| # | |||
| set (CLEV2 cblas_cgemv.c cblas_cgbmv.c cblas_chemv.c cblas_chbmv.c cblas_chpmv.c | |||
| cblas_ctrmv.c cblas_ctbmv.c cblas_ctpmv.c cblas_ctrsv.c cblas_ctbsv.c | |||
| cblas_ctpsv.c cblas_cgeru.c cblas_cgerc.c cblas_cher.c cblas_cher2.c | |||
| cblas_chpr.c cblas_chpr2.c) | |||
| # | |||
| # All object files for double complex precision | |||
| # | |||
| set (ZLEV2 cblas_zgemv.c cblas_zgbmv.c cblas_zhemv.c cblas_zhbmv.c cblas_zhpmv.c | |||
| cblas_ztrmv.c cblas_ztbmv.c cblas_ztpmv.c cblas_ztrsv.c cblas_ztbsv.c | |||
| cblas_ztpsv.c cblas_zgeru.c cblas_zgerc.c cblas_zher.c cblas_zher2.c | |||
| cblas_zhpr.c cblas_zhpr2.c) | |||
| # | |||
| # All object files | |||
| # | |||
| set (AVEL2 ${slev2} ${dlev2} ${clev2} ${zlev2}) | |||
| # | |||
| # | |||
| # CBLAS routines | |||
| # | |||
| # Level 3 | |||
| # | |||
| # | |||
| # | |||
| # All object files for single real precision | |||
| # | |||
| set (SLEV3 cblas_sgemm.c cblas_ssymm.c cblas_ssyrk.c cblas_ssyr2k.c cblas_strmm.c | |||
| cblas_strsm.c) | |||
| # | |||
| # All object files for double real precision | |||
| # | |||
| set (DLEV3 cblas_dgemm.c cblas_dsymm.c cblas_dsyrk.c cblas_dsyr2k.c cblas_dtrmm.c | |||
| cblas_dtrsm.c) | |||
| # | |||
| # All object files for single complex precision | |||
| # | |||
| set (CLEV3 cblas_cgemm.c cblas_csymm.c cblas_chemm.c cblas_cherk.c | |||
| cblas_cher2k.c cblas_ctrmm.c cblas_ctrsm.c cblas_csyrk.c | |||
| cblas_csyr2k.c) | |||
| # | |||
| # All object files for double complex precision | |||
| # | |||
| set (ZLEV3 cblas_zgemm.c cblas_zsymm.c cblas_zhemm.c cblas_zherk.c | |||
| cblas_zher2k.c cblas_ztrmm.c cblas_ztrsm.c cblas_zsyrk.c | |||
| cblas_zsyr2k.c) | |||
| # | |||
| # All object files | |||
| # | |||
| set (ALEV3 ${slev3} ${dlev3} ${clev3} ${zlev3}) | |||
| # default build all of it | |||
| set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND} | |||
| ${DLEV1} ${DLEV2} ${DLEV3} | |||
| ${CLEV1} ${CLEV2} ${CLEV3} | |||
| ${ZLEV1} ${ZLEV2} ${ZLEV3} ) | |||
| # Single real precision | |||
| if(CBLAS_SINGLE) | |||
| set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND}) | |||
| endif(CBLAS_SINGLE) | |||
| # Double real precision | |||
| if(CBLAS_DOUBLE) | |||
| set(ALLOBJ ${DLEV1} ${DLEV2} ${DLEV3} ${ERRHAND}) | |||
| endif(CBLAS_DOUBLE) | |||
| # Single complex precision | |||
| if (CBLAS_COMPLEX) | |||
| set(ALLOBJ ${CLEV1} ${SCLEV1} ${CLEV2} ${CLEV3} ${ERRHAND}) | |||
| endif(CBLAS_COMPLEX) | |||
| # Double complex precision | |||
| if (CBLAS_COMPLEX16) | |||
| set(ALLOBJ ${ZLEV1} ${ZLEV2} ${ZLEV3} ${ERRHAND}) | |||
| endif(CBLAS_COMPLEX16) | |||
| add_library(cblas ${ALLOBJ}) | |||
| target_link_libraries(cblas ${BLAS_LIBRARIES} ) | |||
| lapack_install_library(cblas) | |||
| @@ -0,0 +1,249 @@ | |||
| # This Makefile compiles the CBLAS routines | |||
| # | |||
| include ../../make.inc | |||
| # | |||
| # Erase all object and archive files | |||
| # | |||
| all: cblaslib | |||
| clean: | |||
| rm -f *.o a.out core | |||
| # Error handling routines for level 2 & 3 | |||
| errhand = cblas_globals.o cblas_xerbla.o xerbla.o | |||
| # Object files of all routines | |||
| alev = $(alev1) $(alev2) $(alev3) $(errhand) | |||
| # | |||
| # | |||
| # CBLAS routines | |||
| # | |||
| # Level 1 | |||
| # | |||
| # | |||
| # | |||
| # All object files for single real precision | |||
| # | |||
| slev1 = cblas_srotg.o cblas_srotmg.o cblas_srot.o cblas_srotm.o \ | |||
| cblas_sswap.o cblas_sscal.o cblas_scopy.o cblas_saxpy.o \ | |||
| cblas_sdot.o cblas_sdsdot.o cblas_snrm2.o cblas_sasum.o \ | |||
| cblas_isamax.o sdotsub.o sdsdotsub.o snrm2sub.o sasumsub.o \ | |||
| isamaxsub.o | |||
| # | |||
| # All object files for double real precision | |||
| # | |||
| dlev1 = cblas_drotg.o cblas_drotmg.o cblas_drot.o cblas_drotm.o \ | |||
| cblas_dswap.o cblas_dscal.o cblas_dcopy.o cblas_daxpy.o \ | |||
| cblas_ddot.o cblas_dsdot.o cblas_dnrm2.o cblas_dasum.o \ | |||
| cblas_idamax.o ddotsub.o dsdotsub.o dnrm2sub.o \ | |||
| dasumsub.o idamaxsub.o | |||
| # | |||
| # All object files for single complex precision | |||
| # | |||
| clev1 = cblas_cswap.o cblas_cscal.o cblas_csscal.o cblas_ccopy.o \ | |||
| cblas_caxpy.o cblas_cdotu_sub.o cblas_cdotc_sub.o \ | |||
| cblas_icamax.o cdotcsub.o cdotusub.o icamaxsub.o | |||
| # | |||
| # All object files for double complex precision | |||
| # | |||
| zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \ | |||
| cblas_zaxpy.o cblas_zdotu_sub.o cblas_zdotc_sub.o cblas_dznrm2.o \ | |||
| cblas_dzasum.o cblas_izamax.o zdotcsub.o zdotusub.o \ | |||
| dzasumsub.o dznrm2sub.o izamaxsub.o | |||
| # | |||
| # Common files for single / complex precision | |||
| # | |||
| sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o | |||
| # | |||
| # All object files | |||
| # | |||
| alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1) | |||
| # | |||
| # Make an archive file | |||
| # | |||
| # Single real precision | |||
| slib1: $(slev1) $(sclev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev1) $(sclev1) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double real precision | |||
| dlib1: $(dlev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev1) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Single complex precision | |||
| clib1: $(clev1) $(sclev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev1) $(sclev1) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double complex precision | |||
| zlib1: $(zlev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev1) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # All precisions | |||
| all1: $(alev1) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev1) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # | |||
| # | |||
| # CBLAS routines | |||
| # | |||
| # Level 2 | |||
| # | |||
| # | |||
| # | |||
| # All object files for single real precision | |||
| # | |||
| slev2 = cblas_sgemv.o cblas_sgbmv.o cblas_sger.o cblas_ssbmv.o cblas_sspmv.o \ | |||
| cblas_sspr.o cblas_sspr2.o cblas_ssymv.o cblas_ssyr.o cblas_ssyr2.o \ | |||
| cblas_stbmv.o cblas_stbsv.o cblas_stpmv.o cblas_stpsv.o cblas_strmv.o \ | |||
| cblas_strsv.o | |||
| # | |||
| # All object files for double real precision | |||
| # | |||
| dlev2 = cblas_dgemv.o cblas_dgbmv.o cblas_dger.o cblas_dsbmv.o cblas_dspmv.o \ | |||
| cblas_dspr.o cblas_dspr2.o cblas_dsymv.o cblas_dsyr.o cblas_dsyr2.o \ | |||
| cblas_dtbmv.o cblas_dtbsv.o cblas_dtpmv.o cblas_dtpsv.o cblas_dtrmv.o \ | |||
| cblas_dtrsv.o | |||
| # | |||
| # All object files for single complex precision | |||
| # | |||
| clev2 = cblas_cgemv.o cblas_cgbmv.o cblas_chemv.o cblas_chbmv.o cblas_chpmv.o \ | |||
| cblas_ctrmv.o cblas_ctbmv.o cblas_ctpmv.o cblas_ctrsv.o cblas_ctbsv.o \ | |||
| cblas_ctpsv.o cblas_cgeru.o cblas_cgerc.o cblas_cher.o cblas_cher2.o \ | |||
| cblas_chpr.o cblas_chpr2.o | |||
| # | |||
| # All object files for double complex precision | |||
| # | |||
| zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ | |||
| cblas_ztrmv.o cblas_ztbmv.o cblas_ztpmv.o cblas_ztrsv.o cblas_ztbsv.o \ | |||
| cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \ | |||
| cblas_zhpr.o cblas_zhpr2.o | |||
| # | |||
| # All object files | |||
| # | |||
| alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) | |||
| # | |||
| # Make an archive file | |||
| # | |||
| # Single real precision | |||
| slib2: $(slev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev2) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double real precision | |||
| dlib2: $(dlev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev2) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Single complex precision | |||
| clib2: $(clev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev2) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double complex precision | |||
| zlib2: $(zlev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev2) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # All precisions | |||
| all2: $(alev2) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev2) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # | |||
| # | |||
| # CBLAS routines | |||
| # | |||
| # Level 3 | |||
| # | |||
| # | |||
| # | |||
| # All object files for single real precision | |||
| # | |||
| slev3 = cblas_sgemm.o cblas_ssymm.o cblas_ssyrk.o cblas_ssyr2k.o cblas_strmm.o\ | |||
| cblas_strsm.o | |||
| # | |||
| # All object files for double real precision | |||
| # | |||
| dlev3 = cblas_dgemm.o cblas_dsymm.o cblas_dsyrk.o cblas_dsyr2k.o cblas_dtrmm.o\ | |||
| cblas_dtrsm.o | |||
| # | |||
| # All object files for single complex precision | |||
| # | |||
| clev3 = cblas_cgemm.o cblas_csymm.o cblas_chemm.o cblas_cherk.o\ | |||
| cblas_cher2k.o cblas_ctrmm.o cblas_ctrsm.o cblas_csyrk.o\ | |||
| cblas_csyr2k.o | |||
| # | |||
| # All object files for double complex precision | |||
| # | |||
| zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o\ | |||
| cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o\ | |||
| cblas_zsyr2k.o | |||
| # | |||
| # All object files | |||
| # | |||
| alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) | |||
| # | |||
| # Make an archive file | |||
| # | |||
| # Single real precision | |||
| slib3: $(slev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev3) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Double real precision | |||
| dlib3: $(dlev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev3) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Single complex precision | |||
| clib3: $(clev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev3) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # Single complex precision | |||
| zlib3: $(zlev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev3) $(errhand) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # All precisions | |||
| all3: $(alev3) $(errhand) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev3) | |||
| $(RANLIB) $(CBLASLIB) | |||
| # All levels and precisions | |||
| cblaslib: $(alev) | |||
| $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev) | |||
| $(RANLIB) $(CBLASLIB) | |||
| FRC: | |||
| @FRC=$(FRC) | |||
| .c.o: | |||
| $(CC) -c $(CFLAGS) -I ../include -o $@ $< | |||
| .f.o: | |||
| $(FORTRAN) $(OPTS) -c $< -o $@ | |||
| @@ -0,0 +1,22 @@ | |||
| /* | |||
| * cblas_caxpy.c | |||
| * | |||
| * The program is a C interface to caxpy. | |||
| * | |||
| * Written by Keita Teranishi. 2/11/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_caxpy( const int N, const void *alpha, const void *X, | |||
| const int incX, void *Y, const int incY) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #define F77_incY incY | |||
| #endif | |||
| F77_caxpy( &F77_N, alpha, X, &F77_incX, Y, &F77_incY); | |||
| } | |||
| @@ -0,0 +1,22 @@ | |||
| /* | |||
| * cblas_ccopy.c | |||
| * | |||
| * The program is a C interface to ccopy. | |||
| * | |||
| * Written by Keita Teranishi. 2/11/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_ccopy( const int N, const void *X, | |||
| const int incX, void *Y, const int incY) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #define F77_incY incY | |||
| #endif | |||
| F77_ccopy( &F77_N, X, &F77_incX, Y, &F77_incY); | |||
| } | |||
| @@ -0,0 +1,24 @@ | |||
| /* | |||
| * cblas_cdotc_sub.c | |||
| * | |||
| * The program is a C interface to cdotc. | |||
| * It calls the fortran wrapper before calling cdotc. | |||
| * | |||
| * Written by Keita Teranishi. 2/11/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cdotc_sub( const int N, const void *X, const int incX, | |||
| const void *Y, const int incY, void *dotc) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #define F77_incY incY | |||
| #endif | |||
| F77_cdotc_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotc); | |||
| return; | |||
| } | |||
| @@ -0,0 +1,24 @@ | |||
| /* | |||
| * cblas_cdotu_sub.c | |||
| * | |||
| * The program is a C interface to cdotu. | |||
| * It calls the fortran wrapper before calling cdotu. | |||
| * | |||
| * Written by Keita Teranishi. 2/11/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cdotu_sub( const int N, const void *X, const int incX, | |||
| const void *Y, const int incY, void *dotu) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #define F77_incY incY | |||
| #endif | |||
| F77_cdotu_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotu); | |||
| return; | |||
| } | |||
| @@ -0,0 +1,165 @@ | |||
| /* | |||
| * cblas_cgbmv.c | |||
| * The program is a C interface of cgbmv | |||
| * | |||
| * Keita Teranishi 5/20/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cgbmv(const CBLAS_LAYOUT layout, | |||
| const CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const int KL, const int KU, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *X, const int incX, const void *beta, | |||
| void *Y, const int incY) | |||
| { | |||
| char TA; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TA; | |||
| #else | |||
| #define F77_TA &TA | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; | |||
| F77_INT F77_KL=KL,F77_KU=KU; | |||
| #else | |||
| #define F77_M M | |||
| #define F77_N N | |||
| #define F77_lda lda | |||
| #define F77_KL KL | |||
| #define F77_KU KU | |||
| #define F77_incX incx | |||
| #define F77_incY incY | |||
| #endif | |||
| int n=0, i=0, incx=incX; | |||
| const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; | |||
| float ALPHA[2],BETA[2]; | |||
| int tincY, tincx; | |||
| float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (TransA == CblasNoTrans) TA = 'N'; | |||
| else if (TransA == CblasTrans) TA = 'T'; | |||
| else if (TransA == CblasConjTrans) TA = 'C'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_TA = C2F_CHAR(&TA); | |||
| #endif | |||
| F77_cgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, alpha, | |||
| A, &F77_lda, X, &F77_incX, beta, Y, &F77_incY); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (TransA == CblasNoTrans) TA = 'T'; | |||
| else if (TransA == CblasTrans) TA = 'N'; | |||
| else if (TransA == CblasConjTrans) | |||
| { | |||
| ALPHA[0]= *alp; | |||
| ALPHA[1]= -alp[1]; | |||
| BETA[0]= *bet; | |||
| BETA[1]= -bet[1]; | |||
| TA = 'N'; | |||
| if (M > 0) | |||
| { | |||
| n = M << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| if( incX > 0 ) { | |||
| i = incX << 1 ; | |||
| tincx = 2; | |||
| st= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| st = x-2; | |||
| x +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != st); | |||
| x=tx; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| #else | |||
| incx = 1; | |||
| #endif | |||
| if( incY > 0 ) | |||
| tincY = incY; | |||
| else | |||
| tincY = -incY; | |||
| y++; | |||
| if (N > 0) | |||
| { | |||
| i = tincY << 1; | |||
| n = i * N ; | |||
| st = y + n; | |||
| do { | |||
| *y = -(*y); | |||
| y += i; | |||
| } while(y != st); | |||
| y -= n; | |||
| } | |||
| } | |||
| else x = (float *) X; | |||
| } | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_TA = C2F_CHAR(&TA); | |||
| #endif | |||
| if (TransA == CblasConjTrans) | |||
| F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, ALPHA, | |||
| A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY); | |||
| else | |||
| F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, alpha, | |||
| A ,&F77_lda, x,&F77_incX, beta, Y, &F77_incY); | |||
| if (TransA == CblasConjTrans) | |||
| { | |||
| if (x != X) free(x); | |||
| if (N > 0) | |||
| { | |||
| do | |||
| { | |||
| *y = -(*y); | |||
| y += i; | |||
| } | |||
| while (y != st); | |||
| } | |||
| } | |||
| } | |||
| else cblas_xerbla(1, "cblas_cgbmv", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| } | |||
| @@ -0,0 +1,109 @@ | |||
| /* | |||
| * | |||
| * cblas_cgemm.c | |||
| * This program is a C interface to cgemm. | |||
| * Written by Keita Teranishi | |||
| * 4/8/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, | |||
| const CBLAS_TRANSPOSE TransB, const int M, const int N, | |||
| const int K, const void *alpha, const void *A, | |||
| const int lda, const void *B, const int ldb, | |||
| const void *beta, void *C, const int ldc) | |||
| { | |||
| char TA, TB; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TA, F77_TB; | |||
| #else | |||
| #define F77_TA &TA | |||
| #define F77_TB &TB | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; | |||
| F77_INT F77_ldc=ldc; | |||
| #else | |||
| #define F77_M M | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_ldb ldb | |||
| #define F77_ldc ldc | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if( layout == CblasColMajor ) | |||
| { | |||
| if(TransA == CblasTrans) TA='T'; | |||
| else if ( TransA == CblasConjTrans ) TA='C'; | |||
| else if ( TransA == CblasNoTrans ) TA='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if(TransB == CblasTrans) TB='T'; | |||
| else if ( TransB == CblasConjTrans ) TB='C'; | |||
| else if ( TransB == CblasNoTrans ) TB='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_TA = C2F_CHAR(&TA); | |||
| F77_TB = C2F_CHAR(&TB); | |||
| #endif | |||
| F77_cgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, alpha, A, | |||
| &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if(TransA == CblasTrans) TB='T'; | |||
| else if ( TransA == CblasConjTrans ) TB='C'; | |||
| else if ( TransA == CblasNoTrans ) TB='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if(TransB == CblasTrans) TA='T'; | |||
| else if ( TransB == CblasConjTrans ) TA='C'; | |||
| else if ( TransB == CblasNoTrans ) TA='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_TA = C2F_CHAR(&TA); | |||
| F77_TB = C2F_CHAR(&TB); | |||
| #endif | |||
| F77_cgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, alpha, B, | |||
| &F77_ldb, A, &F77_lda, beta, C, &F77_ldc); | |||
| } | |||
| else cblas_xerbla(1, "cblas_cgemm", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,162 @@ | |||
| /* | |||
| * cblas_cgemv.c | |||
| * The program is a C interface of cgemv | |||
| * | |||
| * Keita Teranishi 5/20/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cgemv(const CBLAS_LAYOUT layout, | |||
| const CBLAS_TRANSPOSE TransA, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *X, const int incX, const void *beta, | |||
| void *Y, const int incY) | |||
| { | |||
| char TA; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TA; | |||
| #else | |||
| #define F77_TA &TA | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_M M | |||
| #define F77_N N | |||
| #define F77_lda lda | |||
| #define F77_incX incx | |||
| #define F77_incY incY | |||
| #endif | |||
| int n=0, i=0, incx=incX; | |||
| const float *xx= (const float *)X; | |||
| float ALPHA[2],BETA[2]; | |||
| int tincY, tincx; | |||
| float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0; | |||
| const float *stx = x; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (TransA == CblasNoTrans) TA = 'N'; | |||
| else if (TransA == CblasTrans) TA = 'T'; | |||
| else if (TransA == CblasConjTrans) TA = 'C'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_TA = C2F_CHAR(&TA); | |||
| #endif | |||
| F77_cgemv(F77_TA, &F77_M, &F77_N, alpha, A, &F77_lda, X, &F77_incX, | |||
| beta, Y, &F77_incY); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (TransA == CblasNoTrans) TA = 'T'; | |||
| else if (TransA == CblasTrans) TA = 'N'; | |||
| else if (TransA == CblasConjTrans) | |||
| { | |||
| ALPHA[0]= *( (const float *) alpha ); | |||
| ALPHA[1]= -( *( (const float *) alpha+1) ); | |||
| BETA[0]= *( (const float *) beta ); | |||
| BETA[1]= -( *( (const float *) beta+1 ) ); | |||
| TA = 'N'; | |||
| if (M > 0) | |||
| { | |||
| n = M << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| if( incX > 0 ) { | |||
| i = incX << 1 ; | |||
| tincx = 2; | |||
| st= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| st = x-2; | |||
| x +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != st); | |||
| x=tx; | |||
| F77_incX = 1; | |||
| if(incY > 0) | |||
| tincY = incY; | |||
| else | |||
| tincY = -incY; | |||
| y++; | |||
| if (N > 0) | |||
| { | |||
| i = tincY << 1; | |||
| n = i * N ; | |||
| st = y + n; | |||
| do { | |||
| *y = -(*y); | |||
| y += i; | |||
| } while(y != st); | |||
| y -= n; | |||
| } | |||
| stx = x; | |||
| } | |||
| else stx = (const float *)X; | |||
| } | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_TA = C2F_CHAR(&TA); | |||
| #endif | |||
| if (TransA == CblasConjTrans) | |||
| F77_cgemv(F77_TA, &F77_N, &F77_M, ALPHA, A, &F77_lda, stx, | |||
| &F77_incX, BETA, Y, &F77_incY); | |||
| else | |||
| F77_cgemv(F77_TA, &F77_N, &F77_M, alpha, A, &F77_lda, x, | |||
| &F77_incX, beta, Y, &F77_incY); | |||
| if (TransA == CblasConjTrans) | |||
| { | |||
| if (x != (const float *)X) free(x); | |||
| if (N > 0) | |||
| { | |||
| do | |||
| { | |||
| *y = -(*y); | |||
| y += i; | |||
| } | |||
| while (y != st); | |||
| } | |||
| } | |||
| } | |||
| else cblas_xerbla(1, "cblas_cgemv", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,84 @@ | |||
| /* | |||
| * cblas_cgerc.c | |||
| * The program is a C interface to cgerc. | |||
| * | |||
| * Keita Teranishi 5/20/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cgerc(const CBLAS_LAYOUT layout, const int M, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_M M | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #define F77_incY incy | |||
| #define F77_lda lda | |||
| #endif | |||
| int n, i, tincy, incy=incY; | |||
| float *y=(float *)Y, *yy=(float *)Y, *ty, *st; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| F77_cgerc( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A, | |||
| &F77_lda); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| y = malloc(n*sizeof(float)); | |||
| ty = y; | |||
| if( incY > 0 ) { | |||
| i = incY << 1; | |||
| tincy = 2; | |||
| st= y+n; | |||
| } else { | |||
| i = incY *(-2); | |||
| tincy = -2; | |||
| st = y-2; | |||
| y +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *y = *yy; | |||
| y[1] = -yy[1]; | |||
| y += tincy ; | |||
| yy += i; | |||
| } | |||
| while (y != st); | |||
| y = ty; | |||
| #ifdef F77_INT | |||
| F77_incY = 1; | |||
| #else | |||
| incy = 1; | |||
| #endif | |||
| } | |||
| else y = (float *) Y; | |||
| F77_cgeru( &F77_N, &F77_M, alpha, y, &F77_incY, X, &F77_incX, A, | |||
| &F77_lda); | |||
| if(Y!=y) | |||
| free(y); | |||
| } else cblas_xerbla(1, "cblas_cgerc", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,45 @@ | |||
| /* | |||
| * cblas_cgeru.c | |||
| * The program is a C interface to cgeru. | |||
| * | |||
| * Keita Teranishi 5/20/98 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cgeru(const CBLAS_LAYOUT layout, const int M, const int N, | |||
| const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_M M | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #define F77_incY incY | |||
| #define F77_lda lda | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| F77_cgeru( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A, | |||
| &F77_lda); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| F77_cgeru( &F77_N, &F77_M, alpha, Y, &F77_incY, X, &F77_incX, A, | |||
| &F77_lda); | |||
| } | |||
| else cblas_xerbla(1, "cblas_cgeru","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,159 @@ | |||
| /* | |||
| * cblas_chbmv.c | |||
| * The program is a C interface to chbmv | |||
| * | |||
| * Keita Teranishi 5/18/98 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| void cblas_chbmv(const CBLAS_LAYOUT layout, | |||
| const CBLAS_UPLO Uplo,const int N,const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *X, const int incX, const void *beta, | |||
| void *Y, const int incY) | |||
| { | |||
| char UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_UL; | |||
| #else | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_incX incx | |||
| #define F77_incY incY | |||
| #endif | |||
| int n, i=0, incx=incX; | |||
| const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; | |||
| float ALPHA[2],BETA[2]; | |||
| int tincY, tincx; | |||
| float *x=(float *)X, *y=(float *)Y, *st=0, *tx; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasLower) UL = 'L'; | |||
| else if (Uplo == CblasUpper) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n",Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chbmv(F77_UL, &F77_N, &F77_K, alpha, A, &F77_lda, X, | |||
| &F77_incX, beta, Y, &F77_incY); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| ALPHA[0]= *alp; | |||
| ALPHA[1]= -alp[1]; | |||
| BETA[0]= *bet; | |||
| BETA[1]= -bet[1]; | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| if( incX > 0 ) { | |||
| i = incX << 1 ; | |||
| tincx = 2; | |||
| st= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| st = x-2; | |||
| x +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != st); | |||
| x=tx; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| #else | |||
| incx = 1; | |||
| #endif | |||
| if(incY > 0) | |||
| tincY = incY; | |||
| else | |||
| tincY = -incY; | |||
| y++; | |||
| i = tincY << 1; | |||
| n = i * N ; | |||
| st = y + n; | |||
| do { | |||
| *y = -(*y); | |||
| y += i; | |||
| } while(y != st); | |||
| y -= n; | |||
| } else | |||
| x = (float *) X; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chbmv(F77_UL, &F77_N, &F77_K, ALPHA, | |||
| A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY); | |||
| } | |||
| else | |||
| { | |||
| cblas_xerbla(1, "cblas_chbmv","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if ( layout == CblasRowMajor ) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if(X!=x) | |||
| free(x); | |||
| if (N > 0) | |||
| { | |||
| do | |||
| { | |||
| *y = -(*y); | |||
| y += i; | |||
| } | |||
| while (y != st); | |||
| } | |||
| } | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,106 @@ | |||
| /* | |||
| * | |||
| * cblas_chemm.c | |||
| * This program is a C interface to chemm. | |||
| * Written by Keita Teranishi | |||
| * 4/8/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_chemm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side, | |||
| const CBLAS_UPLO Uplo, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc) | |||
| { | |||
| char SD, UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_SD, F77_UL; | |||
| #else | |||
| #define F77_SD &SD | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; | |||
| F77_INT F77_ldc=ldc; | |||
| #else | |||
| #define F77_M M | |||
| #define F77_N N | |||
| #define F77_lda lda | |||
| #define F77_ldb ldb | |||
| #define F77_ldc ldc | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if( layout == CblasColMajor ) | |||
| { | |||
| if( Side == CblasRight) SD='R'; | |||
| else if ( Side == CblasLeft ) SD='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Uplo == CblasUpper) UL='U'; | |||
| else if ( Uplo == CblasLower ) UL='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_SD = C2F_CHAR(&SD); | |||
| #endif | |||
| F77_chemm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda, | |||
| B, &F77_ldb, beta, C, &F77_ldc); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if( Side == CblasRight) SD='L'; | |||
| else if ( Side == CblasLeft ) SD='R'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Uplo == CblasUpper) UL='L'; | |||
| else if ( Uplo == CblasLower ) UL='U'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_SD = C2F_CHAR(&SD); | |||
| #endif | |||
| F77_chemm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, | |||
| &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); | |||
| } | |||
| else cblas_xerbla(1, "cblas_chemm", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,160 @@ | |||
| /* | |||
| * cblas_chemv.c | |||
| * The program is a C interface to chemv | |||
| * | |||
| * Keita Teranishi 5/18/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_chemv(const CBLAS_LAYOUT layout, | |||
| const CBLAS_UPLO Uplo, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *X, const int incX, const void *beta, | |||
| void *Y, const int incY) | |||
| { | |||
| char UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_UL; | |||
| #else | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_lda lda | |||
| #define F77_incX incx | |||
| #define F77_incY incY | |||
| #endif | |||
| int n=0, i=0, incx=incX; | |||
| const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; | |||
| float ALPHA[2],BETA[2]; | |||
| int tincY, tincx; | |||
| float *x=(float *)X, *y=(float *)Y, *st=0, *tx; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasUpper) UL = 'U'; | |||
| else if (Uplo == CblasLower) UL = 'L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n",Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chemv(F77_UL, &F77_N, alpha, A, &F77_lda, X, &F77_incX, | |||
| beta, Y, &F77_incY); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| ALPHA[0]= *alp; | |||
| ALPHA[1]= -alp[1]; | |||
| BETA[0]= *bet; | |||
| BETA[1]= -bet[1]; | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| if( incX > 0 ) { | |||
| i = incX << 1 ; | |||
| tincx = 2; | |||
| st= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| st = x-2; | |||
| x +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != st); | |||
| x=tx; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| #else | |||
| incx = 1; | |||
| #endif | |||
| if(incY > 0) | |||
| tincY = incY; | |||
| else | |||
| tincY = -incY; | |||
| y++; | |||
| i = tincY << 1; | |||
| n = i * N ; | |||
| st = y + n; | |||
| do { | |||
| *y = -(*y); | |||
| y += i; | |||
| } while(y != st); | |||
| y -= n; | |||
| } else | |||
| x = (float *) X; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chemv(F77_UL, &F77_N, ALPHA, A, &F77_lda, x, &F77_incX, | |||
| BETA, Y, &F77_incY); | |||
| } | |||
| else | |||
| { | |||
| cblas_xerbla(1, "cblas_chemv","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if ( layout == CblasRowMajor ) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if ( X != x ) | |||
| free(x); | |||
| if (N > 0) | |||
| { | |||
| do | |||
| { | |||
| *y = -(*y); | |||
| y += i; | |||
| } | |||
| while (y != st); | |||
| } | |||
| } | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,116 @@ | |||
| /* | |||
| * cblas_cher.c | |||
| * The program is a C interface to cher. | |||
| * | |||
| * Keita Teranishi 5/20/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cher(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const void *X, const int incX | |||
| ,void *A, const int lda) | |||
| { | |||
| char UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_UL; | |||
| #else | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_lda lda | |||
| #define F77_incX incx | |||
| #endif | |||
| int n, i, tincx, incx=incX; | |||
| float *x=(float *)X, *xx=(float *)X, *tx, *st; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasLower) UL = 'L'; | |||
| else if (Uplo == CblasUpper) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n",Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_cher(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| if( incX > 0 ) { | |||
| i = incX << 1 ; | |||
| tincx = 2; | |||
| st= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| st = x-2; | |||
| x +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != st); | |||
| x=tx; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| #else | |||
| incx = 1; | |||
| #endif | |||
| } | |||
| else x = (float *) X; | |||
| F77_cher(F77_UL, &F77_N, &alpha, x, &F77_incX, A, &F77_lda); | |||
| } else | |||
| { | |||
| cblas_xerbla(1, "cblas_cher","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if(X!=x) | |||
| free(x); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,152 @@ | |||
| /* | |||
| * cblas_cher2.c | |||
| * The program is a C interface to cher2. | |||
| * | |||
| * Keita Teranishi 3/23/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cher2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const int N, const void *alpha, const void *X, const int incX, | |||
| const void *Y, const int incY, void *A, const int lda) | |||
| { | |||
| char UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_UL; | |||
| #else | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_lda lda | |||
| #define F77_incX incx | |||
| #define F77_incY incy | |||
| #endif | |||
| int n, i, j, tincx, tincy, incx=incX, incy=incY; | |||
| float *x=(float *)X, *xx=(float *)X, *y=(float *)Y, | |||
| *yy=(float *)Y, *tx, *ty, *stx, *sty; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasLower) UL = 'L'; | |||
| else if (Uplo == CblasUpper) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n",Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_cher2(F77_UL, &F77_N, alpha, X, &F77_incX, | |||
| Y, &F77_incY, A, &F77_lda); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| y = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| ty = y; | |||
| if( incX > 0 ) { | |||
| i = incX << 1 ; | |||
| tincx = 2; | |||
| stx= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| stx = x-2; | |||
| x +=(n-2); | |||
| } | |||
| if( incY > 0 ) { | |||
| j = incY << 1; | |||
| tincy = 2; | |||
| sty= y+n; | |||
| } else { | |||
| j = incY *(-2); | |||
| tincy = -2; | |||
| sty = y-2; | |||
| y +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != stx); | |||
| do | |||
| { | |||
| *y = *yy; | |||
| y[1] = -yy[1]; | |||
| y += tincy ; | |||
| yy += j; | |||
| } | |||
| while (y != sty); | |||
| x=tx; | |||
| y=ty; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| F77_incY = 1; | |||
| #else | |||
| incx = 1; | |||
| incy = 1; | |||
| #endif | |||
| } else | |||
| { | |||
| x = (float *) X; | |||
| y = (float *) Y; | |||
| } | |||
| F77_cher2(F77_UL, &F77_N, alpha, y, &F77_incY, x, | |||
| &F77_incX, A, &F77_lda); | |||
| } else | |||
| { | |||
| cblas_xerbla(1, "cblas_cher2","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if(X!=x) | |||
| free(x); | |||
| if(Y!=y) | |||
| free(y); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,111 @@ | |||
| /* | |||
| * | |||
| * cblas_cher2k.c | |||
| * This program is a C interface to cher2k. | |||
| * Written by Keita Teranishi | |||
| * 4/8/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cher2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const float beta, | |||
| void *C, const int ldc) | |||
| { | |||
| char UL, TR; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TR, F77_UL; | |||
| #else | |||
| #define F77_TR &TR | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; | |||
| F77_INT F77_ldc=ldc; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_ldb ldb | |||
| #define F77_ldc ldc | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| float ALPHA[2]; | |||
| const float *alp=(float *)alpha; | |||
| CBLAS_CallFromC = 1; | |||
| RowMajorStrg = 0; | |||
| if( layout == CblasColMajor ) | |||
| { | |||
| if( Uplo == CblasUpper) UL='U'; | |||
| else if ( Uplo == CblasLower ) UL='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='T'; | |||
| else if ( Trans == CblasConjTrans ) TR='C'; | |||
| else if ( Trans == CblasNoTrans ) TR='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TR = C2F_CHAR(&TR); | |||
| #endif | |||
| F77_cher2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if( Uplo == CblasUpper) UL='L'; | |||
| else if ( Uplo == CblasLower ) UL='U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='N'; | |||
| else if ( Trans == CblasConjTrans ) TR='N'; | |||
| else if ( Trans == CblasNoTrans ) TR='C'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TR = C2F_CHAR(&TR); | |||
| #endif | |||
| ALPHA[0]= *alp; | |||
| ALPHA[1]= -alp[1]; | |||
| F77_cher2k(F77_UL,F77_TR, &F77_N, &F77_K, ALPHA, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); | |||
| } | |||
| else cblas_xerbla(1, "cblas_cher2k", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,105 @@ | |||
| /* | |||
| * | |||
| * cblas_cherk.c | |||
| * This program is a C interface to cherk. | |||
| * Written by Keita Teranishi | |||
| * 4/8/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cherk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const float alpha, const void *A, const int lda, | |||
| const float beta, void *C, const int ldc) | |||
| { | |||
| char UL, TR; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TR, F77_UL; | |||
| #else | |||
| #define F77_TR &TR | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_K=K, F77_lda=lda; | |||
| F77_INT F77_ldc=ldc; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_ldc ldc | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if( layout == CblasColMajor ) | |||
| { | |||
| if( Uplo == CblasUpper) UL='U'; | |||
| else if ( Uplo == CblasLower ) UL='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='T'; | |||
| else if ( Trans == CblasConjTrans ) TR='C'; | |||
| else if ( Trans == CblasNoTrans ) TR='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TR = C2F_CHAR(&TR); | |||
| #endif | |||
| F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, | |||
| &beta, C, &F77_ldc); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if( Uplo == CblasUpper) UL='L'; | |||
| else if ( Uplo == CblasLower ) UL='U'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='N'; | |||
| else if ( Trans == CblasConjTrans ) TR='N'; | |||
| else if ( Trans == CblasNoTrans ) TR='C'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_SD = C2F_CHAR(&SD); | |||
| #endif | |||
| F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, | |||
| &beta, C, &F77_ldc); | |||
| } | |||
| else cblas_xerbla(1, "cblas_cherk", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,160 @@ | |||
| /* | |||
| * cblas_chpmv.c | |||
| * The program is a C interface of chpmv | |||
| * | |||
| * Keita Teranishi 5/18/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_chpmv(const CBLAS_LAYOUT layout, | |||
| const CBLAS_UPLO Uplo,const int N, | |||
| const void *alpha, const void *AP, | |||
| const void *X, const int incX, const void *beta, | |||
| void *Y, const int incY) | |||
| { | |||
| char UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_UL; | |||
| #else | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incx | |||
| #define F77_incY incY | |||
| #endif | |||
| int n, i=0, incx=incX; | |||
| const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; | |||
| float ALPHA[2],BETA[2]; | |||
| int tincY, tincx; | |||
| float *x=(float *)X, *y=(float *)Y, *st=0, *tx; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasLower) UL = 'L'; | |||
| else if (Uplo == CblasUpper) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n",Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chpmv(F77_UL, &F77_N, alpha, AP, X, | |||
| &F77_incX, beta, Y, &F77_incY); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| ALPHA[0]= *alp; | |||
| ALPHA[1]= -alp[1]; | |||
| BETA[0]= *bet; | |||
| BETA[1]= -bet[1]; | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| if( incX > 0 ) { | |||
| i = incX << 1; | |||
| tincx = 2; | |||
| st= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| st = x-2; | |||
| x +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != st); | |||
| x=tx; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| #else | |||
| incx = 1; | |||
| #endif | |||
| if(incY > 0) | |||
| tincY = incY; | |||
| else | |||
| tincY = -incY; | |||
| y++; | |||
| i = tincY << 1; | |||
| n = i * N ; | |||
| st = y + n; | |||
| do { | |||
| *y = -(*y); | |||
| y += i; | |||
| } while(y != st); | |||
| y -= n; | |||
| } else | |||
| x = (float *) X; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n", Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chpmv(F77_UL, &F77_N, ALPHA, | |||
| AP, x, &F77_incX, BETA, Y, &F77_incY); | |||
| } | |||
| else | |||
| { | |||
| cblas_xerbla(1, "cblas_chpmv","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if ( layout == CblasRowMajor ) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if(X!=x) | |||
| free(x); | |||
| if (N > 0) | |||
| { | |||
| do | |||
| { | |||
| *y = -(*y); | |||
| y += i; | |||
| } | |||
| while (y != st); | |||
| } | |||
| } | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,115 @@ | |||
| /* | |||
| * cblas_chpr.c | |||
| * The program is a C interface to chpr. | |||
| * | |||
| * Keita Teranishi 3/23/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_chpr(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const int N, const float alpha, const void *X, | |||
| const int incX, void *A) | |||
| { | |||
| char UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_UL; | |||
| #else | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incx | |||
| #endif | |||
| int n, i, tincx, incx=incX; | |||
| float *x=(float *)X, *xx=(float *)X, *tx, *st; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasLower) UL = 'L'; | |||
| else if (Uplo == CblasUpper) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n",Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chpr(F77_UL, &F77_N, &alpha, X, &F77_incX, A); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| if( incX > 0 ) { | |||
| i = incX << 1; | |||
| tincx = 2; | |||
| st= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| st = x-2; | |||
| x +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != st); | |||
| x=tx; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| #else | |||
| incx = 1; | |||
| #endif | |||
| } | |||
| else x = (float *) X; | |||
| F77_chpr(F77_UL, &F77_N, &alpha, x, &F77_incX, A); | |||
| } else | |||
| { | |||
| cblas_xerbla(1, "cblas_chpr","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if(X!=x) | |||
| free(x); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,149 @@ | |||
| /* | |||
| * cblas_chpr2.c | |||
| * The program is a C interface to chpr2. | |||
| * | |||
| * Keita Teranishi 5/20/98 | |||
| * | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_chpr2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const int N,const void *alpha, const void *X, | |||
| const int incX,const void *Y, const int incY, void *Ap) | |||
| { | |||
| char UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_UL; | |||
| #else | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incx | |||
| #define F77_incY incy | |||
| #endif | |||
| int n, i, j, tincx, tincy, incx=incX, incy=incY; | |||
| float *x=(float *)X, *xx=(float *)X, *y=(float *)Y, | |||
| *yy=(float *)Y, *tx, *ty, *stx, *sty; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasLower) UL = 'L'; | |||
| else if (Uplo == CblasUpper) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n",Uplo ); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| F77_chpr2(F77_UL, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, Ap); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| #endif | |||
| if (N > 0) | |||
| { | |||
| n = N << 1; | |||
| x = malloc(n*sizeof(float)); | |||
| y = malloc(n*sizeof(float)); | |||
| tx = x; | |||
| ty = y; | |||
| if( incX > 0 ) { | |||
| i = incX << 1 ; | |||
| tincx = 2; | |||
| stx= x+n; | |||
| } else { | |||
| i = incX *(-2); | |||
| tincx = -2; | |||
| stx = x-2; | |||
| x +=(n-2); | |||
| } | |||
| if( incY > 0 ) { | |||
| j = incY << 1; | |||
| tincy = 2; | |||
| sty= y+n; | |||
| } else { | |||
| j = incY *(-2); | |||
| tincy = -2; | |||
| sty = y-2; | |||
| y +=(n-2); | |||
| } | |||
| do | |||
| { | |||
| *x = *xx; | |||
| x[1] = -xx[1]; | |||
| x += tincx ; | |||
| xx += i; | |||
| } | |||
| while (x != stx); | |||
| do | |||
| { | |||
| *y = *yy; | |||
| y[1] = -yy[1]; | |||
| y += tincy ; | |||
| yy += j; | |||
| } | |||
| while (y != sty); | |||
| x=tx; | |||
| y=ty; | |||
| #ifdef F77_INT | |||
| F77_incX = 1; | |||
| F77_incY = 1; | |||
| #else | |||
| incx = 1; | |||
| incy = 1; | |||
| #endif | |||
| } else | |||
| { | |||
| x = (float *) X; | |||
| y = (void *) Y; | |||
| } | |||
| F77_chpr2(F77_UL, &F77_N, alpha, y, &F77_incY, x, &F77_incX, Ap); | |||
| } else | |||
| { | |||
| cblas_xerbla(1, "cblas_chpr2","Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if(X!=x) | |||
| free(x); | |||
| if(Y!=y) | |||
| free(y); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,21 @@ | |||
| /* | |||
| * cblas_cscal.c | |||
| * | |||
| * The program is a C interface to cscal. | |||
| * | |||
| * Written by Keita Teranishi. 2/11/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cscal( const int N, const void *alpha, void *X, | |||
| const int incX) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #endif | |||
| F77_cscal( &F77_N, alpha, X, &F77_incX); | |||
| } | |||
| @@ -0,0 +1,21 @@ | |||
| /* | |||
| * cblas_csscal.c | |||
| * | |||
| * The program is a C interface to csscal. | |||
| * | |||
| * Written by Keita Teranishi. 2/11/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_csscal( const int N, const float alpha, void *X, | |||
| const int incX) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #endif | |||
| F77_csscal( &F77_N, &alpha, X, &F77_incX); | |||
| } | |||
| @@ -0,0 +1,22 @@ | |||
| /* | |||
| * cblas_cswap.c | |||
| * | |||
| * The program is a C interface to cswap. | |||
| * | |||
| * Written by Keita Teranishi. 2/11/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_cswap( const int N, void *X, const int incX, void *Y, | |||
| const int incY) | |||
| { | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_incX incX | |||
| #define F77_incY incY | |||
| #endif | |||
| F77_cswap( &F77_N, X, &F77_incX, Y, &F77_incY); | |||
| } | |||
| @@ -0,0 +1,106 @@ | |||
| /* | |||
| * | |||
| * cblas_csymm.c | |||
| * This program is a C interface to csymm. | |||
| * Written by Keita Teranishi | |||
| * 4/8/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_csymm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side, | |||
| const CBLAS_UPLO Uplo, const int M, const int N, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc) | |||
| { | |||
| char SD, UL; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_SD, F77_UL; | |||
| #else | |||
| #define F77_SD &SD | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; | |||
| F77_INT F77_ldc=ldc; | |||
| #else | |||
| #define F77_M M | |||
| #define F77_N N | |||
| #define F77_lda lda | |||
| #define F77_ldb ldb | |||
| #define F77_ldc ldc | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if( layout == CblasColMajor ) | |||
| { | |||
| if( Side == CblasRight) SD='R'; | |||
| else if ( Side == CblasLeft ) SD='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Uplo == CblasUpper) UL='U'; | |||
| else if ( Uplo == CblasLower ) UL='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_SD = C2F_CHAR(&SD); | |||
| #endif | |||
| F77_csymm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda, | |||
| B, &F77_ldb, beta, C, &F77_ldc); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if( Side == CblasRight) SD='L'; | |||
| else if ( Side == CblasLeft ) SD='R'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Uplo == CblasUpper) UL='L'; | |||
| else if ( Uplo == CblasLower ) UL='U'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_SD = C2F_CHAR(&SD); | |||
| #endif | |||
| F77_csymm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, &F77_lda, | |||
| B, &F77_ldb, beta, C, &F77_ldc); | |||
| } | |||
| else cblas_xerbla(1, "cblas_csymm", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,108 @@ | |||
| /* | |||
| * | |||
| * cblas_csyr2k.c | |||
| * This program is a C interface to csyr2k. | |||
| * Written by Keita Teranishi | |||
| * 4/8/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_csyr2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *B, const int ldb, const void *beta, | |||
| void *C, const int ldc) | |||
| { | |||
| char UL, TR; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TR, F77_UL; | |||
| #else | |||
| #define F77_TR &TR | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; | |||
| F77_INT F77_ldc=ldc; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_ldb ldb | |||
| #define F77_ldc ldc | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if( layout == CblasColMajor ) | |||
| { | |||
| if( Uplo == CblasUpper) UL='U'; | |||
| else if ( Uplo == CblasLower ) UL='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='T'; | |||
| else if ( Trans == CblasConjTrans ) TR='C'; | |||
| else if ( Trans == CblasNoTrans ) TR='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TR = C2F_CHAR(&TR); | |||
| #endif | |||
| F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, | |||
| B, &F77_ldb, beta, C, &F77_ldc); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if( Uplo == CblasUpper) UL='L'; | |||
| else if ( Uplo == CblasLower ) UL='U'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='N'; | |||
| else if ( Trans == CblasConjTrans ) TR='N'; | |||
| else if ( Trans == CblasNoTrans ) TR='T'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TR = C2F_CHAR(&TR); | |||
| #endif | |||
| F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); | |||
| } | |||
| else cblas_xerbla(1, "cblas_csyr2k", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,108 @@ | |||
| /* | |||
| * | |||
| * cblas_csyrk.c | |||
| * This program is a C interface to csyrk. | |||
| * Written by Keita Teranishi | |||
| * 4/8/1998 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_csyrk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const CBLAS_TRANSPOSE Trans, const int N, const int K, | |||
| const void *alpha, const void *A, const int lda, | |||
| const void *beta, void *C, const int ldc) | |||
| { | |||
| char UL, TR; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TR, F77_UL; | |||
| #else | |||
| #define F77_TR &TR | |||
| #define F77_UL &UL | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_K=K, F77_lda=lda; | |||
| F77_INT F77_ldc=ldc; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_ldc ldc | |||
| #endif | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if( layout == CblasColMajor ) | |||
| { | |||
| if( Uplo == CblasUpper) UL='U'; | |||
| else if ( Uplo == CblasLower ) UL='L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='T'; | |||
| else if ( Trans == CblasConjTrans ) TR='C'; | |||
| else if ( Trans == CblasNoTrans ) TR='N'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TR = C2F_CHAR(&TR); | |||
| #endif | |||
| F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, | |||
| beta, C, &F77_ldc); | |||
| } else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if( Uplo == CblasUpper) UL='L'; | |||
| else if ( Uplo == CblasLower ) UL='U'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if( Trans == CblasTrans) TR ='N'; | |||
| else if ( Trans == CblasConjTrans ) TR='N'; | |||
| else if ( Trans == CblasNoTrans ) TR='T'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TR = C2F_CHAR(&TR); | |||
| #endif | |||
| F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, | |||
| beta, C, &F77_ldc); | |||
| } | |||
| else cblas_xerbla(1, "cblas_csyrk", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,158 @@ | |||
| /* | |||
| * cblas_ctbmv.c | |||
| * The program is a C interface to ctbmv. | |||
| * | |||
| * Keita Teranishi 5/20/98 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_ctbmv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, | |||
| const int N, const int K, const void *A, const int lda, | |||
| void *X, const int incX) | |||
| { | |||
| char TA; | |||
| char UL; | |||
| char DI; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TA, F77_UL, F77_DI; | |||
| #else | |||
| #define F77_TA &TA | |||
| #define F77_UL &UL | |||
| #define F77_DI &DI | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_incX incX | |||
| #endif | |||
| int n, i=0, tincX; | |||
| float *st=0, *x=(float *)X; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasUpper) UL = 'U'; | |||
| else if (Uplo == CblasLower) UL = 'L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (TransA == CblasNoTrans) TA = 'N'; | |||
| else if (TransA == CblasTrans) TA = 'T'; | |||
| else if (TransA == CblasConjTrans) TA = 'C'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (Diag == CblasUnit) DI = 'U'; | |||
| else if (Diag == CblasNonUnit) DI = 'N'; | |||
| else | |||
| { | |||
| cblas_xerbla(4, "cblas_ctbmv","Illegal Diag setting, %d\n", Diag); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TA = C2F_CHAR(&TA); | |||
| F77_DI = C2F_CHAR(&DI); | |||
| #endif | |||
| F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, | |||
| &F77_incX); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (TransA == CblasNoTrans) TA = 'T'; | |||
| else if (TransA == CblasTrans) TA = 'N'; | |||
| else if (TransA == CblasConjTrans) | |||
| { | |||
| TA = 'N'; | |||
| if ( N > 0) | |||
| { | |||
| if(incX > 0) | |||
| tincX = incX; | |||
| else | |||
| tincX = -incX; | |||
| i = tincX << 1; | |||
| n = i * N; | |||
| x++; | |||
| st = x + n; | |||
| do | |||
| { | |||
| *x = -(*x); | |||
| x+= i; | |||
| } | |||
| while (x != st); | |||
| x -= n; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (Diag == CblasUnit) DI = 'U'; | |||
| else if (Diag == CblasNonUnit) DI = 'N'; | |||
| else | |||
| { | |||
| cblas_xerbla(4, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TA = C2F_CHAR(&TA); | |||
| F77_DI = C2F_CHAR(&DI); | |||
| #endif | |||
| F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, | |||
| &F77_incX); | |||
| if (TransA == CblasConjTrans) | |||
| { | |||
| if (N > 0) | |||
| { | |||
| do | |||
| { | |||
| *x = -(*x); | |||
| x += i; | |||
| } | |||
| while (x != st); | |||
| } | |||
| } | |||
| } | |||
| else cblas_xerbla(1, "cblas_ctbmv", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,162 @@ | |||
| /* | |||
| * cblas_ctbsv.c | |||
| * The program is a C interface to ctbsv. | |||
| * | |||
| * Keita Teranishi 3/23/98 | |||
| * | |||
| */ | |||
| #include "cblas.h" | |||
| #include "cblas_f77.h" | |||
| void cblas_ctbsv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo, | |||
| const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, | |||
| const int N, const int K, const void *A, const int lda, | |||
| void *X, const int incX) | |||
| { | |||
| char TA; | |||
| char UL; | |||
| char DI; | |||
| #ifdef F77_CHAR | |||
| F77_CHAR F77_TA, F77_UL, F77_DI; | |||
| #else | |||
| #define F77_TA &TA | |||
| #define F77_UL &UL | |||
| #define F77_DI &DI | |||
| #endif | |||
| #ifdef F77_INT | |||
| F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; | |||
| #else | |||
| #define F77_N N | |||
| #define F77_K K | |||
| #define F77_lda lda | |||
| #define F77_incX incX | |||
| #endif | |||
| int n, i=0, tincX; | |||
| float *st=0,*x=(float *)X; | |||
| extern int CBLAS_CallFromC; | |||
| extern int RowMajorStrg; | |||
| RowMajorStrg = 0; | |||
| CBLAS_CallFromC = 1; | |||
| if (layout == CblasColMajor) | |||
| { | |||
| if (Uplo == CblasUpper) UL = 'U'; | |||
| else if (Uplo == CblasLower) UL = 'L'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (TransA == CblasNoTrans) TA = 'N'; | |||
| else if (TransA == CblasTrans) TA = 'T'; | |||
| else if (TransA == CblasConjTrans) TA = 'C'; | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (Diag == CblasUnit) DI = 'U'; | |||
| else if (Diag == CblasNonUnit) DI = 'N'; | |||
| else | |||
| { | |||
| cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TA = C2F_CHAR(&TA); | |||
| F77_DI = C2F_CHAR(&DI); | |||
| #endif | |||
| F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, | |||
| &F77_incX); | |||
| } | |||
| else if (layout == CblasRowMajor) | |||
| { | |||
| RowMajorStrg = 1; | |||
| if (Uplo == CblasUpper) UL = 'L'; | |||
| else if (Uplo == CblasLower) UL = 'U'; | |||
| else | |||
| { | |||
| cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (TransA == CblasNoTrans) TA = 'T'; | |||
| else if (TransA == CblasTrans) TA = 'N'; | |||
| else if (TransA == CblasConjTrans) | |||
| { | |||
| TA = 'N'; | |||
| if ( N > 0) | |||
| { | |||
| if ( incX > 0 ) | |||
| tincX = incX; | |||
| else | |||
| tincX = -incX; | |||
| n = N*2*(tincX); | |||
| x++; | |||
| st=x+n; | |||
| i = tincX << 1; | |||
| do | |||
| { | |||
| *x = -(*x); | |||
| x+=i; | |||
| } | |||
| while (x != st); | |||
| x -= n; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| if (Diag == CblasUnit) DI = 'U'; | |||
| else if (Diag == CblasNonUnit) DI = 'N'; | |||
| else | |||
| { | |||
| cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||
| #ifdef F77_CHAR | |||
| F77_UL = C2F_CHAR(&UL); | |||
| F77_TA = C2F_CHAR(&TA); | |||
| F77_DI = C2F_CHAR(&DI); | |||
| #endif | |||
| F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, | |||
| &F77_incX); | |||
| if (TransA == CblasConjTrans) | |||
| { | |||
| if (N > 0) | |||
| { | |||
| do | |||
| { | |||
| *x = -(*x); | |||
| x+= i; | |||
| } | |||
| while (x != st); | |||
| } | |||
| } | |||
| } | |||
| else cblas_xerbla(1, "cblas_ctbsv", "Illegal layout setting, %d\n", layout); | |||
| CBLAS_CallFromC = 0; | |||
| RowMajorStrg = 0; | |||
| return; | |||
| } | |||