* Fixed gemmt, imatcopy, zimatcopy_cnc functions * Fixed cblas_cscal testing in ctest * Removed rotmg unreachable code * Added zero size checks tags/v0.3.27
| @@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA | |||||
| void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | ||||
| OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | ||||
| void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
| OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
| void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
| OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||||
| void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
| OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
| void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
| OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
| void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | ||||
| OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | ||||
| @@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double * | |||||
| void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | ||||
| xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | ||||
| void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||||
| float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
| void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||||
| double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
| void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||||
| float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
| void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||||
| double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
| int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, | int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, | ||||
| float *, float *, blasint *, float *, blasint *, | float *, float *, blasint *, float *, blasint *, | ||||
| float *, float *, blasint *); | float *, float *, blasint *); | ||||
| @@ -96,7 +96,7 @@ | |||||
| INTEGER ICAMAXTEST | INTEGER ICAMAXTEST | ||||
| EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST | EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST | ||||
| * .. External Subroutines .. | * .. External Subroutines .. | ||||
| EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1 | |||||
| EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1 | |||||
| * .. Intrinsic Functions .. | * .. Intrinsic Functions .. | ||||
| INTRINSIC MAX | INTRINSIC MAX | ||||
| * .. Common blocks .. | * .. Common blocks .. | ||||
| @@ -214,8 +214,8 @@ | |||||
| CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), | CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), | ||||
| + STRUE4(NP1),SFAC) | + STRUE4(NP1),SFAC) | ||||
| ELSE IF (ICASE.EQ.8) THEN | ELSE IF (ICASE.EQ.8) THEN | ||||
| * .. CSCAL .. | |||||
| CALL CSCAL(N,CA,CX,INCX) | |||||
| * .. CSCALTEST .. | |||||
| CALL CSCALTEST(N,CA,CX,INCX) | |||||
| CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), | CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), | ||||
| + SFAC) | + SFAC) | ||||
| ELSE IF (ICASE.EQ.9) THEN | ELSE IF (ICASE.EQ.9) THEN | ||||
| @@ -236,14 +236,14 @@ | |||||
| * | * | ||||
| INCX = 1 | INCX = 1 | ||||
| IF (ICASE.EQ.8) THEN | IF (ICASE.EQ.8) THEN | ||||
| * CSCAL | |||||
| * CSCALTEST | |||||
| * Add a test for alpha equal to zero. | * Add a test for alpha equal to zero. | ||||
| CA = (0.0E0,0.0E0) | CA = (0.0E0,0.0E0) | ||||
| DO 80 I = 1, 5 | DO 80 I = 1, 5 | ||||
| MWPCT(I) = (0.0E0,0.0E0) | MWPCT(I) = (0.0E0,0.0E0) | ||||
| MWPCS(I) = (1.0E0,1.0E0) | MWPCS(I) = (1.0E0,1.0E0) | ||||
| 80 CONTINUE | 80 CONTINUE | ||||
| CALL CSCAL(5,CA,CX,INCX) | |||||
| CALL CSCALTEST(5,CA,CX,INCX) | |||||
| CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) | CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) | ||||
| ELSE IF (ICASE.EQ.9) THEN | ELSE IF (ICASE.EQ.9) THEN | ||||
| * CSSCALTEST | * CSSCALTEST | ||||
| @@ -685,7 +685,7 @@ real *sfac; | |||||
| static integer i__; | static integer i__; | ||||
| extern /* Subroutine */ int ctest_(); | extern /* Subroutine */ int ctest_(); | ||||
| static complex mwpcs[5], mwpct[5]; | static complex mwpcs[5], mwpct[5]; | ||||
| extern /* Subroutine */ int itest1_(), stest1_(); | |||||
| extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_(); | |||||
| static complex cx[8]; | static complex cx[8]; | ||||
| extern real scnrm2test_(); | extern real scnrm2test_(); | ||||
| static integer np1; | static integer np1; | ||||
| @@ -727,7 +727,7 @@ real *sfac; | |||||
| stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); | stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); | ||||
| } else if (combla_1.icase == 8) { | } else if (combla_1.icase == 8) { | ||||
| /* .. CSCAL .. */ | /* .. CSCAL .. */ | ||||
| cscal_(&combla_1.n, &ca, cx, &combla_1.incx); | |||||
| cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx); | |||||
| ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], | ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], | ||||
| &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); | &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); | ||||
| } else if (combla_1.icase == 9) { | } else if (combla_1.icase == 9) { | ||||
| @@ -761,7 +761,7 @@ real *sfac; | |||||
| mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; | mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; | ||||
| /* L80: */ | /* L80: */ | ||||
| } | } | ||||
| cscal_(&c__5, &ca, cx, &combla_1.incx); | |||||
| cscaltest_(&c__5, &ca, cx, &combla_1.incx); | |||||
| ctest_(&c__5, cx, mwpct, mwpcs, sfac); | ctest_(&c__5, cx, mwpct, mwpcs, sfac); | ||||
| } else if (combla_1.icase == 9) { | } else if (combla_1.icase == 9) { | ||||
| /* CSSCALTEST */ | /* CSSCALTEST */ | ||||
| @@ -35,29 +35,26 @@ | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| #ifdef FUNCTION_PROFILE | |||||
| #include "functable.h" | |||||
| #endif | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| #define SMP_THRESHOLD_MIN 65536.0 | #define SMP_THRESHOLD_MIN 65536.0 | ||||
| #ifdef XDOUBLE | #ifdef XDOUBLE | ||||
| #define ERROR_NAME "QGEMT " | |||||
| #define ERROR_NAME "QGEMMT " | |||||
| #elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
| #define ERROR_NAME "DGEMT " | |||||
| #define ERROR_NAME "DGEMMT " | |||||
| #elif defined(BFLOAT16) | #elif defined(BFLOAT16) | ||||
| #define ERROR_NAME "SBGEMT " | |||||
| #define ERROR_NAME "SBGEMMT " | |||||
| #else | #else | ||||
| #define ERROR_NAME "SGEMT " | |||||
| #define ERROR_NAME "SGEMMT " | |||||
| #endif | #endif | ||||
| #else | #else | ||||
| #define SMP_THRESHOLD_MIN 8192.0 | #define SMP_THRESHOLD_MIN 8192.0 | ||||
| #ifdef XDOUBLE | #ifdef XDOUBLE | ||||
| #define ERROR_NAME "XGEMT " | |||||
| #define ERROR_NAME "XGEMMT " | |||||
| #elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
| #define ERROR_NAME "ZGEMT " | |||||
| #define ERROR_NAME "ZGEMMT " | |||||
| #else | #else | ||||
| #define ERROR_NAME "CGEMT " | |||||
| #define ERROR_NAME "CGEMMT " | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -68,18 +65,22 @@ | |||||
| #ifndef CBLAS | #ifndef CBLAS | ||||
| void NAME(char *UPLO, char *TRANSA, char *TRANSB, | void NAME(char *UPLO, char *TRANSA, char *TRANSB, | ||||
| blasint * M, blasint * N, blasint * K, | |||||
| blasint * M, blasint * K, | |||||
| FLOAT * Alpha, | FLOAT * Alpha, | ||||
| IFLOAT * a, blasint * ldA, | IFLOAT * a, blasint * ldA, | ||||
| IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) | IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) | ||||
| { | { | ||||
| blasint m, n, k; | |||||
| blasint m, k; | |||||
| blasint lda, ldb, ldc; | blasint lda, ldb, ldc; | ||||
| int transa, transb, uplo; | int transa, transb, uplo; | ||||
| blasint info; | blasint info; | ||||
| char transA, transB, Uplo; | char transA, transB, Uplo; | ||||
| blasint nrowa, nrowb; | |||||
| #if defined(COMPLEX) | |||||
| blasint ncolb; | |||||
| #endif | |||||
| IFLOAT *buffer; | IFLOAT *buffer; | ||||
| IFLOAT *aa, *bb; | IFLOAT *aa, *bb; | ||||
| FLOAT *cc; | FLOAT *cc; | ||||
| @@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||||
| PRINT_DEBUG_NAME; | PRINT_DEBUG_NAME; | ||||
| m = *M; | m = *M; | ||||
| n = *N; | |||||
| k = *K; | k = *K; | ||||
| #if defined(COMPLEX) | #if defined(COMPLEX) | ||||
| @@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||||
| if (Uplo == 'L') | if (Uplo == 'L') | ||||
| uplo = 1; | uplo = 1; | ||||
| nrowa = m; | |||||
| if (transa & 1) nrowa = k; | |||||
| nrowb = k; | |||||
| #if defined(COMPLEX) | |||||
| ncolb = m; | |||||
| #endif | |||||
| if (transb & 1) { | |||||
| nrowb = m; | |||||
| #if defined(COMPLEX) | |||||
| ncolb = k; | |||||
| #endif | |||||
| } | |||||
| info = 0; | info = 0; | ||||
| if (uplo < 0) | |||||
| info = 14; | |||||
| if (ldc < m) | |||||
| if (ldc < MAX(1, m)) | |||||
| info = 13; | info = 13; | ||||
| if (ldb < MAX(1, nrowb)) | |||||
| info = 10; | |||||
| if (lda < MAX(1, nrowa)) | |||||
| info = 8; | |||||
| if (k < 0) | if (k < 0) | ||||
| info = 5; | info = 5; | ||||
| if (n < 0) | |||||
| info = 4; | |||||
| if (m < 0) | if (m < 0) | ||||
| info = 3; | |||||
| info = 4; | |||||
| if (transb < 0) | if (transb < 0) | ||||
| info = 2; | |||||
| info = 3; | |||||
| if (transa < 0) | if (transa < 0) | ||||
| info = 2; | |||||
| if (uplo < 0) | |||||
| info = 1; | info = 1; | ||||
| if (info) { | |||||
| if (info != 0) { | |||||
| BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | ||||
| return; | return; | ||||
| } | } | ||||
| #else | #else | ||||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | ||||
| enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, | |||||
| blasint N, blasint k, | |||||
| enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m, | |||||
| blasint k, | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT alpha, | FLOAT alpha, | ||||
| IFLOAT * A, blasint LDA, | IFLOAT * A, blasint LDA, | ||||
| @@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| int transa, transb, uplo; | int transa, transb, uplo; | ||||
| blasint info; | blasint info; | ||||
| blasint m, n, lda, ldb; | |||||
| blasint lda, ldb; | |||||
| FLOAT *a, *b; | FLOAT *a, *b; | ||||
| #if defined(COMPLEX) | |||||
| blasint nrowb, ncolb; | |||||
| #endif | |||||
| XFLOAT *buffer; | XFLOAT *buffer; | ||||
| PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
| uplo = -1; | |||||
| transa = -1; | transa = -1; | ||||
| transb = -1; | transb = -1; | ||||
| info = 0; | info = 0; | ||||
| if (order == CblasColMajor) { | if (order == CblasColMajor) { | ||||
| if (Uplo == CblasUpper) uplo = 0; | |||||
| if (Uplo == CblasLower) uplo = 1; | |||||
| if (TransA == CblasNoTrans) | if (TransA == CblasNoTrans) | ||||
| transa = 0; | transa = 0; | ||||
| @@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| transb = 3; | transb = 3; | ||||
| #endif | #endif | ||||
| m = M; | |||||
| n = N; | |||||
| a = (void *)A; | a = (void *)A; | ||||
| b = (void *)B; | b = (void *)B; | ||||
| lda = LDA; | lda = LDA; | ||||
| @@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| info = -1; | info = -1; | ||||
| if (ldc < m) | |||||
| blasint nrowa; | |||||
| #if !defined(COMPLEX) | |||||
| blasint nrowb; | |||||
| #endif | |||||
| nrowa = m; | |||||
| if (transa & 1) nrowa = k; | |||||
| nrowb = k; | |||||
| #if defined(COMPLEX) | |||||
| ncolb = m; | |||||
| #endif | |||||
| if (transb & 1) { | |||||
| nrowb = m; | |||||
| #if defined(COMPLEX) | |||||
| ncolb = k; | |||||
| #endif | |||||
| } | |||||
| if (ldc < MAX(1, m)) | |||||
| info = 13; | info = 13; | ||||
| if (ldb < MAX(1, nrowb)) | |||||
| info = 10; | |||||
| if (lda < MAX(1, nrowa)) | |||||
| info = 8; | |||||
| if (k < 0) | if (k < 0) | ||||
| info = 5; | info = 5; | ||||
| if (n < 0) | |||||
| info = 4; | |||||
| if (m < 0) | if (m < 0) | ||||
| info = 3; | |||||
| info = 4; | |||||
| if (transb < 0) | if (transb < 0) | ||||
| info = 2; | |||||
| info = 3; | |||||
| if (transa < 0) | if (transa < 0) | ||||
| info = 2; | |||||
| if (uplo < 0) | |||||
| info = 1; | info = 1; | ||||
| } | } | ||||
| if (order == CblasRowMajor) { | if (order == CblasRowMajor) { | ||||
| m = N; | |||||
| n = M; | |||||
| a = (void *)B; | a = (void *)B; | ||||
| b = (void *)A; | b = (void *)A; | ||||
| @@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| lda = LDB; | lda = LDB; | ||||
| ldb = LDA; | ldb = LDA; | ||||
| if (Uplo == CblasUpper) uplo = 0; | |||||
| if (Uplo == CblasLower) uplo = 1; | |||||
| if (TransB == CblasNoTrans) | if (TransB == CblasNoTrans) | ||||
| transa = 0; | transa = 0; | ||||
| if (TransB == CblasTrans) | if (TransB == CblasTrans) | ||||
| @@ -315,29 +355,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| info = -1; | info = -1; | ||||
| if (ldc < m) | |||||
| blasint ncola; | |||||
| #if !defined(COMPLEX) | |||||
| blasint ncolb; | |||||
| #endif | |||||
| ncola = m; | |||||
| if (transa & 1) ncola = k; | |||||
| ncolb = k; | |||||
| #if defined(COMPLEX) | |||||
| nrowb = m; | |||||
| #endif | |||||
| if (transb & 1) { | |||||
| #if defined(COMPLEX) | |||||
| nrowb = k; | |||||
| #endif | |||||
| ncolb = m; | |||||
| } | |||||
| if (ldc < MAX(1,m)) | |||||
| info = 13; | info = 13; | ||||
| if (ldb < MAX(1, ncolb)) | |||||
| info = 8; | |||||
| if (lda < MAX(1, ncola)) | |||||
| info = 10; | |||||
| if (k < 0) | if (k < 0) | ||||
| info = 5; | info = 5; | ||||
| if (n < 0) | |||||
| info = 4; | |||||
| if (m < 0) | if (m < 0) | ||||
| info = 3; | |||||
| info = 4; | |||||
| if (transb < 0) | if (transb < 0) | ||||
| info = 2; | info = 2; | ||||
| if (transa < 0) | if (transa < 0) | ||||
| info = 3; | |||||
| if (uplo < 0) | |||||
| info = 1; | info = 1; | ||||
| } | } | ||||
| uplo = -1; | |||||
| if (Uplo == CblasUpper) | |||||
| uplo = 0; | |||||
| if (Uplo == CblasLower) | |||||
| uplo = 1; | |||||
| if (uplo < 0) | |||||
| info = 14; | |||||
| if (info >= 0) { | if (info >= 0) { | ||||
| BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | ||||
| return; | return; | ||||
| @@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| #endif | #endif | ||||
| if ((m == 0) || (n == 0)) | |||||
| if (m == 0) | |||||
| return; | return; | ||||
| IDEBUG_START; | IDEBUG_START; | ||||
| FUNCTION_PROFILE_START(); | |||||
| #if defined(COMPLEX) | |||||
| if (transb > 1){ | |||||
| #ifndef CBLAS | |||||
| IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||||
| #else | |||||
| if (order == CblasColMajor) | |||||
| IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||||
| if (order == CblasRowMajor) | |||||
| IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||||
| #endif | |||||
| } | |||||
| #endif | |||||
| const blasint incb = (transb == 0) ? 1 : ldb; | |||||
| const blasint incb = ((transb & 1) == 0) ? 1 : ldb; | |||||
| if (uplo == 1) { | if (uplo == 1) { | ||||
| for (i = 0; i < n; i++) { | |||||
| j = n - i; | |||||
| for (i = 0; i < m; i++) { | |||||
| j = m - i; | |||||
| l = j; | l = j; | ||||
| #if defined(COMPLEX) | #if defined(COMPLEX) | ||||
| aa = a + i * 2; | aa = a + i * 2; | ||||
| bb = b + i * ldb * 2; | bb = b + i * ldb * 2; | ||||
| if (transa) { | |||||
| l = k; | |||||
| if (transa & 1) { | |||||
| aa = a + lda * i * 2; | aa = a + lda * i * 2; | ||||
| bb = b + i * 2; | |||||
| } | } | ||||
| if (transb & 1) | |||||
| bb = b + i * 2; | |||||
| cc = c + i * 2 * ldc + i * 2; | cc = c + i * 2 * ldc + i * 2; | ||||
| #else | #else | ||||
| aa = a + i; | aa = a + i; | ||||
| bb = b + i * ldb; | bb = b + i * ldb; | ||||
| if (transa) { | |||||
| l = k; | |||||
| if (transa & 1) { | |||||
| aa = a + lda * i; | aa = a + lda * i; | ||||
| bb = b + i; | |||||
| } | } | ||||
| if (transb & 1) | |||||
| bb = b + i; | |||||
| cc = c + i * ldc + i; | cc = c + i * ldc + i; | ||||
| #endif | #endif | ||||
| @@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| NULL, 0); | NULL, 0); | ||||
| if (alpha_r == ZERO && alpha_i == ZERO) | if (alpha_r == ZERO && alpha_i == ZERO) | ||||
| return; | |||||
| continue; | |||||
| #else | #else | ||||
| if (beta != ONE) | if (beta != ONE) | ||||
| SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | ||||
| @@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| IDEBUG_START; | IDEBUG_START; | ||||
| FUNCTION_PROFILE_START(); | |||||
| buffer_size = j + k + 128 / sizeof(FLOAT); | buffer_size = j + k + 128 / sizeof(FLOAT); | ||||
| #ifdef WINDOWS_ABI | #ifdef WINDOWS_ABI | ||||
| buffer_size += 160 / sizeof(FLOAT); | buffer_size += 160 / sizeof(FLOAT); | ||||
| @@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| #endif | #endif | ||||
| #if defined(COMPLEX) | #if defined(COMPLEX) | ||||
| if (!(transa & 1)) | |||||
| (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | ||||
| aa, lda, bb, incb, cc, 1, | aa, lda, bb, incb, cc, 1, | ||||
| buffer); | buffer); | ||||
| else | |||||
| (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||||
| aa, lda, bb, incb, cc, 1, | |||||
| buffer); | |||||
| #else | #else | ||||
| if (!(transa & 1)) | |||||
| (gemv[(int)transa]) (j, k, 0, alpha, aa, lda, | (gemv[(int)transa]) (j, k, 0, alpha, aa, lda, | ||||
| bb, incb, cc, 1, buffer); | bb, incb, cc, 1, buffer); | ||||
| else | |||||
| (gemv[(int)transa]) (k, j, 0, alpha, aa, lda, | |||||
| bb, incb, cc, 1, buffer); | |||||
| #endif | #endif | ||||
| #ifdef SMP | #ifdef SMP | ||||
| } else { | } else { | ||||
| if (!(transa & 1)) | |||||
| (gemv_thread[(int)transa]) (j, k, alpha, aa, | (gemv_thread[(int)transa]) (j, k, alpha, aa, | ||||
| lda, bb, incb, cc, | lda, bb, incb, cc, | ||||
| 1, buffer, | 1, buffer, | ||||
| nthreads); | nthreads); | ||||
| else | |||||
| (gemv_thread[(int)transa]) (k, j, alpha, aa, | |||||
| lda, bb, incb, cc, | |||||
| 1, buffer, | |||||
| nthreads); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| } | } | ||||
| } else { | } else { | ||||
| for (i = 0; i < n; i++) { | |||||
| for (i = 0; i < m; i++) { | |||||
| j = i + 1; | j = i + 1; | ||||
| l = j; | l = j; | ||||
| #if defined COMPLEX | #if defined COMPLEX | ||||
| bb = b + i * ldb * 2; | bb = b + i * ldb * 2; | ||||
| if (transa) { | |||||
| l = k; | |||||
| if (transb & 1) { | |||||
| bb = b + i * 2; | bb = b + i * 2; | ||||
| } | } | ||||
| cc = c + i * 2 * ldc; | cc = c + i * 2 * ldc; | ||||
| #else | #else | ||||
| bb = b + i * ldb; | bb = b + i * ldb; | ||||
| if (transa) { | |||||
| l = k; | |||||
| if (transb & 1) { | |||||
| bb = b + i; | bb = b + i; | ||||
| } | } | ||||
| cc = c + i * ldc; | cc = c + i * ldc; | ||||
| @@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| NULL, 0); | NULL, 0); | ||||
| if (alpha_r == ZERO && alpha_i == ZERO) | if (alpha_r == ZERO && alpha_i == ZERO) | ||||
| return; | |||||
| continue; | |||||
| #else | #else | ||||
| if (beta != ONE) | if (beta != ONE) | ||||
| SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | ||||
| @@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| #endif | #endif | ||||
| IDEBUG_START; | IDEBUG_START; | ||||
| FUNCTION_PROFILE_START(); | |||||
| buffer_size = j + k + 128 / sizeof(FLOAT); | buffer_size = j + k + 128 / sizeof(FLOAT); | ||||
| #ifdef WINDOWS_ABI | #ifdef WINDOWS_ABI | ||||
| buffer_size += 160 / sizeof(FLOAT); | buffer_size += 160 / sizeof(FLOAT); | ||||
| @@ -558,32 +630,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
| #endif | #endif | ||||
| #if defined(COMPLEX) | #if defined(COMPLEX) | ||||
| if (!(transa & 1)) | |||||
| (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | ||||
| a, lda, bb, incb, cc, 1, | a, lda, bb, incb, cc, 1, | ||||
| buffer); | buffer); | ||||
| else | |||||
| (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||||
| a, lda, bb, incb, cc, 1, | |||||
| buffer); | |||||
| #else | #else | ||||
| if (!(transa & 1)) | |||||
| (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, | (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, | ||||
| incb, cc, 1, buffer); | incb, cc, 1, buffer); | ||||
| else | |||||
| (gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb, | |||||
| incb, cc, 1, buffer); | |||||
| #endif | #endif | ||||
| #ifdef SMP | #ifdef SMP | ||||
| } else { | } else { | ||||
| if (!(transa & 1)) | |||||
| (gemv_thread[(int)transa]) (j, k, alpha, a, lda, | (gemv_thread[(int)transa]) (j, k, alpha, a, lda, | ||||
| bb, incb, cc, 1, | bb, incb, cc, 1, | ||||
| buffer, nthreads); | buffer, nthreads); | ||||
| else | |||||
| (gemv_thread[(int)transa]) (k, j, alpha, a, lda, | |||||
| bb, incb, cc, 1, | |||||
| buffer, nthreads); | |||||
| } | } | ||||
| #endif | #endif | ||||
| STACK_FREE(buffer); | STACK_FREE(buffer); | ||||
| } | } | ||||
| } | } | ||||
| FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, | |||||
| args.m * args.k + args.k * args.n + | |||||
| args.m * args.n, 2 * args.m * args.n * args.k); | |||||
| IDEBUG_END; | IDEBUG_END; | ||||
| return; | return; | ||||
| } | |||||
| } | |||||
| @@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||||
| #endif | #endif | ||||
| if ( *lda > *ldb ) | |||||
| msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT); | |||||
| else | |||||
| msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT); | |||||
| if ( *rows > *cols ) | |||||
| msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT); | |||||
| else | |||||
| msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT); | |||||
| b = malloc(msize); | b = malloc(msize); | ||||
| if ( b == NULL ) | if ( b == NULL ) | ||||
| @@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||||
| else | else | ||||
| { | { | ||||
| dp2 = *dd2 * dy1; | dp2 = *dd2 * dy1; | ||||
| if(dp2 == ZERO) | |||||
| { | |||||
| dflag = -TWO; | |||||
| dparam[0] = dflag; | |||||
| return; | |||||
| } | |||||
| dp1 = *dd1 * *dx1; | dp1 = *dd1 * *dx1; | ||||
| dq2 = dp2 * dy1; | dq2 = dp2 * dy1; | ||||
| dq1 = dp1 * *dx1; | dq1 = dp1 * *dx1; | ||||
| @@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||||
| dh12 = dp2 / dp1; | dh12 = dp2 / dp1; | ||||
| du = ONE - dh12 * dh21; | du = ONE - dh12 * dh21; | ||||
| if(du > ZERO) | |||||
| { | |||||
| dflag = ZERO; | |||||
| *dd1 = *dd1 / du; | |||||
| *dd2 = *dd2 / du; | |||||
| *dx1 = *dx1 * du; | |||||
| } else { | |||||
| dflag = -ONE; | |||||
| dh11 = ZERO; | |||||
| dh12 = ZERO; | |||||
| dh21 = ZERO; | |||||
| dh22 = ZERO; | |||||
| *dd1 = ZERO; | |||||
| *dd2 = ZERO; | |||||
| *dx1 = ZERO; | |||||
| } | |||||
| dflag = ZERO; | |||||
| *dd1 = *dd1 / du; | |||||
| *dd2 = *dd2 / du; | |||||
| *dx1 = *dx1 * du; | |||||
| } | } | ||||
| else | else | ||||
| @@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||||
| } | } | ||||
| #endif | #endif | ||||
| if ( *lda > *ldb ) | |||||
| msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2; | |||||
| if ( *rows > *cols ) | |||||
| msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2; | |||||
| else | else | ||||
| msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2; | |||||
| msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2; | |||||
| b = malloc(msize); | b = malloc(msize); | ||||
| if ( b == NULL ) | if ( b == NULL ) | ||||
| @@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, | |||||
| if ( rows <= 0 ) return(0); | if ( rows <= 0 ) return(0); | ||||
| if ( cols <= 0 ) return(0); | if ( cols <= 0 ) return(0); | ||||
| if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0); | |||||
| aptr = a; | aptr = a; | ||||
| lda *= 2; | lda *= 2; | ||||
| @@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT * | |||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
| if ( n < 0 ) return(0); | |||||
| if ( n <= 0 ) return(0); | |||||
| ix = 0; | ix = 0; | ||||
| iy = 0; | iy = 0; | ||||
| @@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS | |||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
| if ( n < 0 ) return(0); | |||||
| if ( n <= 0 ) return(0); | |||||
| if ( da == 0.0 ) return(0); | if ( da == 0.0 ) return(0); | ||||
| ix = 0; | ix = 0; | ||||
| @@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| if ( n < 0 ) return(0); | |||||
| if ( n <= 0 ) return(0); | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| double dot = 0.0 ; | double dot = 0.0 ; | ||||
| if ( n < 0 ) return(dot); | |||||
| if ( n < 1 ) return(dot); | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, | |||||
| BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
| FLOAT temp; | FLOAT temp; | ||||
| if ( n < 0 ) return(0); | |||||
| if ( n <= 0 ) return(0); | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||||
| BLASLONG inc_x2; | BLASLONG inc_x2; | ||||
| BLASLONG inc_y2; | BLASLONG inc_y2; | ||||
| if ( n < 0 ) return(0); | |||||
| if ( n <= 0 ) return(0); | |||||
| if ( da_r == 0.0 && da_i == 0.0 ) return(0); | if ( da_r == 0.0 && da_i == 0.0 ) return(0); | ||||
| ix = 0; | ix = 0; | ||||
| @@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
| BLASLONG inc_x2; | BLASLONG inc_x2; | ||||
| BLASLONG inc_y2; | BLASLONG inc_y2; | ||||
| if ( n < 0 ) return(0); | |||||
| if ( n <= 0 ) return(0); | |||||
| inc_x2 = 2 * inc_x; | inc_x2 = 2 * inc_x; | ||||
| inc_y2 = 2 * inc_y; | inc_y2 = 2 * inc_y; | ||||
| @@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm | |||||
| BLASLONG inc_x2; | BLASLONG inc_x2; | ||||
| BLASLONG inc_y2; | BLASLONG inc_y2; | ||||
| if ( n < 0 ) return(0); | |||||
| if ( n <= 0 ) return(0); | |||||
| inc_x2 = 2 * inc_x; | inc_x2 = 2 * inc_x; | ||||
| inc_y2 = 2 * inc_y; | inc_y2 = 2 * inc_y; | ||||