* Fixed gemmt, imatcopy, zimatcopy_cnc functions * Fixed cblas_cscal testing in ctest * Removed rotmg unreachable code * Added zero size checks — tags/v0.3.27
| @@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA | |||
| void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||
| OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||
| void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
| OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||
| void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
| OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||
| void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
| OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||
| void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
| OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||
| void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||
| OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||
| @@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double * | |||
| void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | |||
| xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||
| void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||
| float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||
| void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||
| double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||
| void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||
| float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||
| void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||
| double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||
| int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, | |||
| float *, float *, blasint *, float *, blasint *, | |||
| float *, float *, blasint *); | |||
| @@ -96,7 +96,7 @@ | |||
| INTEGER ICAMAXTEST | |||
| EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST | |||
| * .. External Subroutines .. | |||
| EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1 | |||
| EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1 | |||
| * .. Intrinsic Functions .. | |||
| INTRINSIC MAX | |||
| * .. Common blocks .. | |||
| @@ -214,8 +214,8 @@ | |||
| CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), | |||
| + STRUE4(NP1),SFAC) | |||
| ELSE IF (ICASE.EQ.8) THEN | |||
| * .. CSCAL .. | |||
| CALL CSCAL(N,CA,CX,INCX) | |||
| * .. CSCALTEST .. | |||
| CALL CSCALTEST(N,CA,CX,INCX) | |||
| CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), | |||
| + SFAC) | |||
| ELSE IF (ICASE.EQ.9) THEN | |||
| @@ -236,14 +236,14 @@ | |||
| * | |||
| INCX = 1 | |||
| IF (ICASE.EQ.8) THEN | |||
| * CSCAL | |||
| * CSCALTEST | |||
| * Add a test for alpha equal to zero. | |||
| CA = (0.0E0,0.0E0) | |||
| DO 80 I = 1, 5 | |||
| MWPCT(I) = (0.0E0,0.0E0) | |||
| MWPCS(I) = (1.0E0,1.0E0) | |||
| 80 CONTINUE | |||
| CALL CSCAL(5,CA,CX,INCX) | |||
| CALL CSCALTEST(5,CA,CX,INCX) | |||
| CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) | |||
| ELSE IF (ICASE.EQ.9) THEN | |||
| * CSSCALTEST | |||
| @@ -685,7 +685,7 @@ real *sfac; | |||
| static integer i__; | |||
| extern /* Subroutine */ int ctest_(); | |||
| static complex mwpcs[5], mwpct[5]; | |||
| extern /* Subroutine */ int itest1_(), stest1_(); | |||
| extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_(); | |||
| static complex cx[8]; | |||
| extern real scnrm2test_(); | |||
| static integer np1; | |||
| @@ -727,7 +727,7 @@ real *sfac; | |||
| stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); | |||
| } else if (combla_1.icase == 8) { | |||
| /* .. CSCAL .. */ | |||
| cscal_(&combla_1.n, &ca, cx, &combla_1.incx); | |||
| cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx); | |||
| ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], | |||
| &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); | |||
| } else if (combla_1.icase == 9) { | |||
| @@ -761,7 +761,7 @@ real *sfac; | |||
| mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; | |||
| /* L80: */ | |||
| } | |||
| cscal_(&c__5, &ca, cx, &combla_1.incx); | |||
| cscaltest_(&c__5, &ca, cx, &combla_1.incx); | |||
| ctest_(&c__5, cx, mwpct, mwpcs, sfac); | |||
| } else if (combla_1.icase == 9) { | |||
| /* CSSCALTEST */ | |||
| @@ -35,29 +35,26 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "common.h" | |||
| #ifdef FUNCTION_PROFILE | |||
| #include "functable.h" | |||
| #endif | |||
| #ifndef COMPLEX | |||
| #define SMP_THRESHOLD_MIN 65536.0 | |||
| #ifdef XDOUBLE | |||
| #define ERROR_NAME "QGEMT " | |||
| #define ERROR_NAME "QGEMMT " | |||
| #elif defined(DOUBLE) | |||
| #define ERROR_NAME "DGEMT " | |||
| #define ERROR_NAME "DGEMMT " | |||
| #elif defined(BFLOAT16) | |||
| #define ERROR_NAME "SBGEMT " | |||
| #define ERROR_NAME "SBGEMMT " | |||
| #else | |||
| #define ERROR_NAME "SGEMT " | |||
| #define ERROR_NAME "SGEMMT " | |||
| #endif | |||
| #else | |||
| #define SMP_THRESHOLD_MIN 8192.0 | |||
| #ifdef XDOUBLE | |||
| #define ERROR_NAME "XGEMT " | |||
| #define ERROR_NAME "XGEMMT " | |||
| #elif defined(DOUBLE) | |||
| #define ERROR_NAME "ZGEMT " | |||
| #define ERROR_NAME "ZGEMMT " | |||
| #else | |||
| #define ERROR_NAME "CGEMT " | |||
| #define ERROR_NAME "CGEMMT " | |||
| #endif | |||
| #endif | |||
| @@ -68,18 +65,22 @@ | |||
| #ifndef CBLAS | |||
| void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
| blasint * M, blasint * N, blasint * K, | |||
| blasint * M, blasint * K, | |||
| FLOAT * Alpha, | |||
| IFLOAT * a, blasint * ldA, | |||
| IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) | |||
| { | |||
| blasint m, n, k; | |||
| blasint m, k; | |||
| blasint lda, ldb, ldc; | |||
| int transa, transb, uplo; | |||
| blasint info; | |||
| char transA, transB, Uplo; | |||
| blasint nrowa, nrowb; | |||
| #if defined(COMPLEX) | |||
| blasint ncolb; | |||
| #endif | |||
| IFLOAT *buffer; | |||
| IFLOAT *aa, *bb; | |||
| FLOAT *cc; | |||
| @@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
| PRINT_DEBUG_NAME; | |||
| m = *M; | |||
| n = *N; | |||
| k = *K; | |||
| #if defined(COMPLEX) | |||
| @@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
| if (Uplo == 'L') | |||
| uplo = 1; | |||
| nrowa = m; | |||
| if (transa & 1) nrowa = k; | |||
| nrowb = k; | |||
| #if defined(COMPLEX) | |||
| ncolb = m; | |||
| #endif | |||
| if (transb & 1) { | |||
| nrowb = m; | |||
| #if defined(COMPLEX) | |||
| ncolb = k; | |||
| #endif | |||
| } | |||
| info = 0; | |||
| if (uplo < 0) | |||
| info = 14; | |||
| if (ldc < m) | |||
| if (ldc < MAX(1, m)) | |||
| info = 13; | |||
| if (ldb < MAX(1, nrowb)) | |||
| info = 10; | |||
| if (lda < MAX(1, nrowa)) | |||
| info = 8; | |||
| if (k < 0) | |||
| info = 5; | |||
| if (n < 0) | |||
| info = 4; | |||
| if (m < 0) | |||
| info = 3; | |||
| info = 4; | |||
| if (transb < 0) | |||
| info = 2; | |||
| info = 3; | |||
| if (transa < 0) | |||
| info = 2; | |||
| if (uplo < 0) | |||
| info = 1; | |||
| if (info) { | |||
| if (info != 0) { | |||
| BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| #else | |||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, | |||
| blasint N, blasint k, | |||
| enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m, | |||
| blasint k, | |||
| #ifndef COMPLEX | |||
| FLOAT alpha, | |||
| IFLOAT * A, blasint LDA, | |||
| @@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| int transa, transb, uplo; | |||
| blasint info; | |||
| blasint m, n, lda, ldb; | |||
| blasint lda, ldb; | |||
| FLOAT *a, *b; | |||
| #if defined(COMPLEX) | |||
| blasint nrowb, ncolb; | |||
| #endif | |||
| XFLOAT *buffer; | |||
| PRINT_DEBUG_CNAME; | |||
| uplo = -1; | |||
| transa = -1; | |||
| transb = -1; | |||
| info = 0; | |||
| if (order == CblasColMajor) { | |||
| if (Uplo == CblasUpper) uplo = 0; | |||
| if (Uplo == CblasLower) uplo = 1; | |||
| if (TransA == CblasNoTrans) | |||
| transa = 0; | |||
| @@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| transb = 3; | |||
| #endif | |||
| m = M; | |||
| n = N; | |||
| a = (void *)A; | |||
| b = (void *)B; | |||
| lda = LDA; | |||
| @@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| info = -1; | |||
| if (ldc < m) | |||
| blasint nrowa; | |||
| #if !defined(COMPLEX) | |||
| blasint nrowb; | |||
| #endif | |||
| nrowa = m; | |||
| if (transa & 1) nrowa = k; | |||
| nrowb = k; | |||
| #if defined(COMPLEX) | |||
| ncolb = m; | |||
| #endif | |||
| if (transb & 1) { | |||
| nrowb = m; | |||
| #if defined(COMPLEX) | |||
| ncolb = k; | |||
| #endif | |||
| } | |||
| if (ldc < MAX(1, m)) | |||
| info = 13; | |||
| if (ldb < MAX(1, nrowb)) | |||
| info = 10; | |||
| if (lda < MAX(1, nrowa)) | |||
| info = 8; | |||
| if (k < 0) | |||
| info = 5; | |||
| if (n < 0) | |||
| info = 4; | |||
| if (m < 0) | |||
| info = 3; | |||
| info = 4; | |||
| if (transb < 0) | |||
| info = 2; | |||
| info = 3; | |||
| if (transa < 0) | |||
| info = 2; | |||
| if (uplo < 0) | |||
| info = 1; | |||
| } | |||
| if (order == CblasRowMajor) { | |||
| m = N; | |||
| n = M; | |||
| a = (void *)B; | |||
| b = (void *)A; | |||
| @@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| lda = LDB; | |||
| ldb = LDA; | |||
| if (Uplo == CblasUpper) uplo = 0; | |||
| if (Uplo == CblasLower) uplo = 1; | |||
| if (TransB == CblasNoTrans) | |||
| transa = 0; | |||
| if (TransB == CblasTrans) | |||
| @@ -315,29 +355,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| info = -1; | |||
| if (ldc < m) | |||
| blasint ncola; | |||
| #if !defined(COMPLEX) | |||
| blasint ncolb; | |||
| #endif | |||
| ncola = m; | |||
| if (transa & 1) ncola = k; | |||
| ncolb = k; | |||
| #if defined(COMPLEX) | |||
| nrowb = m; | |||
| #endif | |||
| if (transb & 1) { | |||
| #if defined(COMPLEX) | |||
| nrowb = k; | |||
| #endif | |||
| ncolb = m; | |||
| } | |||
| if (ldc < MAX(1,m)) | |||
| info = 13; | |||
| if (ldb < MAX(1, ncolb)) | |||
| info = 8; | |||
| if (lda < MAX(1, ncola)) | |||
| info = 10; | |||
| if (k < 0) | |||
| info = 5; | |||
| if (n < 0) | |||
| info = 4; | |||
| if (m < 0) | |||
| info = 3; | |||
| info = 4; | |||
| if (transb < 0) | |||
| info = 2; | |||
| if (transa < 0) | |||
| info = 3; | |||
| if (uplo < 0) | |||
| info = 1; | |||
| } | |||
| uplo = -1; | |||
| if (Uplo == CblasUpper) | |||
| uplo = 0; | |||
| if (Uplo == CblasLower) | |||
| uplo = 1; | |||
| if (uplo < 0) | |||
| info = 14; | |||
| if (info >= 0) { | |||
| BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| @@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| if ((m == 0) || (n == 0)) | |||
| if (m == 0) | |||
| return; | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| #if defined(COMPLEX) | |||
| if (transb > 1){ | |||
| #ifndef CBLAS | |||
| IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||
| #else | |||
| if (order == CblasColMajor) | |||
| IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||
| if (order == CblasRowMajor) | |||
| IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||
| #endif | |||
| } | |||
| #endif | |||
| const blasint incb = (transb == 0) ? 1 : ldb; | |||
| const blasint incb = ((transb & 1) == 0) ? 1 : ldb; | |||
| if (uplo == 1) { | |||
| for (i = 0; i < n; i++) { | |||
| j = n - i; | |||
| for (i = 0; i < m; i++) { | |||
| j = m - i; | |||
| l = j; | |||
| #if defined(COMPLEX) | |||
| aa = a + i * 2; | |||
| bb = b + i * ldb * 2; | |||
| if (transa) { | |||
| l = k; | |||
| if (transa & 1) { | |||
| aa = a + lda * i * 2; | |||
| bb = b + i * 2; | |||
| } | |||
| if (transb & 1) | |||
| bb = b + i * 2; | |||
| cc = c + i * 2 * ldc + i * 2; | |||
| #else | |||
| aa = a + i; | |||
| bb = b + i * ldb; | |||
| if (transa) { | |||
| l = k; | |||
| if (transa & 1) { | |||
| aa = a + lda * i; | |||
| bb = b + i; | |||
| } | |||
| if (transb & 1) | |||
| bb = b + i; | |||
| cc = c + i * ldc + i; | |||
| #endif | |||
| @@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| NULL, 0); | |||
| if (alpha_r == ZERO && alpha_i == ZERO) | |||
| return; | |||
| continue; | |||
| #else | |||
| if (beta != ONE) | |||
| SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | |||
| @@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| buffer_size = j + k + 128 / sizeof(FLOAT); | |||
| #ifdef WINDOWS_ABI | |||
| buffer_size += 160 / sizeof(FLOAT); | |||
| @@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| #if defined(COMPLEX) | |||
| if (!(transa & 1)) | |||
| (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | |||
| aa, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||
| aa, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| #else | |||
| if (!(transa & 1)) | |||
| (gemv[(int)transa]) (j, k, 0, alpha, aa, lda, | |||
| bb, incb, cc, 1, buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha, aa, lda, | |||
| bb, incb, cc, 1, buffer); | |||
| #endif | |||
| #ifdef SMP | |||
| } else { | |||
| if (!(transa & 1)) | |||
| (gemv_thread[(int)transa]) (j, k, alpha, aa, | |||
| lda, bb, incb, cc, | |||
| 1, buffer, | |||
| nthreads); | |||
| else | |||
| (gemv_thread[(int)transa]) (k, j, alpha, aa, | |||
| lda, bb, incb, cc, | |||
| 1, buffer, | |||
| nthreads); | |||
| } | |||
| #endif | |||
| @@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| } | |||
| } else { | |||
| for (i = 0; i < n; i++) { | |||
| for (i = 0; i < m; i++) { | |||
| j = i + 1; | |||
| l = j; | |||
| #if defined COMPLEX | |||
| bb = b + i * ldb * 2; | |||
| if (transa) { | |||
| l = k; | |||
| if (transb & 1) { | |||
| bb = b + i * 2; | |||
| } | |||
| cc = c + i * 2 * ldc; | |||
| #else | |||
| bb = b + i * ldb; | |||
| if (transa) { | |||
| l = k; | |||
| if (transb & 1) { | |||
| bb = b + i; | |||
| } | |||
| cc = c + i * ldc; | |||
| @@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| NULL, 0); | |||
| if (alpha_r == ZERO && alpha_i == ZERO) | |||
| return; | |||
| continue; | |||
| #else | |||
| if (beta != ONE) | |||
| SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | |||
| @@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| buffer_size = j + k + 128 / sizeof(FLOAT); | |||
| #ifdef WINDOWS_ABI | |||
| buffer_size += 160 / sizeof(FLOAT); | |||
| @@ -558,32 +630,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| #if defined(COMPLEX) | |||
| if (!(transa & 1)) | |||
| (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | |||
| a, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||
| a, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| #else | |||
| if (!(transa & 1)) | |||
| (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, | |||
| incb, cc, 1, buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb, | |||
| incb, cc, 1, buffer); | |||
| #endif | |||
| #ifdef SMP | |||
| } else { | |||
| if (!(transa & 1)) | |||
| (gemv_thread[(int)transa]) (j, k, alpha, a, lda, | |||
| bb, incb, cc, 1, | |||
| buffer, nthreads); | |||
| else | |||
| (gemv_thread[(int)transa]) (k, j, alpha, a, lda, | |||
| bb, incb, cc, 1, | |||
| buffer, nthreads); | |||
| } | |||
| #endif | |||
| STACK_FREE(buffer); | |||
| } | |||
| } | |||
| FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, | |||
| args.m * args.k + args.k * args.n + | |||
| args.m * args.n, 2 * args.m * args.n * args.k); | |||
| IDEBUG_END; | |||
| return; | |||
| } | |||
| } | |||
| @@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
| #endif | |||
| if ( *lda > *ldb ) | |||
| msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT); | |||
| else | |||
| msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT); | |||
| if ( *rows > *cols ) | |||
| msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT); | |||
| else | |||
| msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT); | |||
| b = malloc(msize); | |||
| if ( b == NULL ) | |||
| @@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||
| else | |||
| { | |||
| dp2 = *dd2 * dy1; | |||
| if(dp2 == ZERO) | |||
| { | |||
| dflag = -TWO; | |||
| dparam[0] = dflag; | |||
| return; | |||
| } | |||
| dp1 = *dd1 * *dx1; | |||
| dq2 = dp2 * dy1; | |||
| dq1 = dp1 * *dx1; | |||
| @@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||
| dh12 = dp2 / dp1; | |||
| du = ONE - dh12 * dh21; | |||
| if(du > ZERO) | |||
| { | |||
| dflag = ZERO; | |||
| *dd1 = *dd1 / du; | |||
| *dd2 = *dd2 / du; | |||
| *dx1 = *dx1 * du; | |||
| } else { | |||
| dflag = -ONE; | |||
| dh11 = ZERO; | |||
| dh12 = ZERO; | |||
| dh21 = ZERO; | |||
| dh22 = ZERO; | |||
| *dd1 = ZERO; | |||
| *dd2 = ZERO; | |||
| *dx1 = ZERO; | |||
| } | |||
| dflag = ZERO; | |||
| *dd1 = *dd1 / du; | |||
| *dd2 = *dd2 / du; | |||
| *dx1 = *dx1 * du; | |||
| } | |||
| else | |||
| @@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
| } | |||
| #endif | |||
| if ( *lda > *ldb ) | |||
| msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2; | |||
| if ( *rows > *cols ) | |||
| msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2; | |||
| else | |||
| msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2; | |||
| msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2; | |||
| b = malloc(msize); | |||
| if ( b == NULL ) | |||
| @@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0); | |||
| aptr = a; | |||
| lda *= 2; | |||
| @@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT * | |||
| BLASLONG i=0; | |||
| BLASLONG ix,iy; | |||
| if ( n < 0 ) return(0); | |||
| if ( n <= 0 ) return(0); | |||
| ix = 0; | |||
| iy = 0; | |||
| @@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS | |||
| BLASLONG i=0; | |||
| BLASLONG ix,iy; | |||
| if ( n < 0 ) return(0); | |||
| if ( n <= 0 ) return(0); | |||
| if ( da == 0.0 ) return(0); | |||
| ix = 0; | |||
| @@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0,iy=0; | |||
| if ( n < 0 ) return(0); | |||
| if ( n <= 0 ) return(0); | |||
| while(i < n) | |||
| { | |||
| @@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| BLASLONG ix=0,iy=0; | |||
| double dot = 0.0 ; | |||
| if ( n < 0 ) return(dot); | |||
| if ( n < 1 ) return(dot); | |||
| while(i < n) | |||
| { | |||
| @@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, | |||
| BLASLONG ix=0,iy=0; | |||
| FLOAT temp; | |||
| if ( n < 0 ) return(0); | |||
| if ( n <= 0 ) return(0); | |||
| while(i < n) | |||
| { | |||
| @@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| BLASLONG inc_x2; | |||
| BLASLONG inc_y2; | |||
| if ( n < 0 ) return(0); | |||
| if ( n <= 0 ) return(0); | |||
| if ( da_r == 0.0 && da_i == 0.0 ) return(0); | |||
| ix = 0; | |||
| @@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| BLASLONG inc_x2; | |||
| BLASLONG inc_y2; | |||
| if ( n < 0 ) return(0); | |||
| if ( n <= 0 ) return(0); | |||
| inc_x2 = 2 * inc_x; | |||
| inc_y2 = 2 * inc_y; | |||
| @@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm | |||
| BLASLONG inc_x2; | |||
| BLASLONG inc_y2; | |||
| if ( n < 0 ) return(0); | |||
| if ( n <= 0 ) return(0); | |||
| inc_x2 = 2 * inc_x; | |||
| inc_y2 = 2 * inc_y; | |||