| @@ -35,29 +35,26 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "common.h" | |||
| #ifdef FUNCTION_PROFILE | |||
| #include "functable.h" | |||
| #endif | |||
| #ifndef COMPLEX | |||
| #define SMP_THRESHOLD_MIN 65536.0 | |||
| #ifdef XDOUBLE | |||
| #define ERROR_NAME "QGEMT " | |||
| #define ERROR_NAME "QGEMMT " | |||
| #elif defined(DOUBLE) | |||
| #define ERROR_NAME "DGEMT " | |||
| #define ERROR_NAME "DGEMMT " | |||
| #elif defined(BFLOAT16) | |||
| #define ERROR_NAME "SBGEMT " | |||
| #define ERROR_NAME "SBGEMMT " | |||
| #else | |||
| #define ERROR_NAME "SGEMT " | |||
| #define ERROR_NAME "SGEMMT " | |||
| #endif | |||
| #else | |||
| #define SMP_THRESHOLD_MIN 8192.0 | |||
| #ifdef XDOUBLE | |||
| #define ERROR_NAME "XGEMT " | |||
| #define ERROR_NAME "XGEMMT " | |||
| #elif defined(DOUBLE) | |||
| #define ERROR_NAME "ZGEMT " | |||
| #define ERROR_NAME "ZGEMMT " | |||
| #else | |||
| #define ERROR_NAME "CGEMT " | |||
| #define ERROR_NAME "CGEMMT " | |||
| #endif | |||
| #endif | |||
| @@ -68,13 +65,13 @@ | |||
| #ifndef CBLAS | |||
| void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
| blasint * M, blasint * N, blasint * K, | |||
| blasint * M, blasint * K, | |||
| FLOAT * Alpha, | |||
| IFLOAT * a, blasint * ldA, | |||
| IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) | |||
| { | |||
| blasint m, n, k; | |||
| blasint m, k; | |||
| blasint lda, ldb, ldc; | |||
| int transa, transb, uplo; | |||
| blasint info; | |||
| @@ -92,7 +89,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
| PRINT_DEBUG_NAME; | |||
| m = *M; | |||
| n = *N; | |||
| k = *K; | |||
| #if defined(COMPLEX) | |||
| @@ -167,8 +163,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
| info = 13; | |||
| if (k < 0) | |||
| info = 5; | |||
| if (n < 0) | |||
| info = 4; | |||
| if (m < 0) | |||
| info = 3; | |||
| if (transb < 0) | |||
| @@ -184,7 +178,7 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, | |||
| blasint N, blasint k, | |||
| blasint k, | |||
| #ifndef COMPLEX | |||
| FLOAT alpha, | |||
| IFLOAT * A, blasint LDA, | |||
| @@ -205,7 +199,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| int transa, transb, uplo; | |||
| blasint info; | |||
| blasint m, n, lda, ldb; | |||
| blasint m, lda, ldb; | |||
| FLOAT *a, *b; | |||
| XFLOAT *buffer; | |||
| @@ -248,9 +242,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| transb = 3; | |||
| #endif | |||
| m = M; | |||
| n = N; | |||
| a = (void *)A; | |||
| b = (void *)B; | |||
| lda = LDA; | |||
| @@ -262,8 +253,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| info = 13; | |||
| if (k < 0) | |||
| info = 5; | |||
| if (n < 0) | |||
| info = 4; | |||
| if (m < 0) | |||
| info = 3; | |||
| if (transb < 0) | |||
| @@ -273,8 +262,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| } | |||
| if (order == CblasRowMajor) { | |||
| m = N; | |||
| n = M; | |||
| a = (void *)B; | |||
| b = (void *)A; | |||
| @@ -319,8 +306,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| info = 13; | |||
| if (k < 0) | |||
| info = 5; | |||
| if (n < 0) | |||
| info = 4; | |||
| if (m < 0) | |||
| info = 3; | |||
| if (transb < 0) | |||
| @@ -407,37 +392,35 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| if ((m == 0) || (n == 0)) | |||
| if ((m == 0) ) | |||
| return; | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| const blasint incb = (transb == 0) ? 1 : ldb; | |||
| if (uplo == 1) { | |||
| for (i = 0; i < n; i++) { | |||
| j = n - i; | |||
| for (i = 0; i < m; i++) { | |||
| j = m - i; | |||
| l = j; | |||
| #if defined(COMPLEX) | |||
| aa = a + i * 2; | |||
| bb = b + i * ldb * 2; | |||
| if (transa) { | |||
| l = k; | |||
| aa = a + lda * i * 2; | |||
| bb = b + i * 2; | |||
| } | |||
| if (transb) | |||
| bb = b + i * 2; | |||
| cc = c + i * 2 * ldc + i * 2; | |||
| #else | |||
| aa = a + i; | |||
| bb = b + i * ldb; | |||
| if (transa) { | |||
| l = k; | |||
| aa = a + lda * i; | |||
| bb = b + i; | |||
| } | |||
| if (transb) | |||
| bb = b + i; | |||
| cc = c + i * ldc + i; | |||
| #endif | |||
| @@ -458,8 +441,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| buffer_size = j + k + 128 / sizeof(FLOAT); | |||
| #ifdef WINDOWS_ABI | |||
| buffer_size += 160 / sizeof(FLOAT); | |||
| @@ -479,20 +460,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| #if defined(COMPLEX) | |||
| if (!transa) | |||
| (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | |||
| aa, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||
| aa, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| #else | |||
| if (!transa) | |||
| (gemv[(int)transa]) (j, k, 0, alpha, aa, lda, | |||
| bb, incb, cc, 1, buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha, aa, lda, | |||
| bb, incb, cc, 1, buffer); | |||
| #endif | |||
| #ifdef SMP | |||
| } else { | |||
| if (!transa) | |||
| (gemv_thread[(int)transa]) (j, k, alpha, aa, | |||
| lda, bb, incb, cc, | |||
| 1, buffer, | |||
| nthreads); | |||
| else | |||
| (gemv_thread[(int)transa]) (k, j, alpha, aa, | |||
| lda, bb, incb, cc, | |||
| 1, buffer, | |||
| nthreads); | |||
| } | |||
| #endif | |||
| @@ -501,21 +496,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| } | |||
| } else { | |||
| for (i = 0; i < n; i++) { | |||
| for (i = 0; i < m; i++) { | |||
| j = i + 1; | |||
| l = j; | |||
| #if defined COMPLEX | |||
| bb = b + i * ldb * 2; | |||
| if (transa) { | |||
| l = k; | |||
| if (transb) { | |||
| bb = b + i * 2; | |||
| } | |||
| cc = c + i * 2 * ldc; | |||
| #else | |||
| bb = b + i * ldb; | |||
| if (transa) { | |||
| l = k; | |||
| if (transb) { | |||
| bb = b + i; | |||
| } | |||
| cc = c + i * ldc; | |||
| @@ -537,8 +530,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| buffer_size = j + k + 128 / sizeof(FLOAT); | |||
| #ifdef WINDOWS_ABI | |||
| buffer_size += 160 / sizeof(FLOAT); | |||
| @@ -558,30 +549,39 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| #if defined(COMPLEX) | |||
| if (!transa) | |||
| (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | |||
| a, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||
| a, lda, bb, incb, cc, 1, | |||
| buffer); | |||
| #else | |||
| if (!transa) | |||
| (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, | |||
| incb, cc, 1, buffer); | |||
| else | |||
| (gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb, | |||
| incb, cc, 1, buffer); | |||
| #endif | |||
| #ifdef SMP | |||
| } else { | |||
| if (!transa) | |||
| (gemv_thread[(int)transa]) (j, k, alpha, a, lda, | |||
| bb, incb, cc, 1, | |||
| buffer, nthreads); | |||
| else | |||
| (gemv_thread[(int)transa]) (k, j, alpha, a, lda, | |||
| bb, incb, cc, 1, | |||
| buffer, nthreads); | |||
| } | |||
| #endif | |||
| STACK_FREE(buffer); | |||
| } | |||
| } | |||
| FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, | |||
| args.m * args.k + args.k * args.n + | |||
| args.m * args.n, 2 * args.m * args.n * args.k); | |||
| IDEBUG_END; | |||