| @@ -55,7 +55,7 @@ | |||||
| static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| BLASLONG incx, incy; | |||||
| BLASLONG incx; | |||||
| BLASLONG m_from, m_to, i; | BLASLONG m_from, m_to, i; | ||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| FLOAT result; | FLOAT result; | ||||
| @@ -68,7 +68,6 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| y = (FLOAT *)args -> c; | y = (FLOAT *)args -> c; | ||||
| incx = args -> ldb; | incx = args -> ldb; | ||||
| incy = args -> ldc; | |||||
| m_from = 0; | m_from = 0; | ||||
| m_to = args -> m; | m_to = args -> m; | ||||
| @@ -43,7 +43,7 @@ | |||||
| static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| BLASLONG lda, incx, incy; | |||||
| BLASLONG incx, incy; | |||||
| BLASLONG i, m_from, m_to; | BLASLONG i, m_from, m_to; | ||||
| FLOAT alpha_r; | FLOAT alpha_r; | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| @@ -56,7 +56,6 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL | |||||
| incx = args -> lda; | incx = args -> lda; | ||||
| incy = args -> ldb; | incy = args -> ldb; | ||||
| lda = args -> ldc; | |||||
| alpha_r = *((FLOAT *)args -> alpha + 0); | alpha_r = *((FLOAT *)args -> alpha + 0); | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| @@ -46,7 +46,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL | |||||
| BLASLONG incx; | BLASLONG incx; | ||||
| BLASLONG i, m_from, m_to; | BLASLONG i, m_from, m_to; | ||||
| FLOAT alpha_r; | FLOAT alpha_r; | ||||
| #if defined(COMPLEX) && !defined(HER) && !defined(HERREV) | |||||
| #if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV) | |||||
| FLOAT alpha_i; | FLOAT alpha_i; | ||||
| #endif | #endif | ||||
| @@ -56,7 +56,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL | |||||
| incx = args -> lda; | incx = args -> lda; | ||||
| alpha_r = *((FLOAT *)args -> alpha + 0); | alpha_r = *((FLOAT *)args -> alpha + 0); | ||||
| #if defined(COMPLEX) && !defined(HER) && !defined(HERREV) | |||||
| #if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV) | |||||
| alpha_i = *((FLOAT *)args -> alpha + 1); | alpha_i = *((FLOAT *)args -> alpha + 1); | ||||
| #endif | #endif | ||||
| @@ -55,7 +55,7 @@ | |||||
| static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| BLASLONG lda, incx, incy; | |||||
| BLASLONG lda, incx; | |||||
| BLASLONG m_from, m_to; | BLASLONG m_from, m_to; | ||||
| a = (FLOAT *)args -> a; | a = (FLOAT *)args -> a; | ||||
| @@ -64,7 +64,6 @@ static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| lda = args -> lda; | lda = args -> lda; | ||||
| incx = args -> ldb; | incx = args -> ldb; | ||||
| incy = args -> ldc; | |||||
| m_from = 0; | m_from = 0; | ||||
| m_to = args -> m; | m_to = args -> m; | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -43,12 +43,10 @@ | |||||
| int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -43,12 +43,10 @@ | |||||
| int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.; | |||||
| int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ | ||||
| BLASLONG i; | BLASLONG i; | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| BLASLONG length; | BLASLONG length; | ||||
| #if (TRANSA == 2) || (TRANSA == 4) | #if (TRANSA == 2) || (TRANSA == 4) | ||||
| @@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc | |||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095); | |||||
| COPY_K(n, b, incb, buffer, 1); | COPY_K(n, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT atemp1, atemp2, btemp1, btemp2; | FLOAT atemp1, atemp2, btemp1, btemp2; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -51,12 +51,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ | |||||
| #ifndef UNIT | #ifndef UNIT | ||||
| FLOAT ar, ai, br, bi, ratio, den; | FLOAT ar, ai, br, bi, ratio, den; | ||||
| #endif | #endif | ||||
| FLOAT *gemvbuffer = (FLOAT *)buffer; | |||||
| FLOAT *B = b; | FLOAT *B = b; | ||||
| if (incb != 1) { | if (incb != 1) { | ||||
| B = buffer; | B = buffer; | ||||
| gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
| COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
| } | } | ||||
| @@ -65,7 +65,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||||
| blas_queue_t queue[MAX_CPU_NUMBER]; | blas_queue_t queue[MAX_CPU_NUMBER]; | ||||
| BLASLONG range_M[MAX_CPU_NUMBER + 1], range_N[MAX_CPU_NUMBER + 1]; | BLASLONG range_M[MAX_CPU_NUMBER + 1], range_N[MAX_CPU_NUMBER + 1]; | ||||
| BLASLONG procs, total_procs, num_cpu_m, num_cpu_n; | |||||
| BLASLONG procs, num_cpu_m, num_cpu_n; | |||||
| BLASLONG width, i, j; | BLASLONG width, i, j; | ||||
| BLASLONG divM, divN; | BLASLONG divM, divN; | ||||
| @@ -230,7 +230,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| BLASLONG is, min_i, div_n; | BLASLONG is, min_i, div_n; | ||||
| BLASLONG i, current; | BLASLONG i, current; | ||||
| BLASLONG l1stride, l2size; | |||||
| BLASLONG l1stride; | |||||
| #ifdef TIMING | #ifdef TIMING | ||||
| BLASULONG rpcc_counter; | BLASULONG rpcc_counter; | ||||
| @@ -298,8 +298,6 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| #endif | #endif | ||||
| ) return 0; | ) return 0; | ||||
| l2size = GEMM_P * GEMM_Q; | |||||
| #if 0 | #if 0 | ||||
| fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld N_from : %ld N_to : %ld\n", | fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld N_from : %ld N_to : %ld\n", | ||||
| mypos, m_from, m_to, n_from, n_to, N_from, N_to); | mypos, m_from, m_to, n_from, n_to, N_from, N_to); | ||||
| @@ -706,7 +704,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| n = n_to - n_from; | n = n_to - n_from; | ||||
| } | } | ||||
| if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) { | |||||
| if ((m < nthreads * SWITCH_RATIO) || (n < nthreads * SWITCH_RATIO)) { | |||||
| GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); | GEMM_LOCAL(args, range_m, range_n, sa, sb, 0); | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -914,7 +914,6 @@ static volatile struct { | |||||
| } memory[NUM_BUFFERS]; | } memory[NUM_BUFFERS]; | ||||
| static int memory_initialized = 0; | static int memory_initialized = 0; | ||||
| static void gotoblas_memory_init(void); | |||||
| /* Memory allocation routine */ | /* Memory allocation routine */ | ||||
| /* procpos ... indicates where it comes from */ | /* procpos ... indicates where it comes from */ | ||||