Prepared driver/level3 functions for UNROLL values that are not a power of two (tags/v0.2.20^2)
| @@ -316,7 +316,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| if (min_l > GEMM3M_Q) { | |||
| min_l = (min_l + 1) / 2; | |||
| #ifdef UNROLL_X | |||
| min_l = (min_l + UNROLL_X - 1) & ~(UNROLL_X - 1); | |||
| min_l = ((min_l + UNROLL_X - 1)/UNROLL_X) * UNROLL_X; | |||
| #endif | |||
| } | |||
| } | |||
| @@ -326,7 +326,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else { | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| } | |||
| @@ -365,7 +365,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -386,7 +386,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else { | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| } | |||
| @@ -429,7 +429,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -451,7 +451,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else { | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| } | |||
| @@ -494,7 +494,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -297,9 +297,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_l = GEMM_Q; | |||
| } else { | |||
| if (min_l > GEMM_Q) { | |||
| min_l = (min_l / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||
| min_l = ((min_l / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||
| } | |||
| gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1)); | |||
| gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||
| while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M; | |||
| } | |||
| @@ -311,7 +311,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM_P; | |||
| } else { | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||
| } else { | |||
| l1stride = 0; | |||
| } | |||
| @@ -369,7 +369,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -365,7 +365,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| buffer[0] = sb; | |||
| for (i = 1; i < DIVIDE_RATE; i++) { | |||
| buffer[i] = buffer[i - 1] + GEMM3M_Q * ((div_n + GEMM3M_UNROLL_N - 1) & ~(GEMM3M_UNROLL_N - 1)); | |||
| buffer[i] = buffer[i - 1] + GEMM3M_Q * (((div_n + GEMM3M_UNROLL_N - 1)/GEMM3M_UNROLL_N) * GEMM3M_UNROLL_N); | |||
| } | |||
| for(ls = 0; ls < k; ls += min_l){ | |||
| @@ -384,7 +384,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else { | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| } | |||
| @@ -482,7 +482,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -618,7 +618,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -754,7 +754,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM3M_P; | |||
| } else | |||
| if (min_i > GEMM3M_P) { | |||
| min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||
| min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -189,7 +189,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| #ifndef LOWER | |||
| @@ -230,7 +230,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa); | |||
| @@ -245,7 +245,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| if (m_start >= js) { | |||
| @@ -284,7 +284,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa); | |||
| @@ -322,7 +322,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| aa = sb + min_l * (is - js) * COMPSIZE; | |||
| @@ -353,7 +353,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| aa = sb + min_l * (m_start - js) * COMPSIZE; | |||
| @@ -383,7 +383,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| aa = sb + min_l * (is - js) * COMPSIZE; | |||
| @@ -198,7 +198,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| #ifndef LOWER | |||
| @@ -239,7 +239,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| aa = sb + min_l * (is - js) * COMPSIZE; | |||
| @@ -303,7 +303,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| START_RPCC(); | |||
| @@ -375,7 +375,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| if (is < js + min_j) { | |||
| @@ -460,7 +460,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| START_RPCC(); | |||
| @@ -210,8 +210,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to); | |||
| #endif | |||
| div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE | |||
| + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| buffer[0] = sb; | |||
| for (i = 1; i < DIVIDE_RATE; i++) { | |||
| @@ -233,7 +232,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM_P; | |||
| } else { | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| } | |||
| @@ -253,8 +252,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| STOP_RPCC(copy_A); | |||
| div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE | |||
| + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) { | |||
| @@ -353,9 +351,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| while (current >= 0) { | |||
| #endif | |||
| div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE | |||
| + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { | |||
| START_RPCC(); | |||
| @@ -412,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| min_i = (((min_i + 1) / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| } | |||
| START_RPCC(); | |||
| @@ -425,8 +422,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| do { | |||
| div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE | |||
| + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||
| div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { | |||
| @@ -602,9 +598,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| double di = (double)i; | |||
| width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask); | |||
| width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) ); | |||
| if (num_cpu == 0) width = n - ((n - width) & ~mask); | |||
| if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) ); | |||
| if ((width > n - i) || (width < mask)) width = n - i; | |||
| @@ -644,7 +640,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| double di = (double)i; | |||
| width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask); | |||
| width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1)); | |||
| if ((width > n - i) || (width < mask)) width = n - i; | |||
| @@ -310,7 +310,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| buffer[0] = sb; | |||
| for (i = 1; i < DIVIDE_RATE; i++) { | |||
| buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE; | |||
| buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE; | |||
| } | |||
| @@ -331,7 +331,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM_P; | |||
| } else { | |||
| if (min_i > GEMM_P) { | |||
| min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||
| min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||
| } else { | |||
| if (args -> nthreads == 1) l1stride = 0; | |||
| } | |||
| @@ -443,7 +443,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| min_i = GEMM_P; | |||
| } else | |||
| if (min_i > GEMM_P) { | |||
| min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||
| min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||
| } | |||
| START_RPCC(); | |||
| @@ -158,7 +158,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, | |||
| int mm, nn; | |||
| mm = (loop & ~(GEMM_UNROLL_MN - 1)); | |||
| mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| nn = MIN(GEMM_UNROLL_MN, n - loop); | |||
| #ifndef LOWER | |||
| @@ -149,7 +149,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||
| int mm, nn; | |||
| mm = (loop & ~(GEMM_UNROLL_MN - 1)); | |||
| mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| nn = MIN(GEMM_UNROLL_MN, n - loop); | |||
| #ifndef LOWER | |||
| @@ -132,7 +132,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, | |||
| int mm, nn; | |||
| mm = (loop & ~(GEMM_UNROLL_MN - 1)); | |||
| mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||
| nn = MIN(GEMM_UNROLL_MN, n - loop); | |||
| #ifndef LOWER | |||