|
|
|
@@ -332,13 +332,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, |
|
|
|
#else |
|
|
|
for(jjs = js; jjs < js + min_j; jjs += min_jj){ |
|
|
|
min_jj = min_j + js - jjs; |
|
|
|
|
|
|
|
#ifdef SKYLAKEX |
|
|
|
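/* The current AVX512 s/d/c/z GEMM kernel needs n >= 6*GEMM_UNROLL_N to achieve its best performance, so min_jj is capped at 6*GEMM_UNROLL_N here instead of the 3x/2x/1x steps used in the #else branch. */ |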
|
|
|
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; |
|
|
|
#else |
|
|
|
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; |
|
|
|
else |
|
|
|
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; |
|
|
|
else |
|
|
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
START_RPCC(); |
|
|
|
|