Browse Source

Merge pull request #2836 from austinpagan/gordon_trsm

Fixing a performance bug in trsm_[LR].c.
tags/v0.3.11^2
Martin Kroeker GitHub 5 years ago
parent
commit
f8950f40a2
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 6 deletions
  1. +2
    -2
      driver/level3/trsm_L.c
  2. +4
    -4
      driver/level3/trsm_R.c

+ 2
- 2
driver/level3/trsm_L.c View File

@@ -131,7 +131,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO

for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
- if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;

@@ -197,7 +197,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO

for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
- if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;



+ 4
- 4
driver/level3/trsm_R.c View File

@@ -126,7 +126,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO

for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
- if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;

@@ -182,7 +182,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO

for(jjs = 0; jjs < min_j - min_l - ls + js; jjs += min_jj){
min_jj = min_j - min_l - ls + js - jjs;
- if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;

@@ -243,7 +243,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO

for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
- if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;

@@ -304,7 +304,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO

for(jjs = 0; jjs < min_j - js + ls; jjs += min_jj){
min_jj = min_j - js + ls - jjs;
- if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;



Loading…
Cancel
Save