Browse Source

skylakex: Make the sgemm/dgemm beta code robust for an N=0 or M=0 case

In the threading code there are cases where N or M can become 0,
and the optimized beta code did not handle this well, leading
to a crash.

During the audit for the crash, a few edge conditions in the while-loop
conditions (`>` where `>=` was intended) were found and fixed as well.
tags/v0.3.4
Arjan van de Ven 7 years ago
parent
commit
dcc5d6291e
2 changed files with 8 additions and 4 deletions
  1. +4
    -2
      kernel/x86_64/dgemm_beta_skylakex.c
  2. +4
    -2
      kernel/x86_64/sgemm_beta_skylakex.c

+ 4
- 2
kernel/x86_64/dgemm_beta_skylakex.c View File

@@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
return 0;
}

if (m == 0 || n == 0)
return 0;

c_offset = c;

@@ -69,7 +71,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,

i = m;

while (i > 32) {
while (i >= 32) {
_mm512_storeu_pd(c_offset1, z_zero);
_mm512_storeu_pd(c_offset1 + 8, z_zero);
_mm512_storeu_pd(c_offset1 + 16, z_zero);
@@ -77,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
c_offset1 += 32;
i -= 32;
}
while (i > 8) {
while (i >= 8) {
_mm512_storeu_pd(c_offset1, z_zero);
c_offset1 += 8;
i -= 8;


+ 4
- 2
kernel/x86_64/sgemm_beta_skylakex.c View File

@@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
return 0;
}

if (n == 0 || m == 0)
return;

c_offset = c;

@@ -71,13 +73,13 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,

i = m;

while (i > 32) {
while (i >= 32) {
_mm512_storeu_ps(c_offset1, z_zero);
_mm512_storeu_ps(c_offset1 + 16, z_zero);
c_offset1 += 32;
i -= 32;
}
while (i > 8) {
while (i >= 8) {
_mm256_storeu_ps(c_offset1, y_zero);
c_offset1 += 8;
i -= 8;


Loading…
Cancel
Save