Browse Source

Make the SkylakeX/Haswell SGEMM code compile and run even when the compiler lacks AVX2 support

tags/v0.3.5
Arjan van de Ven 7 years ago
parent
commit
69d206440a
1 changed file with 5 additions and 6 deletions
  1. +5
    -6
      kernel/x86_64/sgemm_beta_skylakex.c

+ 5
- 6
kernel/x86_64/sgemm_beta_skylakex.c View File

@@ -61,10 +61,6 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
c_offset = c;

if (beta == ZERO){
#ifdef __AVX512CD__
__m512 z_zero = _mm512_setzero_ps();
#endif
__m256 y_zero = _mm256_setzero_ps();

j = n;
do {
@@ -72,12 +68,14 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
c_offset += ldc;

i = m;
#ifdef __AVX2__
while (i >= 32) {
#ifdef __AVX512CD__
__m512 z_zero = _mm512_setzero_ps();
_mm512_storeu_ps(c_offset1, z_zero);
_mm512_storeu_ps(c_offset1 + 16, z_zero);
#else
__m256 y_zero = _mm256_setzero_ps();
_mm256_storeu_ps(c_offset1, y_zero);
_mm256_storeu_ps(c_offset1 + 8, y_zero);
_mm256_storeu_ps(c_offset1 + 16, y_zero);
@@ -87,11 +85,12 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
i -= 32;
}
while (i >= 8) {
__m256 y_zero = _mm256_setzero_ps();
_mm256_storeu_ps(c_offset1, y_zero);
c_offset1 += 8;
i -= 8;
}
#endif
while (i > 0) {
*c_offset1 = ZERO;
c_offset1 ++;


Loading…
Cancel
Save