Browse Source

Save and restore input argument 8 (lda4)

Fixes miscompilation with gcc9 -ftree-vectorize (related to issue #2009)
tags/v0.3.6^2
Martin Kroeker GitHub 7 years ago
parent
commit
46e415b140
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 5 additions and 2 deletions
  1. +5
    -2
      kernel/x86_64/sgemv_n_microk_haswell-4.c

+ 5
- 2
kernel/x86_64/sgemv_n_microk_haswell-4.c View File

@@ -26,7 +26,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/



#define HAVE_KERNEL_4x8 1
static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLONG lda4, FLOAT *alpha) __attribute__ ((noinline));

@@ -49,6 +48,8 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO

"vbroadcastss (%9), %%ymm6 \n\t" // alpha

"movq %8, %%xmm10 \n\t" //save lda

"testq $0x04, %1 \n\t"
"jz 2f \n\t"

@@ -151,6 +152,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO

"4: \n\t"
"vzeroupper \n\t"
"movq %%xmm10, %8 \n\t" //restore lda

:
"+r" (i), // 0
@@ -170,6 +172,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"%xmm4", "%xmm5",
"%xmm6", "%xmm7",
"%xmm8", "%xmm9",
"%xmm10",
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
"memory"
);
@@ -177,7 +180,6 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
}



#define HAVE_KERNEL_4x4 1
static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT *alpha) __attribute__ ((noinline));

@@ -196,6 +198,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT

"vbroadcastss (%8), %%ymm6 \n\t" // alpha


"testq $0x04, %1 \n\t"
"jz 2f \n\t"



Loading…
Cancel
Save