Browse Source

Merge pull request #4326 from bartoldeman/fix-casum-backup-kernel

Fix casum fallback kernel for x86_64
tags/v0.3.26
Martin Kroeker GitHub 2 years ago
parent
commit
df4cd7e82c
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 12 additions and 12 deletions
  1. +12
    -12
      kernel/x86_64/casum.c

+ 12
- 12
kernel/x86_64/casum.c View File

@@ -9,12 +9,12 @@
#endif

#ifndef HAVE_CASUM_KERNEL
static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
static FLOAT casum_kernel(BLASLONG n, FLOAT *x)
{

BLASLONG i=0;
BLASLONG n_8 = n & -8;
FLOAT *x = x1;
FLOAT *x1 = x;
FLOAT temp0, temp1, temp2, temp3;
FLOAT temp4, temp5, temp6, temp7;
FLOAT sum0 = 0.0;
@@ -24,14 +24,14 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
FLOAT sum4 = 0.0;
while (i < n_8) {
temp0 = ABS_K(x[0]);
temp1 = ABS_K(x[1]);
temp2 = ABS_K(x[2]);
temp3 = ABS_K(x[3]);
temp4 = ABS_K(x[4]);
temp5 = ABS_K(x[5]);
temp6 = ABS_K(x[6]);
temp7 = ABS_K(x[7]);
temp0 = ABS_K(x1[0]);
temp1 = ABS_K(x1[1]);
temp2 = ABS_K(x1[2]);
temp3 = ABS_K(x1[3]);
temp4 = ABS_K(x1[4]);
temp5 = ABS_K(x1[5]);
temp6 = ABS_K(x1[6]);
temp7 = ABS_K(x1[7]);
sum0 += temp0;
sum1 += temp1;
@@ -43,12 +43,12 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
sum2 += temp6;
sum3 += temp7;
x+=8;
x1+=8;
i+=4;
}

while (i < n) {
sum4 += (ABS_K(x1[0]) + ABS_K(x1[1]));
sum4 += ABS_K(x1[0]) + ABS_K(x1[1]);
x1 += 2;
i++;
}


Loading…
Cancel
Save