Browse Source

Merge pull request #3048 from martin-frbg/issue2998

Temporarily revert to the old NRM2 kernels for ThunderX2/3 and NeoverseN1
tags/v0.3.14^2
Martin Kroeker GitHub 5 years ago
parent
commit
3559c5d7a2
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 19 additions and 16 deletions
  1. +4
    -4
      kernel/arm64/KERNEL.NEOVERSEN1
  2. +4
    -4
      kernel/arm64/KERNEL.THUNDERX2T99
  3. +10
    -7
      kernel/arm64/KERNEL.THUNDERX3T110
  4. +1
    -1
      kernel/x86_64/srot.c

+ 4
- 4
kernel/arm64/KERNEL.NEOVERSEN1 View File

@@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c

-SNRM2KERNEL = scnrm2_thunderx2t99.c
-DNRM2KERNEL = dznrm2_thunderx2t99.c
-CNRM2KERNEL = scnrm2_thunderx2t99.c
-ZNRM2KERNEL = dznrm2_thunderx2t99.c
+SNRM2KERNEL = nrm2.S
+DNRM2KERNEL = nrm2.S
+CNRM2KERNEL = znrm2.S
+ZNRM2KERNEL = znrm2.S

DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c


+ 4
- 4
kernel/arm64/KERNEL.THUNDERX2T99 View File

@@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c

-SNRM2KERNEL = scnrm2_thunderx2t99.c
-CNRM2KERNEL = scnrm2_thunderx2t99.c
+SNRM2KERNEL = nrm2.S
+CNRM2KERNEL = nrm2.S
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
-DNRM2KERNEL = dznrm2_thunderx2t99.c
-ZNRM2KERNEL = dznrm2_thunderx2t99.c
+DNRM2KERNEL = znrm2.S
+ZNRM2KERNEL = znrm2.S


DDOTKERNEL = dot_thunderx2t99.c


+ 10
- 7
kernel/arm64/KERNEL.THUNDERX3T110 View File

@@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c

-SNRM2KERNEL = scnrm2_thunderx2t99.c
-CNRM2KERNEL = scnrm2_thunderx2t99.c
-#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
-#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
-DNRM2KERNEL = dznrm2_thunderx2t99.c
-ZNRM2KERNEL = dznrm2_thunderx2t99.c
-
+#SNRM2KERNEL = scnrm2_thunderx2t99.c
+#CNRM2KERNEL = scnrm2_thunderx2t99.c
+##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
+##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
+#DNRM2KERNEL = dznrm2_thunderx2t99.c
+#ZNRM2KERNEL = dznrm2_thunderx2t99.c
+SNRM2KERNEL = nrm2.S
+DNRM2KERNEL = nrm2.S
+CNRM2KERNEL = znrm2.S
+ZNRM2KERNEL = znrm2.S

DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c


+ 1
- 1
kernel/x86_64/srot.c View File

@@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
{
BLASLONG i = 0;
-#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128)
+#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128)
const int vstep = v_nlanes_f32;
const int unrollx4 = n & (-vstep * 4);
const int unrollx = n & -vstep;


Loading…
Cancel
Save