Browse Source

Merge pull request #3061 from martin-frbg/arm64-pgi

Support NVIDIA HPC SDK on ARM64
tags/v0.3.14^2
Martin Kroeker GitHub 5 years ago
parent
commit
a0e4fb3a28
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 50 additions and 10 deletions
  1. +2
    -2
      Makefile.arm64
  2. +3
    -0
      cmake/utils.cmake
  3. +1
    -1
      common_arm64.h
  4. +2
    -2
      kernel/arm/zdot.c
  5. +9
    -0
      kernel/arm64/KERNEL.ARMV8
  6. +14
    -5
      kernel/arm64/KERNEL.CORTEXA53
  7. +9
    -0
      kernel/arm64/KERNEL.CORTEXA57
  8. +5
    -0
      kernel/arm64/KERNEL.THUNDERX
  9. +5
    -0
      kernel/arm64/KERNEL.TSV110

+ 2
- 2
Makefile.arm64 View File

@@ -1,4 +1,4 @@
ifneq ($(C_COMPILER), PGI)
ifeq ($(CORE), ARMV8)
CCOMMON_OPT += -march=armv8-a
FCOMMON_OPT += -march=armv8-a
@@ -77,4 +77,4 @@ CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif
endif
endif

+ 3
- 0
cmake/utils.cmake View File

@@ -74,6 +74,9 @@ macro(ParseMakefileVars MAKEFILE_IN)
string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
# message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
if ( ${CMAKE_MATCH_1} STREQUAL C_COMPILER)
set (CMAKE_MATCH_1 CMAKE_C_COMPILER)
endif ()
if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2}))
# message (STATUS "condition is true")
set (IfElse 1)


+ 1
- 1
common_arm64.h View File

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define INLINE inline

-#ifdef F_INTERFACE_FLANG
+#if defined( F_INTERFACE_FLANG) || defined(F_INTERFACE_PGI)
#define RETURN_BY_STACK
#else
#define RETURN_BY_COMPLEX


+ 2
- 2
kernel/arm/zdot.c View File

@@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA

dot[0]=0.0;
dot[1]=0.0;
-#if !defined(__PPC__) && !defined(__SunOS)
+#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI)
CREAL(result) = 0.0 ;
CIMAG(result) = 0.0 ;
#else
@@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
i++ ;

}
-#if !defined(__PPC__) && !defined(__SunOS)
+#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI)
CREAL(result) = dot[0];
CIMAG(result) = dot[1];
#else


+ 9
- 0
kernel/arm64/KERNEL.ARMV8 View File

@@ -97,9 +97,18 @@ CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S

DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S

DGEMM_BETA = dgemm_beta.S


+ 14
- 5
kernel/arm64/KERNEL.CORTEXA53 View File

@@ -96,11 +96,20 @@ DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S

-DDOTKERNEL = dot.S
-SDOTKERNEL = ../generic/dot.c
-CDOTKERNEL = zdot.S
-ZDOTKERNEL = zdot.S
-DSDOTKERNEL = dot.S
+ifneq ($(C_COMPILER), PGI)
+SDOTKERNEL = ../generic/dot.c
+else
+SDOTKERNEL = dot.S
+endif
+DDOTKERNEL = dot.S
+ifneq ($(C_COMPILER), PGI)
+CDOTKERNEL = zdot.S
+ZDOTKERNEL = zdot.S
+else
+CDOTKERNEL = ../arm/zdot.c
+ZDOTKERNEL = ../arm/zdot.c
+endif
+DSDOTKERNEL = dot.S

DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S


+ 9
- 0
kernel/arm64/KERNEL.CORTEXA57 View File

@@ -70,10 +70,19 @@ DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S

ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S

SNRM2KERNEL = nrm2.S


+ 5
- 0
kernel/arm64/KERNEL.THUNDERX View File

@@ -47,8 +47,13 @@ ZCOPYKERNEL = copy.S

SDOTKERNEL = dot_thunderx.c
DDOTKERNEL = ddot_thunderx.c
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S

SNRM2KERNEL = nrm2.S


+ 5
- 0
kernel/arm64/KERNEL.TSV110 View File

@@ -72,8 +72,13 @@ ZCOPYKERNEL = copy.S

SDOTKERNEL = dot.S
DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S

SNRM2KERNEL = nrm2.S


Loading…
Cancel
Save