Browse Source

Merge pull request #2 from xianyi/develop

rebase
tags/v0.3.13^2
Martin Kroeker GitHub 5 years ago
parent
commit
980ab349bc
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 166 additions and 47 deletions
  1. CONTRIBUTORS.md (+4, -1)
  2. Makefile.sparc (+12, -4)
  3. Makefile.system (+12, -1)
  4. c_check (+2, -1)
  5. common_sparc.h (+6, -0)
  6. kernel/arm/zdot.c (+2, -2)
  7. kernel/mips/cgemm_kernel_8x4_msa.c (+8, -8)
  8. kernel/mips/cgemv_n_msa.c (+2, -2)
  9. kernel/mips/cgemv_t_msa.c (+19, -7)
  10. kernel/mips/dswap_msa.c (+28, -2)
  11. kernel/mips/sswap_msa.c (+28, -1)
  12. kernel/mips/zgemv_n_msa.c (+2, -2)
  13. kernel/mips/zgemv_t_msa.c (+19, -7)
  14. kernel/sparc/KERNEL.sparc (+10, -0)
  15. param.h (+9, -9)
  16. utest/Makefile (+3, -0)

+ 4
- 1
CONTRIBUTORS.md View File

@@ -190,4 +190,7 @@ In chronological order:
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support

* Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53

* PingTouGe Semiconductor Co., Ltd.
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910

+ 12
- 4
Makefile.sparc View File

@@ -3,21 +3,29 @@ RANLIB = ranlib

ifdef BINARY64

ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 -m64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 -m64
endif
ifeq ($(COMPILER_F77), f90)
FCOMMON_OPT += -xarch=v9
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -m64
endif
else

ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9
else
CCOMMON_OPT += -xarch=v9
endif

ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9
endif
ifeq ($(COMPILER_F77), f90)
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -xarch=v8plusb
endif

@@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
else
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
endif
endif

+ 12
- 1
Makefile.system View File

@@ -1131,16 +1131,25 @@ CCOMMON_OPT += -w
ifeq ($(ARCH), x86)
CCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif
endif

ifeq ($(F_COMPILER), SUN)
CCOMMON_OPT += -DF_INTERFACE_SUN
FCOMMON_OPT += -ftrap=%none -xrecursive
ifeq ($(ARCH), x86)
FCOMMON_OPT += -m32
else
ifdef BINARY64
FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -xopenmp=parallel
@@ -1313,8 +1322,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH)

ifneq ($(C_COMPILER), PGI)
ifneq ($(C_COMPILER), SUN)
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
endif
endif
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"

ifeq ($(CORE), PPC440)


+ 2
- 1
c_check View File

@@ -6,7 +6,8 @@
# Checking cross compile
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
$hostarch = `uname -p` if ($hostos eq "AIX");
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
chop($hostarch);
$hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64");


+ 6
- 0
common_sparc.h View File

@@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
#define __BIG_ENDIAN__
#endif

#ifdef C_SUN
#ifndef __64BIT
#define RETURN_BY_STACK
#endif
#endif

#ifdef DOUBLE
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
#else


+ 2
- 2
kernel/arm/zdot.c View File

@@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA

dot[0]=0.0;
dot[1]=0.0;
#if !defined(__PPC__)
#if !defined(__PPC__) && !defined(__SunOS)
CREAL(result) = 0.0 ;
CIMAG(result) = 0.0 ;
#else
@@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
i++ ;

}
#if !defined(__PPC__)
#if !defined(__PPC__) && !defined(__SunOS)
CREAL(result) = dot[0];
CIMAG(result) = dot[1];
#else


+ 8
- 8
kernel/mips/cgemm_kernel_8x4_msa.c View File

@@ -758,10 +758,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pc0[1] += alphar * res1; \
pc0[1] += alphai * res0; \
\
pc1[2] += alphar * res2; \
pc1[2] -= alphai * res3; \
pc1[3] += alphar * res3; \
pc1[3] += alphai * res2; \
pc1[0] += alphar * res2; \
pc1[0] -= alphai * res3; \
pc1[1] += alphar * res3; \
pc1[1] += alphai * res2; \
}

#define CGEMM_SCALE_1X1 \
@@ -1067,10 +1067,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pc0[1] = alphar * res1; \
pc0[1] += alphai * res0; \
\
pc1[2] = alphar * res2; \
pc1[2] -= alphai * res3; \
pc1[3] = alphar * res3; \
pc1[3] += alphai * res2; \
pc1[0] = alphar * res2; \
pc1[0] -= alphai * res3; \
pc1[1] = alphar * res3; \
pc1[1] += alphai * res2; \
}

#define CGEMM_TRMM_SCALE_1X1 \


+ 2
- 2
kernel/mips/cgemv_n_msa.c View File

@@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 +=
#define OP2 -=
#endif
#endif



+ 19
- 7
kernel/mips/cgemv_t_msa.c View File

@@ -32,14 +32,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef OP1
#undef OP2

#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
#define OP0 -=
#define OP1 +=
#define OP2 +=
#if !defined(CONJ)
#if !defined(XCONJ)
#define OP0 -=
#define OP1 +=
#define OP2 +=
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#endif
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 -=
#endif
#endif

#define CGEMV_T_8x4() \


+ 28
- 2
kernel/mips/dswap_msa.c View File

@@ -184,7 +184,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}
else
else if ((inc_x != 0) && (inc_y != 0))
{
for (i = (n >> 3); i--;)
{
@@ -248,6 +248,32 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}

else
{
if (inc_x == inc_y)
{
if (n & 1)
{
x0 = *srcx;
*srcx = *srcy;
*srcy = x0;
}
else
return (0);
}
else
{
BLASLONG ix = 0, iy = 0;
while (i < n)
{
x0 = srcx[ix];
srcx[ix] = srcy[iy];
srcy[iy] = x0;
ix += inc_x;
iy += inc_y;
i++;
}
}
}
return (0);
}

+ 28
- 1
kernel/mips/sswap_msa.c View File

@@ -198,7 +198,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}
else
else if ((inc_x != 0) && (inc_y != 0))
{
for (i = (n >> 3); i--;)
{
@@ -262,6 +262,33 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3,
}
}
}
else
{
if (inc_x == inc_y)
{
if (n & 1)
{
x0 = *srcx;
*srcx = *srcy;
*srcy = x0;
}
else
return (0);
}
else
{
BLASLONG ix = 0, iy = 0;
while (i < n)
{
x0 = srcx[ix];
srcx[ix] = srcy[iy];
srcy[iy] = x0;
ix += inc_x;
iy += inc_y;
i++;
}
}
}

return (0);
}

+ 2
- 2
kernel/mips/zgemv_n_msa.c View File

@@ -56,11 +56,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 +=
#define OP2 -=
#endif
#endif



+ 19
- 7
kernel/mips/zgemv_t_msa.c View File

@@ -34,14 +34,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef OP3
#undef OP4

#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
#define OP0 -=
#define OP1 +=
#define OP2 +=
#if !defined(CONJ)
#if !defined(XCONJ)
#define OP0 -=
#define OP1 +=
#define OP2 +=
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#endif
#else
#define OP0 +=
#define OP1 +=
#define OP2 -=
#if !defined(XCONJ)
#define OP0 +=
#define OP1 -=
#define OP2 +=
#else
#define OP0 -=
#define OP1 -=
#define OP2 -=
#endif
#endif

#define ZGEMV_T_8x1() \


+ 10
- 0
kernel/sparc/KERNEL.sparc View File

@@ -54,3 +54,13 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S


SDOTKERNEL = ../generic/dot.c
SDSDOTKERNEL = ../generic/dot.c
DSDOTKERNEL = ../generic/dot.c
DDOTKERNEL = ../generic/dot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
CSWAPKERNEL = ../arm/zswap.c
ZSWAPKERNEL = ../arm/zswap.c

+ 9
- 9
param.h View File

@@ -1454,22 +1454,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define SGEMM_DEFAULT_P 768
#define SGEMM_DEFAULT_R sgemm_r
//#define SGEMM_DEFAULT_R 1024
/*#define SGEMM_DEFAULT_R 1024*/

#define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r
//#define DGEMM_DEFAULT_R 1024
/*#define DGEMM_DEFAULT_R 1024*/

#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r

#define CGEMM_DEFAULT_P 768
#define CGEMM_DEFAULT_R cgemm_r
//#define CGEMM_DEFAULT_R 1024
/*#define CGEMM_DEFAULT_R 1024*/

#define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r
//#define ZGEMM_DEFAULT_R 1024
/*#define ZGEMM_DEFAULT_R 1024*/

#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
@@ -2571,7 +2571,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif

#ifdef LOONGSON3A
////Copy from SICORTEX
/*Copy from SICORTEX*/
#define SNUMOPT 2
#define DNUMOPT 2

@@ -2863,7 +2863,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16
#endif

// Common ARMv8 parameters
/* Common ARMv8 parameters */
#if defined(ARMV8)

#define SNUMOPT 2
@@ -3066,7 +3066,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#else // Other/undetected ARMv8 cores
#else /* Other/undetected ARMv8 cores */

#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
@@ -3095,9 +3095,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#endif // Cores
#endif /* Cores */

#endif // ARMv8
#endif /* ARMv8 */

#if defined(ARMV5)
#define SNUMOPT 2


+ 3
- 0
utest/Makefile View File

@@ -35,6 +35,9 @@ endif
ifeq ($(C_COMPILER), PGI)
OBJS = utest_main2.o
endif
ifeq ($(C_COMPILER), SUN)
OBJS = utest_main2.o
endif
ifeq ($(OSNAME), AIX)
OBJS = utest_main2.o
endif


Loading…
Cancel
Save