Browse Source

Merge pull request #4976 from martin-frbg/m3m_exprec

[WIP] Add better workaround for GEMM3M on GENERIC and re-enable EXPRECISION for x86/x86_64 targets
tags/v0.3.29
Martin Kroeker GitHub 1 year ago
parent
commit
718fb73bd8
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
3 changed files with 23 additions and 11 deletions
  1. +8
    -10
      Makefile.system
  2. +1
    -1
      interface/gemm.c
  3. +14
    -0
      param.h

+ 8
- 10
Makefile.system View File

@@ -446,7 +446,7 @@ endif


ifeq ($(OSNAME), Linux) ifeq ($(OSNAME), Linux)
EXTRALIB += -lm EXTRALIB += -lm
NO_EXPRECISION = 1
#NO_EXPRECISION = 1
endif endif


ifeq ($(OSNAME), Android) ifeq ($(OSNAME), Android)
@@ -572,7 +572,7 @@ NO_BINARY_MODE = 1
endif endif


ifeq ($(CORE), generic) ifeq ($(CORE), generic)
NO_EXPRECISION = 1
#NO_EXPRECISION = 1
endif endif


ifndef NO_EXPRECISION ifndef NO_EXPRECISION
@@ -595,7 +595,7 @@ endif
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)


ifeq ($(CORE), generic) ifeq ($(CORE), generic)
NO_EXPRECISION = 1
#NO_EXPRECISION = 1
endif endif


ifndef NO_EXPRECISION ifndef NO_EXPRECISION
@@ -828,8 +828,8 @@ BINARY_DEFINED = 1


ifeq ($(F_COMPILER), GFORTRAN) ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# EXPRECISION = 1
# CCOMMON_OPT += -DEXPRECISION
EXPRECISION = 1
CCOMMON_OPT += -DEXPRECISION
endif endif
endif endif
endif endif
@@ -1392,17 +1392,15 @@ endif
endif endif


ifeq ($(F_COMPILER), CRAY) ifeq ($(F_COMPILER), CRAY)
CCOMMON_OPT += -DF_INTERFACE_CRAYFC
CCOMMON_OPT += -DF_INTERFACE_INTEL
FCOMMON_OPT += -hnopattern FCOMMON_OPT += -hnopattern
ifdef INTERFACE64 ifdef INTERFACE64
ifneq ($(INTERFACE64), 0) ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -s integer64 FCOMMON_OPT += -s integer64
endif endif
endif endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp
else
FCOMMON_OPT += -fno-openmp
ifneq ($(USE_OPENMP), 1)
FCOMMON_OPT += -O noomp
endif endif
endif endif




+ 1
- 1
interface/gemm.c View File

@@ -86,7 +86,7 @@
#endif #endif


static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, BLASLONG) = { static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, BLASLONG) = {
#ifndef GEMM3M
#if !defined(GEMM3M) || defined(GENERIC)
GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN, GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN,
GEMM_NT, GEMM_TT, GEMM_RT, GEMM_CT, GEMM_NT, GEMM_TT, GEMM_RT, GEMM_CT,
GEMM_NR, GEMM_TR, GEMM_RR, GEMM_CR, GEMM_NR, GEMM_TR, GEMM_RR, GEMM_CR,


+ 14
- 0
param.h View File

@@ -4033,6 +4033,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define CGEMM_DEFAULT_UNROLL_N 2 #define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#define CGEMM3M_DEFAULT_UNROLL_N 2
#define ZGEMM3M_DEFAULT_UNROLL_N 2


#ifdef ARCH_X86 #ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_M 2
@@ -4048,6 +4050,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define CGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 2 #define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1
#define CGEMM3M_DEFAULT_UNROLL_M 2
#define ZGEMM3M_DEFAULT_UNROLL_M 2
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

#endif #endif


#ifdef ARCH_MIPS #ifdef ARCH_MIPS


Loading…
Cancel
Save