[WIP] Add better workaround for GEMM3M on GENERIC and re-enable EXPRECISION for x86/x86_64 targets (tags/v0.3.29)
| @@ -446,7 +446,7 @@ endif | |||||
| ifeq ($(OSNAME), Linux) | ifeq ($(OSNAME), Linux) | ||||
| EXTRALIB += -lm | EXTRALIB += -lm | ||||
| NO_EXPRECISION = 1 | |||||
| #NO_EXPRECISION = 1 | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
| @@ -572,7 +572,7 @@ NO_BINARY_MODE = 1 | |||||
| endif | endif | ||||
| ifeq ($(CORE), generic) | ifeq ($(CORE), generic) | ||||
| NO_EXPRECISION = 1 | |||||
| #NO_EXPRECISION = 1 | |||||
| endif | endif | ||||
| ifndef NO_EXPRECISION | ifndef NO_EXPRECISION | ||||
| @@ -595,7 +595,7 @@ endif | |||||
| ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
| ifeq ($(CORE), generic) | ifeq ($(CORE), generic) | ||||
| NO_EXPRECISION = 1 | |||||
| #NO_EXPRECISION = 1 | |||||
| endif | endif | ||||
| ifndef NO_EXPRECISION | ifndef NO_EXPRECISION | ||||
| @@ -828,8 +828,8 @@ BINARY_DEFINED = 1 | |||||
| ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
| ifeq ($(C_COMPILER), GCC) | ifeq ($(C_COMPILER), GCC) | ||||
| # EXPRECISION = 1 | |||||
| # CCOMMON_OPT += -DEXPRECISION | |||||
| EXPRECISION = 1 | |||||
| CCOMMON_OPT += -DEXPRECISION | |||||
| endif | endif | ||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -1392,17 +1392,15 @@ endif | |||||
| endif | endif | ||||
| ifeq ($(F_COMPILER), CRAY) | ifeq ($(F_COMPILER), CRAY) | ||||
| CCOMMON_OPT += -DF_INTERFACE_CRAYFC | |||||
| CCOMMON_OPT += -DF_INTERFACE_INTEL | |||||
| FCOMMON_OPT += -hnopattern | FCOMMON_OPT += -hnopattern | ||||
| ifdef INTERFACE64 | ifdef INTERFACE64 | ||||
| ifneq ($(INTERFACE64), 0) | ifneq ($(INTERFACE64), 0) | ||||
| FCOMMON_OPT += -s integer64 | FCOMMON_OPT += -s integer64 | ||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(USE_OPENMP), 1) | |||||
| FCOMMON_OPT += -fopenmp | |||||
| else | |||||
| FCOMMON_OPT += -fno-openmp | |||||
| ifneq ($(USE_OPENMP), 1) | |||||
| FCOMMON_OPT += -O noomp | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -86,7 +86,7 @@ | |||||
| #endif | #endif | ||||
| static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, BLASLONG) = { | static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, BLASLONG) = { | ||||
| #ifndef GEMM3M | |||||
| #if !defined(GEMM3M) || defined(GENERIC) | |||||
| GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN, | GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN, | ||||
| GEMM_NT, GEMM_TT, GEMM_RT, GEMM_CT, | GEMM_NT, GEMM_TT, GEMM_RT, GEMM_CT, | ||||
| GEMM_NR, GEMM_TR, GEMM_RR, GEMM_CR, | GEMM_NR, GEMM_TR, GEMM_RR, GEMM_CR, | ||||
| @@ -4033,6 +4033,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||||
| #define CGEMM_DEFAULT_UNROLL_N 2 | #define CGEMM_DEFAULT_UNROLL_N 2 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | #define ZGEMM_DEFAULT_UNROLL_N 2 | ||||
| #define XGEMM_DEFAULT_UNROLL_N 1 | #define XGEMM_DEFAULT_UNROLL_N 1 | ||||
| #define CGEMM3M_DEFAULT_UNROLL_N 2 | |||||
| #define ZGEMM3M_DEFAULT_UNROLL_N 2 | |||||
| #ifdef ARCH_X86 | #ifdef ARCH_X86 | ||||
| #define SGEMM_DEFAULT_UNROLL_M 2 | #define SGEMM_DEFAULT_UNROLL_M 2 | ||||
| @@ -4048,6 +4050,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||||
| #define CGEMM_DEFAULT_UNROLL_M 2 | #define CGEMM_DEFAULT_UNROLL_M 2 | ||||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | #define ZGEMM_DEFAULT_UNROLL_M 2 | ||||
| #define XGEMM_DEFAULT_UNROLL_M 1 | #define XGEMM_DEFAULT_UNROLL_M 1 | ||||
| #define CGEMM3M_DEFAULT_UNROLL_M 2 | |||||
| #define ZGEMM3M_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM3M_DEFAULT_P 448 | |||||
| #define ZGEMM3M_DEFAULT_P 224 | |||||
| #define XGEMM3M_DEFAULT_P 112 | |||||
| #define CGEMM3M_DEFAULT_Q 224 | |||||
| #define ZGEMM3M_DEFAULT_Q 224 | |||||
| #define XGEMM3M_DEFAULT_Q 224 | |||||
| #define CGEMM3M_DEFAULT_R 12288 | |||||
| #define ZGEMM3M_DEFAULT_R 12288 | |||||
| #define XGEMM3M_DEFAULT_R 12288 | |||||
| #endif | #endif | ||||
| #ifdef ARCH_MIPS | #ifdef ARCH_MIPS | ||||