| @@ -40,6 +40,7 @@ bn=`basename \"$compiler_name\"` | |||||
| case "$bn" in | case "$bn" in | ||||
| *-*) if [ "$bn" != '-' ]; then | *-*) if [ "$bn" != '-' ]; then | ||||
| cross_suffix="$cross_suffix${bn%-*}-" | cross_suffix="$cross_suffix${bn%-*}-" | ||||
| cross_suffix=`echo $cross_suffix|sed -e 's/ -$//'` | |||||
| fi | fi | ||||
| esac | esac | ||||
| @@ -1,5 +1,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
| /* Copyright 2023 The OpenBLAS Project. */ | |||||
| /* All rights reserved. */ | /* All rights reserved. */ | ||||
| /* */ | /* */ | ||||
| /* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
| @@ -45,6 +46,7 @@ | |||||
| typedef struct { | typedef struct { | ||||
| int dtb_entries; | int dtb_entries; | ||||
| int switch_ratio; | |||||
| int offsetA, offsetB, align; | int offsetA, offsetB, align; | ||||
| #if BUILD_BFLOAT16 == 1 | #if BUILD_BFLOAT16 == 1 | ||||
| @@ -267,9 +267,9 @@ int detect(void) | |||||
| } | } | ||||
| #else | #else | ||||
| #ifdef __APPLE__ | #ifdef __APPLE__ | ||||
| sysctlbyname("hw.cpufamily",&value,&length,NULL,0); | |||||
| if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1 | |||||
| if (value == 3660830781) return CPU_VORTEX; //A15/M2 | |||||
| sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); | |||||
| if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 | |||||
| if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 | |||||
| #endif | #endif | ||||
| return CPU_ARMV8; | return CPU_ARMV8; | ||||
| #endif | #endif | ||||
| @@ -1,5 +1,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
| /* Copyright 2023 The OpenBLAS Project. */ | |||||
| /* All rights reserved. */ | /* All rights reserved. */ | ||||
| /* */ | /* */ | ||||
| /* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
| @@ -44,10 +45,6 @@ | |||||
| #define DIVIDE_RATE 2 | #define DIVIDE_RATE 2 | ||||
| #endif | #endif | ||||
| #ifndef SWITCH_RATIO | |||||
| #define SWITCH_RATIO 2 | |||||
| #endif | |||||
| //The array of job_t may overflow the stack. | //The array of job_t may overflow the stack. | ||||
| //Instead, use malloc to alloc job_t. | //Instead, use malloc to alloc job_t. | ||||
| #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | ||||
| @@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| BLASLONG divN, divT; | BLASLONG divN, divT; | ||||
| int mode; | int mode; | ||||
| #if defined(DYNAMIC_ARCH) | |||||
| int switch_ratio = gotoblas->switch_ratio; | |||||
| #else | |||||
| int switch_ratio = SWITCH_RATIO; | |||||
| #endif | |||||
| if (range_m) { | if (range_m) { | ||||
| BLASLONG m_from = *(((BLASLONG *)range_m) + 0); | BLASLONG m_from = *(((BLASLONG *)range_m) + 0); | ||||
| BLASLONG m_to = *(((BLASLONG *)range_m) + 1); | BLASLONG m_to = *(((BLASLONG *)range_m) + 1); | ||||
| @@ -1030,7 +1033,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| } | } | ||||
| */ | */ | ||||
| if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) { | |||||
| if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) { | |||||
| GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0); | GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0); | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1038,7 +1041,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| divT = nthreads; | divT = nthreads; | ||||
| divN = 1; | divN = 1; | ||||
| while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) { | |||||
| while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) { | |||||
| do { | do { | ||||
| divT --; | divT --; | ||||
| divN = 1; | divN = 1; | ||||
| @@ -1,5 +1,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
| /* Copyright 2023 The OpenBLAS Project. */ | |||||
| /* All rights reserved. */ | /* All rights reserved. */ | ||||
| /* */ | /* */ | ||||
| /* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
| @@ -44,10 +45,6 @@ | |||||
| #define DIVIDE_RATE 2 | #define DIVIDE_RATE 2 | ||||
| #endif | #endif | ||||
| #ifndef SWITCH_RATIO | |||||
| #define SWITCH_RATIO 2 | |||||
| #endif | |||||
| //The array of job_t may overflow the stack. | //The array of job_t may overflow the stack. | ||||
| //Instead, use malloc to alloc job_t. | //Instead, use malloc to alloc job_t. | ||||
| #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | ||||
| @@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
| int mode, mask; | int mode, mask; | ||||
| double dnum, di, dinum; | double dnum, di, dinum; | ||||
| if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) { | |||||
| #if defined(DYNAMIC_ARCH) | |||||
| int switch_ratio = gotoblas->switch_ratio; | |||||
| #else | |||||
| int switch_ratio = SWITCH_RATIO; | |||||
| #endif | |||||
| if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) { | |||||
| SYRK_LOCAL(args, range_m, range_n, sa, sb, 0); | SYRK_LOCAL(args, range_m, range_n, sa, sb, 0); | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1,5 +1,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
| /* Copyright 2023 The OpenBLAS Project. */ | |||||
| /* All rights reserved. */ | /* All rights reserved. */ | ||||
| /* */ | /* */ | ||||
| /* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
| @@ -44,10 +45,6 @@ | |||||
| #define DIVIDE_RATE 2 | #define DIVIDE_RATE 2 | ||||
| #endif | #endif | ||||
| #ifndef SWITCH_RATIO | |||||
| #define SWITCH_RATIO 2 | |||||
| #endif | |||||
| #ifndef GEMM_PREFERED_SIZE | #ifndef GEMM_PREFERED_SIZE | ||||
| #define GEMM_PREFERED_SIZE 1 | #define GEMM_PREFERED_SIZE 1 | ||||
| #endif | #endif | ||||
| @@ -577,6 +574,11 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||||
| BLASLONG width, i, j, k, js; | BLASLONG width, i, j, k, js; | ||||
| BLASLONG m, n, n_from, n_to; | BLASLONG m, n, n_from, n_to; | ||||
| int mode; | int mode; | ||||
| #if defined(DYNAMIC_ARCH) | |||||
| int switch_ratio = gotoblas->switch_ratio; | |||||
| #else | |||||
| int switch_ratio = SWITCH_RATIO; | |||||
| #endif | |||||
| /* Get execution mode */ | /* Get execution mode */ | ||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| @@ -698,8 +700,8 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||||
| num_parts = 0; | num_parts = 0; | ||||
| while (n > 0){ | while (n > 0){ | ||||
| width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts); | width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts); | ||||
| if (width < SWITCH_RATIO) { | |||||
| width = SWITCH_RATIO; | |||||
| if (width < switch_ratio) { | |||||
| width = switch_ratio; | |||||
| } | } | ||||
| width = round_up(n, width, GEMM_PREFERED_SIZE); | width = round_up(n, width, GEMM_PREFERED_SIZE); | ||||
| @@ -746,6 +748,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF | |||||
| BLASLONG m = args -> m; | BLASLONG m = args -> m; | ||||
| BLASLONG n = args -> n; | BLASLONG n = args -> n; | ||||
| BLASLONG nthreads_m, nthreads_n; | BLASLONG nthreads_m, nthreads_n; | ||||
| #if defined(DYNAMIC_ARCH) | |||||
| int switch_ratio = gotoblas->switch_ratio; | |||||
| #else | |||||
| int switch_ratio = SWITCH_RATIO; | |||||
| #endif | |||||
| /* Get dimensions from index ranges if available */ | /* Get dimensions from index ranges if available */ | ||||
| if (range_m) { | if (range_m) { | ||||
| @@ -755,21 +762,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF | |||||
| n = range_n[1] - range_n[0]; | n = range_n[1] - range_n[0]; | ||||
| } | } | ||||
| /* Partitions in m should have at least SWITCH_RATIO rows */ | |||||
| if (m < 2 * SWITCH_RATIO) { | |||||
| /* Partitions in m should have at least switch_ratio rows */ | |||||
| if (m < 2 * switch_ratio) { | |||||
| nthreads_m = 1; | nthreads_m = 1; | ||||
| } else { | } else { | ||||
| nthreads_m = args -> nthreads; | nthreads_m = args -> nthreads; | ||||
| while (m < nthreads_m * SWITCH_RATIO) { | |||||
| while (m < nthreads_m * switch_ratio) { | |||||
| nthreads_m = nthreads_m / 2; | nthreads_m = nthreads_m / 2; | ||||
| } | } | ||||
| } | } | ||||
| /* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */ | |||||
| if (n < SWITCH_RATIO * nthreads_m) { | |||||
| /* Partitions in n should have at most switch_ratio * nthreads_m columns */ | |||||
| if (n < switch_ratio * nthreads_m) { | |||||
| nthreads_n = 1; | nthreads_n = 1; | ||||
| } else { | } else { | ||||
| nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m); | |||||
| nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m); | |||||
| if (nthreads_m * nthreads_n > args -> nthreads) { | if (nthreads_m * nthreads_n > args -> nthreads) { | ||||
| nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m); | nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m); | ||||
| } | } | ||||
| @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define pCRow3 x15 | #define pCRow3 x15 | ||||
| #define pA x16 | #define pA x16 | ||||
| #define alphaR w17 | #define alphaR w17 | ||||
| #define alphaI w18 | |||||
| #define alphaI w19 | |||||
| #define alpha0_R s10 | #define alpha0_R s10 | ||||
| #define alphaV0_R v10.s[0] | #define alphaV0_R v10.s[0] | ||||
| @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define pCRow3 x15 | #define pCRow3 x15 | ||||
| #define pA x16 | #define pA x16 | ||||
| #define alphaR w17 | #define alphaR w17 | ||||
| #define alphaI w18 | |||||
| #define alphaI w19 | |||||
| #define alpha0_R s10 | #define alpha0_R s10 | ||||
| #define alphaV0_R v10.s[0] | #define alphaV0_R v10.s[0] | ||||
| @@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define pCRow3 x15 | #define pCRow3 x15 | ||||
| #define pA x16 | #define pA x16 | ||||
| #define alphaR w17 | #define alphaR w17 | ||||
| #define alphaI w18 | |||||
| #define temp x19 | |||||
| #define tempOffset x20 | |||||
| #define tempK x21 | |||||
| #define alphaI w19 | |||||
| #define temp x20 | |||||
| #define tempOffset x21 | |||||
| #define tempK x22 | |||||
| #define alpha0_R s10 | #define alpha0_R s10 | ||||
| #define alphaV0_R v10.s[0] | #define alphaV0_R v10.s[0] | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #include <float.h> | |||||
| #include <arm_neon.h> | #include <arm_neon.h> | ||||
| #if defined(SMP) | #if defined(SMP) | ||||
| @@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| #else | #else | ||||
| nrm2_compute(n, x, inc_x, &ssq, &scale); | nrm2_compute(n, x, inc_x, &ssq, &scale); | ||||
| #endif | #endif | ||||
| if (fabs(scale) <1.e-300) return 0.; | |||||
| volatile FLOAT sca = fabs(scale); | |||||
| if (sca < DBL_MIN) return 0.; | |||||
| ssq = sqrt(ssq) * scale; | ssq = sqrt(ssq) * scale; | ||||
| return ssq; | return ssq; | ||||
| @@ -1,5 +1,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
| /* Copyright 2023 The OpenBLAS Project. */ | |||||
| /* All rights reserved. */ | /* All rights reserved. */ | ||||
| /* */ | /* */ | ||||
| /* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
| @@ -49,7 +50,9 @@ | |||||
| static void init_parameter(void); | static void init_parameter(void); | ||||
| gotoblas_t TABLE_NAME = { | gotoblas_t TABLE_NAME = { | ||||
| DTB_DEFAULT_ENTRIES , | |||||
| DTB_DEFAULT_ENTRIES, | |||||
| SWITCH_RATIO, | |||||
| GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #if (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #pragma GCC optimize("no-tree-vectorize") | #pragma GCC optimize("no-tree-vectorize") | ||||
| #endif | #endif | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #if (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #pragma GCC optimize("no-tree-vectorize") | #pragma GCC optimize("no-tree-vectorize") | ||||
| #endif | #endif | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #if (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #pragma GCC optimize("no-tree-vectorize") | #pragma GCC optimize("no-tree-vectorize") | ||||
| #endif | #endif | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #if (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #pragma GCC optimize("no-tree-vectorize") | #pragma GCC optimize("no-tree-vectorize") | ||||
| #endif | #endif | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #if (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #pragma GCC optimize("no-tree-vectorize") | #pragma GCC optimize("no-tree-vectorize") | ||||
| #endif | #endif | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #if (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #pragma GCC optimize("no-tree-vectorize") | #pragma GCC optimize("no-tree-vectorize") | ||||
| #endif | #endif | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #if (defined(__GNUC__) && __GNUC__ > 11) | |||||
| #pragma GCC optimize("no-tree-vectorize") | #pragma GCC optimize("no-tree-vectorize") | ||||
| #endif | #endif | ||||
| @@ -80,10 +80,6 @@ static FLOAT dm1 = -1.; | |||||
| #define DIVIDE_RATE 2 | #define DIVIDE_RATE 2 | ||||
| #endif | #endif | ||||
| #ifndef SWITCH_RATIO | |||||
| #define SWITCH_RATIO 2 | |||||
| #endif | |||||
| #ifndef LOWER | #ifndef LOWER | ||||
| #define TRANS | #define TRANS | ||||
| #endif | #endif | ||||
| @@ -1,5 +1,5 @@ | |||||
| /***************************************************************************** | /***************************************************************************** | ||||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||||
| Copyright (c) 2011-2023, The OpenBLAS Project | |||||
| All rights reserved. | All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
| @@ -3338,6 +3338,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||||
| #elif defined(NEOVERSEN1) | #elif defined(NEOVERSEN1) | ||||
| #if defined(XDOUBLE) || defined(DOUBLE) | |||||
| #define SWITCH_RATIO 8 | |||||
| #else | |||||
| #define SWITCH_RATIO 16 | |||||
| #endif | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | #define SGEMM_DEFAULT_UNROLL_N 4 | ||||
| @@ -3367,7 +3373,11 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||||
| #elif defined(NEOVERSEV1) | #elif defined(NEOVERSEV1) | ||||
| #define SWITCH_RATIO 16 | |||||
| #if defined(XDOUBLE) || defined(DOUBLE) | |||||
| #define SWITCH_RATIO 8 | |||||
| #else | |||||
| #define SWITCH_RATIO 16 | |||||
| #endif | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | #define SGEMM_DEFAULT_UNROLL_N 4 | ||||
| @@ -3398,6 +3408,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||||
| #elif defined(NEOVERSEN2) | #elif defined(NEOVERSEN2) | ||||
| #if defined(XDOUBLE) || defined(DOUBLE) | |||||
| #define SWITCH_RATIO 8 | |||||
| #else | |||||
| #define SWITCH_RATIO 16 | |||||
| #endif | |||||
| #undef SBGEMM_ALIGN_K | #undef SBGEMM_ALIGN_K | ||||
| #define SBGEMM_ALIGN_K 4 | #define SBGEMM_ALIGN_K 4 | ||||
| @@ -3838,6 +3854,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||||
| #endif | #endif | ||||
| #ifndef SWITCH_RATIO | |||||
| #define SWITCH_RATIO 2 | |||||
| #endif | |||||
| #ifndef QGEMM_DEFAULT_UNROLL_M | #ifndef QGEMM_DEFAULT_UNROLL_M | ||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | #define QGEMM_DEFAULT_UNROLL_M 2 | ||||
| #endif | #endif | ||||