Propagate SWITCH_RATIO to DYNAMIC_ARCH builds (tags/v0.3.24)
| @@ -1,5 +1,6 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* Copyright 2023 The OpenBLAS Project. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -45,6 +46,7 @@ | |||
| typedef struct { | |||
| int dtb_entries; | |||
| int switch_ratio; | |||
| int offsetA, offsetB, align; | |||
| #if BUILD_BFLOAT16 == 1 | |||
| @@ -1,5 +1,6 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* Copyright 2023 The OpenBLAS Project. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -44,10 +45,6 @@ | |||
| #define DIVIDE_RATE 2 | |||
| #endif | |||
| #ifndef SWITCH_RATIO | |||
| #define SWITCH_RATIO 2 | |||
| #endif | |||
| //The array of job_t may overflow the stack. | |||
| //Instead, use malloc to alloc job_t. | |||
| #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | |||
| @@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| BLASLONG divN, divT; | |||
| int mode; | |||
| #if defined(DYNAMIC_ARCH) | |||
| int switch_ratio = gotoblas->switch_ratio; | |||
| #else | |||
| int switch_ratio = SWITCH_RATIO; | |||
| #endif | |||
| if (range_m) { | |||
| BLASLONG m_from = *(((BLASLONG *)range_m) + 0); | |||
| BLASLONG m_to = *(((BLASLONG *)range_m) + 1); | |||
| @@ -1030,7 +1033,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| } | |||
| */ | |||
| if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) { | |||
| if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) { | |||
| GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0); | |||
| return 0; | |||
| } | |||
| @@ -1038,7 +1041,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| divT = nthreads; | |||
| divN = 1; | |||
| while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) { | |||
| while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) { | |||
| do { | |||
| divT --; | |||
| divN = 1; | |||
| @@ -1,5 +1,6 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* Copyright 2023 The OpenBLAS Project. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -44,10 +45,6 @@ | |||
| #define DIVIDE_RATE 2 | |||
| #endif | |||
| #ifndef SWITCH_RATIO | |||
| #define SWITCH_RATIO 2 | |||
| #endif | |||
| //The array of job_t may overflow the stack. | |||
| //Instead, use malloc to alloc job_t. | |||
| #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | |||
| @@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
| int mode, mask; | |||
| double dnum, di, dinum; | |||
| if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) { | |||
| #if defined(DYNAMIC_ARCH) | |||
| int switch_ratio = gotoblas->switch_ratio; | |||
| #else | |||
| int switch_ratio = SWITCH_RATIO; | |||
| #endif | |||
| if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) { | |||
| SYRK_LOCAL(args, range_m, range_n, sa, sb, 0); | |||
| return 0; | |||
| } | |||
| @@ -1,5 +1,6 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* Copyright 2023 The OpenBLAS Project. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -44,10 +45,6 @@ | |||
| #define DIVIDE_RATE 2 | |||
| #endif | |||
| #ifndef SWITCH_RATIO | |||
| #define SWITCH_RATIO 2 | |||
| #endif | |||
| #ifndef GEMM_PREFERED_SIZE | |||
| #define GEMM_PREFERED_SIZE 1 | |||
| #endif | |||
| @@ -577,6 +574,11 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||
| BLASLONG width, i, j, k, js; | |||
| BLASLONG m, n, n_from, n_to; | |||
| int mode; | |||
| #if defined(DYNAMIC_ARCH) | |||
| int switch_ratio = gotoblas->switch_ratio; | |||
| #else | |||
| int switch_ratio = SWITCH_RATIO; | |||
| #endif | |||
| /* Get execution mode */ | |||
| #ifndef COMPLEX | |||
| @@ -698,8 +700,8 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||
| num_parts = 0; | |||
| while (n > 0){ | |||
| width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts); | |||
| if (width < SWITCH_RATIO) { | |||
| width = SWITCH_RATIO; | |||
| if (width < switch_ratio) { | |||
| width = switch_ratio; | |||
| } | |||
| width = round_up(n, width, GEMM_PREFERED_SIZE); | |||
| @@ -746,6 +748,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF | |||
| BLASLONG m = args -> m; | |||
| BLASLONG n = args -> n; | |||
| BLASLONG nthreads_m, nthreads_n; | |||
| #if defined(DYNAMIC_ARCH) | |||
| int switch_ratio = gotoblas->switch_ratio; | |||
| #else | |||
| int switch_ratio = SWITCH_RATIO; | |||
| #endif | |||
| /* Get dimensions from index ranges if available */ | |||
| if (range_m) { | |||
| @@ -755,21 +762,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF | |||
| n = range_n[1] - range_n[0]; | |||
| } | |||
| /* Partitions in m should have at least SWITCH_RATIO rows */ | |||
| if (m < 2 * SWITCH_RATIO) { | |||
| /* Partitions in m should have at least switch_ratio rows */ | |||
| if (m < 2 * switch_ratio) { | |||
| nthreads_m = 1; | |||
| } else { | |||
| nthreads_m = args -> nthreads; | |||
| while (m < nthreads_m * SWITCH_RATIO) { | |||
| while (m < nthreads_m * switch_ratio) { | |||
| nthreads_m = nthreads_m / 2; | |||
| } | |||
| } | |||
| /* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */ | |||
| if (n < SWITCH_RATIO * nthreads_m) { | |||
| /* Partitions in n should have at most switch_ratio * nthreads_m columns */ | |||
| if (n < switch_ratio * nthreads_m) { | |||
| nthreads_n = 1; | |||
| } else { | |||
| nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m); | |||
| nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m); | |||
| if (nthreads_m * nthreads_n > args -> nthreads) { | |||
| nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m); | |||
| } | |||
| @@ -1,5 +1,6 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* Copyright 2023 The OpenBLAS Project. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -49,7 +50,9 @@ | |||
| static void init_parameter(void); | |||
| gotoblas_t TABLE_NAME = { | |||
| DTB_DEFAULT_ENTRIES , | |||
| DTB_DEFAULT_ENTRIES, | |||
| SWITCH_RATIO, | |||
| GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | |||
| @@ -80,10 +80,6 @@ static FLOAT dm1 = -1.; | |||
| #define DIVIDE_RATE 2 | |||
| #endif | |||
| #ifndef SWITCH_RATIO | |||
| #define SWITCH_RATIO 2 | |||
| #endif | |||
| #ifndef LOWER | |||
| #define TRANS | |||
| #endif | |||
| @@ -3854,6 +3854,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||
| #endif | |||
| #ifndef SWITCH_RATIO | |||
| #define SWITCH_RATIO 2 | |||
| #endif | |||
| #ifndef QGEMM_DEFAULT_UNROLL_M | |||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||
| #endif | |||