Browse Source

Add sgemm_direct_performant for switching between direct and regular kernels

pull/5423/head
Martin Kroeker GitHub 8 months ago
parent
commit
89898fc499
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
1 changed files with 31 additions and 0 deletions
  1. +31
    -0
      kernel/arm64/sgemm_direct_performant.c

+ 31
- 0
kernel/arm64/sgemm_direct_performant.c View File

@@ -0,0 +1,31 @@
#include "common.h"
/* helper for the direct sgemm code written by Arjan van de Ven */




/*
 * Heuristic used to switch between the direct SGEMM kernel and the regular
 * one for a problem of dimensions M, N and K.
 *
 * Returns 1 when none of the size checks below rule the direct kernel out,
 * and 0 when the regular kernel should be used instead.
 */
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K)
{
  /* reject M smaller than 3 as well as odd M */
  if (M < 3 || M % 2 == 1) {
    return 0;
  }

  /*
   * Widen every operand before multiplying: forming the product in BLASLONG
   * first could overflow the signed type (undefined behaviour) or truncate
   * when BLASLONG is narrower than unsigned long long.
   */
  unsigned long long mnk = (unsigned long long)M
                         * (unsigned long long)N
                         * (unsigned long long)K;

  /* large matrices -> not performant */
  if (mnk >= 28 * 512 * 512) {
    return 0;
  }

  /*
   * if the B matrix is not a nice multiple of 4 we get many unaligned accesses,
   * and the regular sgemm copy/realignment of data pays off much quicker
   */
  if ((N & 3) != 0 && (mnk >= 8 * 512 * 512)) {
    return 0;
  }

#ifdef SMP
  /*
   * if we can run multithreaded, the threading changes the base threshold
   * NOTE(review): the argument 3 presumably selects the BLAS level - confirm
   * against the declaration of num_cpu_avail.
   */
  if (mnk > 2 * 350 * 512 && num_cpu_avail(3) > 1) {
    return 0;
  }
#endif

  return 1;
}



Loading…
Cancel
Save