| @@ -0,0 +1,31 @@ | |||||
| #include "common.h" | |||||
| /* helper for the direct sgemm code written by Arjan van der Ven */ | |||||
| int CNAME(BLASLONG M, BLASLONG N, BLASLONG K) | |||||
| { | |||||
| if (M<3 || M%2==1) return 0; | |||||
| unsigned long long mnk = M * N * K; | |||||
| /* large matrixes -> not performant */ | |||||
| if (mnk >= 28 * 512 * 512) | |||||
| return 0; | |||||
| /* | |||||
| * if the B matrix is not a nice multiple if 4 we get many unaligned accesses, | |||||
| * and the regular sgemm copy/realignment of data pays off much quicker | |||||
| */ | |||||
| if ((N & 3) != 0 && (mnk >= 8 * 512 * 512)) | |||||
| return 0; | |||||
| #ifdef SMP | |||||
| /* if we can run multithreaded, the threading changes the based threshold */ | |||||
| if (mnk > 2 * 350 * 512 && num_cpu_avail(3)> 1) | |||||
| return 0; | |||||
| #endif | |||||
| return 1; | |||||
| } | |||||