You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

sgemm_direct_performant.c 651 B

123456789101112131415161718192021222324252627282930
  1. #include "common.h"
  2. /* helper for the direct sgemm code written by Arjan van der Ven */
  3. int CNAME(BLASLONG M, BLASLONG N, BLASLONG K)
  4. {
  5. unsigned long long mnk = M * N * K;
  6. /* large matrixes -> not performant */
  7. if (mnk >= 28 * 512 * 512)
  8. return 0;
  9. /*
  10. * if the B matrix is not a nice multiple if 4 we get many unaligned accesses,
  11. * and the regular sgemm copy/realignment of data pays off much quicker
  12. */
  13. if ((N & 3) != 0 && (mnk >= 8 * 512 * 512))
  14. return 0;
  15. #ifdef SMP
  16. /* if we can run multithreaded, the threading changes the based threshold */
  17. if (mnk > 2 * 350 * 512 && num_cpu_avail(3)> 1)
  18. return 0;
  19. #endif
  20. return 1;
  21. }