|
|
|
@@ -1771,6 +1771,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
#endif |
|
|
|
// NOTE(review): presumably turns on a direct SGEMM kernel path; the code that tests this macro is not visible here -- confirm. |
#define USE_SGEMM_KERNEL_DIRECT 1 |
|
|
|
|
|
|
|
// Drop any earlier SBGEMM_DEFAULT_* definitions so that the values defined below replace them. |
#undef SBGEMM_DEFAULT_UNROLL_N |
|
|
|
#undef SBGEMM_DEFAULT_UNROLL_M |
|
|
|
#undef SBGEMM_DEFAULT_P |
|
|
|
#undef SBGEMM_DEFAULT_R |
|
|
|
#undef SBGEMM_DEFAULT_Q |
|
|
|
// FIXME: the actual unroll factors are UNROLL_M = UNROLL_N = 16. |
|
|
|
// When UNROLL_M and UNROLL_N are equal, OpenBLAS reuses OCOPY as ICOPY. |
|
|
|
// For AMX, OCOPY and ICOPY are not the same, so UNROLL_M is set to 32 as a workaround to keep them separate. |
|
|
|
#define SBGEMM_DEFAULT_UNROLL_N 16 |
|
|
|
#define SBGEMM_DEFAULT_UNROLL_M 32 |
|
|
|
#define SBGEMM_DEFAULT_P 192 |
|
|
|
#define SBGEMM_DEFAULT_Q 1024 |
|
|
|
// NOTE(review): sbgemm_r is an identifier rather than a literal; its declaration is not visible here -- confirm it is in scope wherever this macro expands. |
#define SBGEMM_DEFAULT_R sbgemm_r |
|
|
|
|
|
|
|
#ifdef ARCH_X86 |
|
|
|
|
|
|
|
#define SGEMM_DEFAULT_UNROLL_M 4 |
|
|
|
|