| @@ -1771,6 +1771,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #define USE_SGEMM_KERNEL_DIRECT 1 | #define USE_SGEMM_KERNEL_DIRECT 1 | ||||
| // Override the SBGEMM_DEFAULT_* parameters: drop any earlier definitions, then redefine them below. | |||||
| #undef SBGEMM_DEFAULT_UNROLL_N | |||||
| #undef SBGEMM_DEFAULT_UNROLL_M | |||||
| #undef SBGEMM_DEFAULT_P | |||||
| #undef SBGEMM_DEFAULT_R | |||||
| #undef SBGEMM_DEFAULT_Q | |||||
| // FIXME: the actual values are UNROLL_M = UNROLL_N = 16. | |||||
| // If UNROLL_M and UNROLL_N are equal, OpenBLAS will reuse OCOPY as ICOPY. | |||||
| // For AMX the two are not the same, so UNROLL_M is set to 32 as a workaround. | |||||
| #define SBGEMM_DEFAULT_UNROLL_N 16 | |||||
| #define SBGEMM_DEFAULT_UNROLL_M 32 | |||||
| #define SBGEMM_DEFAULT_P 192 | |||||
| #define SBGEMM_DEFAULT_Q 1024 | |||||
| // NOTE(review): sbgemm_r is not defined in this view; presumably an identifier provided elsewhere -- confirm. | |||||
| #define SBGEMM_DEFAULT_R sbgemm_r | |||||
| #ifdef ARCH_X86 | #ifdef ARCH_X86 | ||||
| #define SGEMM_DEFAULT_UNROLL_M 4 | #define SGEMM_DEFAULT_UNROLL_M 4 | ||||