|
|
|
@@ -1,4 +1,5 @@ |
|
|
|
#include "common.h" |
|
|
|
#include <stdint.h> |
|
|
|
#include <immintrin.h> |
|
|
|
/* row-major c_block */ |
|
|
|
/* 64-bit pointer registers: a_block_pointer,b_block_pointer,c_pointer;*/ |
|
|
|
@@ -289,43 +290,6 @@ |
|
|
|
INNER_TRANS_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)\ |
|
|
|
INNER_STORE_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31) |
|
|
|
|
|
|
|
/*
 * COMPUTE_m1n8: expands to a brace-enclosed block holding one
 * __asm__ __volatile__ statement followed by `c_pointer += 1`.
 *
 * The asm template is the concatenation of INNER_INIT_m1n8,
 * INNER_KERNELm1(8) and INNER_SAVE_m1n8. Those macros are defined outside
 * this section, so the instructions they emit are not described here.
 *
 * Operands (GCC numbers outputs first, then inputs):
 *   %0  a_block_pointer   "+r"  read and written by the asm
 *   %1  packed_b_pointer  "r"   input
 *   %2  (int64_t)k        "r"   input
 *   %3  c_pointer         "r"   input
 *   %4  ldc_in_bytes      "r"   input
 * Clobbers: zmm4-zmm8, the flags ("cc"), "memory", and mask register k1.
 *
 * All five variables are taken from the scope in which the macro is
 * expanded; the macro takes no arguments.
 * NOTE(review): operands declared as plain "r" inputs must not be modified
 * by the template -- confirm against the INNER_* macros.
 */
#define COMPUTE_m1n8 {\ |
|
|
|
__asm__ __volatile__(\ |
|
|
|
INNER_INIT_m1n8\ |
|
|
|
INNER_KERNELm1(8)\ |
|
|
|
INNER_SAVE_m1n8\ |
|
|
|
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\ |
|
|
|
:"zmm4","zmm5","zmm6","zmm7","zmm8","cc","memory","k1");\ |
|
|
|
c_pointer += 1;\ |
|
|
|
} |
|
|
|
/*
 * COMPUTE_m2n8: expands to a brace-enclosed block holding one
 * __asm__ __volatile__ statement followed by `c_pointer += 2`.
 *
 * The asm template is the concatenation of INNER_INIT_m2n8,
 * INNER_KERNELm2(8) and INNER_SAVE_m2n8. Those macros are defined outside
 * this section, so the instructions they emit are not described here.
 *
 * Operands (GCC numbers outputs first, then inputs):
 *   %0  a_block_pointer   "+r"  read and written by the asm
 *   %1  packed_b_pointer  "r"   input
 *   %2  (int64_t)k        "r"   input
 *   %3  c_pointer         "r"   input
 *   %4  ldc_in_bytes      "r"   input
 * Clobbers: zmm4-zmm9, the flags ("cc"), "memory", and mask register k1.
 *
 * All five variables are taken from the scope in which the macro is
 * expanded; the macro takes no arguments.
 * NOTE(review): operands declared as plain "r" inputs must not be modified
 * by the template -- confirm against the INNER_* macros.
 */
#define COMPUTE_m2n8 {\ |
|
|
|
__asm__ __volatile__(\ |
|
|
|
INNER_INIT_m2n8\ |
|
|
|
INNER_KERNELm2(8)\ |
|
|
|
INNER_SAVE_m2n8\ |
|
|
|
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\ |
|
|
|
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","cc","memory","k1");\ |
|
|
|
c_pointer += 2;\ |
|
|
|
} |
|
|
|
/*
 * COMPUTE_m4n8: expands to a brace-enclosed block holding one
 * __asm__ __volatile__ statement followed by `c_pointer += 4`.
 *
 * The asm template is the concatenation of INNER_INIT_m4n8,
 * INNER_KERNELm4(8) and INNER_SAVE_m4n8. Those macros are defined outside
 * this section, so the instructions they emit are not described here.
 *
 * Operands (GCC numbers outputs first, then inputs):
 *   %0  a_block_pointer   "+r"  read and written by the asm
 *   %1  packed_b_pointer  "r"   input
 *   %2  (int64_t)k        "r"   input
 *   %3  c_pointer         "r"   input
 *   %4  ldc_in_bytes      "r"   input
 *   %5  k02               "Yk"  input, placed in an AVX-512 mask register
 *   %6  k03               "Yk"  input, placed in an AVX-512 mask register
 *   %7  k01               "Yk"  input, placed in an AVX-512 mask register
 * Note the mask inputs are listed in the order k02, k03, k01.
 * Clobbers: zmm4-zmm11, the flags ("cc"), and "memory". No mask register
 * appears in the clobber list.
 *
 * All eight variables are taken from the scope in which the macro is
 * expanded; the macro takes no arguments.
 * NOTE(review): operands declared as input-only must not be modified by the
 * template -- confirm against the INNER_* macros.
 */
#define COMPUTE_m4n8 {\ |
|
|
|
__asm__ __volatile__(\ |
|
|
|
INNER_INIT_m4n8\ |
|
|
|
INNER_KERNELm4(8)\ |
|
|
|
INNER_SAVE_m4n8\ |
|
|
|
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03),"Yk"(k01)\ |
|
|
|
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","cc","memory");\ |
|
|
|
c_pointer += 4;\ |
|
|
|
} |
|
|
|
/*
 * COMPUTE_m8n8: expands to a brace-enclosed block holding one
 * __asm__ __volatile__ statement followed by `c_pointer += 8`.
 *
 * The asm template is the concatenation of INNER_INIT_m8n8,
 * INNER_KERNELm8(8) and INNER_SAVE_m8n8. Those macros are defined outside
 * this section, so the instructions they emit are not described here.
 *
 * Operands (GCC numbers outputs first, then inputs):
 *   %0  a_block_pointer   "+r"  read and written by the asm
 *   %1  packed_b_pointer  "r"   input
 *   %2  (int64_t)k        "r"   input
 *   %3  c_pointer         "r"   input
 *   %4  ldc_in_bytes      "r"   input
 *   %5  k02               "Yk"  input, placed in an AVX-512 mask register
 *   %6  k03               "Yk"  input, placed in an AVX-512 mask register
 * Clobbers: zmm4-zmm15, the flags ("cc"), and "memory". No mask register
 * appears in the clobber list.
 *
 * All seven variables are taken from the scope in which the macro is
 * expanded; the macro takes no arguments.
 * NOTE(review): operands declared as input-only must not be modified by the
 * template -- confirm against the INNER_* macros.
 */
#define COMPUTE_m8n8 {\ |
|
|
|
__asm__ __volatile__(\ |
|
|
|
INNER_INIT_m8n8\ |
|
|
|
INNER_KERNELm8(8)\ |
|
|
|
INNER_SAVE_m8n8\ |
|
|
|
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03)\ |
|
|
|
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory");\ |
|
|
|
c_pointer += 8;\ |
|
|
|
} |
|
|
|
|
|
|
|
#define COMPUTE_n8 {\ |
|
|
|
__asm__ __volatile__(\ |
|
|
|
"movq %8,%%r14;movq %2,%%r13;"\ |
|
|
|
|