#include "common.h"

#if defined(BFLOAT16) && defined(BFLOAT16CONVERSION)

/*
 * Widen a bfloat16 value to float.
 *
 * The 16 bits of `f16` become the most significant half of the float's
 * object representation; the least significant half is zero.
 *
 * The reinterpretation goes through a union rather than a pointer cast so
 * that it does not violate the effective-type (strict aliasing) rules.
 *
 * The byte-order test is guarded with defined(): inside #if an undefined
 * identifier evaluates to 0, so on a compiler that provides neither
 * __BYTE_ORDER__ nor __ORDER_BIG_ENDIAN__ the unguarded comparison would be
 * `0 == 0` and silently select the big-endian layout. Without the macros
 * the little-endian layout is used.
 */
static float
bfloat16tof32 (bfloat16 f16)
{
  union {
    float          f32;
    unsigned short half[2];
  } pun;

  pun.f32 = 0;
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  pun.half[0] = f16;   /* big endian: most significant half is stored first */
#else
  pun.half[1] = f16;   /* little endian: most significant half is stored last */
#endif
  return pun.f32;
}

/*
 * Narrow a float to bfloat16 by returning the most significant 16 bits of
 * its object representation. The low 16 bits are simply dropped; no
 * rounding is applied.
 *
 * Uses the same union-based reinterpretation and guarded byte-order test as
 * bfloat16tof32().
 */
static bfloat16
f32tobfloat16 (float f32)
{
  union {
    float          f32;
    unsigned short half[2];
  } pun;

  pun.f32 = f32;
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  return pun.half[0];
#else
  return pun.half[1];
#endif
}

/*
 * Conversion macros used by the kernel below.
 *   BGEMM:                 alpha and inputs are widened to float, results
 *                          are narrowed back to bfloat16.
 *   BFLOAT16 without BGEMM: inputs are widened to float, alpha and results
 *                          are used as they are.
 *   otherwise:             every macro is the identity.
 */
#ifdef BGEMM
#define ALPHA bfloat16tof32(alpha)
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) (f32tobfloat16(x))
#else
#define ALPHA alpha
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) (x)
#endif

#else

#define ALPHA alpha
#define BF16TOF32(x) (x)
#define F32TOBF16(x) (x)

#endif

/*
 * Kernel entry point. The trailing `offset` parameter is part of the
 * signature only when TRMMKERNEL is defined. The four accumulators are
 * plain float when BGEMM is defined and FLOAT otherwise.
 */
int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb,FLOAT* C,BLASLONG ldc
#ifdef TRMMKERNEL
		,BLASLONG offset
#endif
		)
{
   BLASLONG i,j,k;
   FLOAT *C0,*C1;
   IFLOAT *ptrba,*ptrbb;
#ifdef BGEMM
   float res0,res1,res2,res3;
#else
   FLOAT res0,res1,res2,res3;
#endif
   IFLOAT load0,load1,load2,load3,load4,load5,load6,load7;
   for (j=0; j