Browse Source

fix generic gemm_beta for bgemm

pull/5287/head
Ye Tao 1 year ago
parent
commit
59d0cf4a21
2 changed files with 13 additions and 11 deletions
  1. +12
    -10
      kernel/generic/gemm_beta.c
  2. +1
    -1
      kernel/setparam-ref.c

+ 12
- 10
kernel/generic/gemm_beta.c View File

@@ -72,7 +72,7 @@ f32tobfloat16(float f32)
#endif


-int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
+int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta_in,
IFLOAT *dummy2, BLASLONG dummy3, IFLOAT *dummy4, BLASLONG dummy5,
FLOAT *c, BLASLONG ldc){

@@ -83,6 +83,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
c_offset = c;
chunk = m >> 3;
remain = m & 7;
+float beta = BF16TOF32(beta_in);
+
if (beta == ZERO){
for(j=n; j>0; j--){
c_offset1 = c_offset;
@@ -108,18 +110,18 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
c_offset1 = c_offset;
c_offset += ldc;
for(i=chunk; i>0; i--){
-*(c_offset1 + 0) *= beta;
-*(c_offset1 + 1) *= beta;
-*(c_offset1 + 2) *= beta;
-*(c_offset1 + 3) *= beta;
-*(c_offset1 + 4) *= beta;
-*(c_offset1 + 5) *= beta;
-*(c_offset1 + 6) *= beta;
-*(c_offset1 + 7) *= beta;
+*(c_offset1 + 0) = F32TOBF16(beta * BF16TOF32(c_offset1[0]));
+*(c_offset1 + 1) = F32TOBF16(beta * BF16TOF32(c_offset1[1]));
+*(c_offset1 + 2) = F32TOBF16(beta * BF16TOF32(c_offset1[2]));
+*(c_offset1 + 3) = F32TOBF16(beta * BF16TOF32(c_offset1[3]));
+*(c_offset1 + 4) = F32TOBF16(beta * BF16TOF32(c_offset1[4]));
+*(c_offset1 + 5) = F32TOBF16(beta * BF16TOF32(c_offset1[5]));
+*(c_offset1 + 6) = F32TOBF16(beta * BF16TOF32(c_offset1[6]));
+*(c_offset1 + 7) = F32TOBF16(beta * BF16TOF32(c_offset1[7]));
c_offset1 += 8;
}
for(i=remain; i>0; i--){
-*c_offset1 *= beta;
+*c_offset1 = F32TOBF16(beta * BF16TOF32(c_offset1[0]));
c_offset1 ++;
}
}


+ 1
- 1
kernel/setparam-ref.c View File

@@ -1,6 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
-/* Copyright 2023, 2025 The OpenBLAS Project. */
+/* Copyright 2023, 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */


Loading…
Cancel
Save