From 59d0cf4a2108bfed979feb1e53eb6b2ba16ba9b2 Mon Sep 17 00:00:00 2001 From: Ye Tao Date: Thu, 22 May 2025 09:06:23 +0000 Subject: [PATCH] fix generic gemm_beta for bgemm --- kernel/generic/gemm_beta.c | 22 ++++++++++++---------- kernel/setparam-ref.c | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/kernel/generic/gemm_beta.c b/kernel/generic/gemm_beta.c index 64ff505e8..f399de090 100644 --- a/kernel/generic/gemm_beta.c +++ b/kernel/generic/gemm_beta.c @@ -72,7 +72,7 @@ f32tobfloat16(float f32) #endif -int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta_in, IFLOAT *dummy2, BLASLONG dummy3, IFLOAT *dummy4, BLASLONG dummy5, FLOAT *c, BLASLONG ldc){ @@ -83,6 +83,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, c_offset = c; chunk = m >> 3; remain = m & 7; + float beta = BF16TOF32(beta_in); + if (beta == ZERO){ for(j=n; j>0; j--){ c_offset1 = c_offset; @@ -108,18 +110,18 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, c_offset1 = c_offset; c_offset += ldc; for(i=chunk; i>0; i--){ - *(c_offset1 + 0) *= beta; - *(c_offset1 + 1) *= beta; - *(c_offset1 + 2) *= beta; - *(c_offset1 + 3) *= beta; - *(c_offset1 + 4) *= beta; - *(c_offset1 + 5) *= beta; - *(c_offset1 + 6) *= beta; - *(c_offset1 + 7) *= beta; + *(c_offset1 + 0) = F32TOBF16(beta * BF16TOF32(c_offset1[0])); + *(c_offset1 + 1) = F32TOBF16(beta * BF16TOF32(c_offset1[1])); + *(c_offset1 + 2) = F32TOBF16(beta * BF16TOF32(c_offset1[2])); + *(c_offset1 + 3) = F32TOBF16(beta * BF16TOF32(c_offset1[3])); + *(c_offset1 + 4) = F32TOBF16(beta * BF16TOF32(c_offset1[4])); + *(c_offset1 + 5) = F32TOBF16(beta * BF16TOF32(c_offset1[5])); + *(c_offset1 + 6) = F32TOBF16(beta * BF16TOF32(c_offset1[6])); + *(c_offset1 + 7) = F32TOBF16(beta * BF16TOF32(c_offset1[7])); c_offset1 += 8; } for(i=remain; i>0; i--){ - *c_offset1 *= beta; + *c_offset1 = F32TOBF16(beta * BF16TOF32(c_offset1[0])); c_offset1 ++; } } diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 78b59835f..ea05e8dc9 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -1,6 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ -/* Copyright 2023, 2025 The OpenBLAS Project. */ +/* Copyright 2023, 2025 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */