Browse Source

Merge remote-tracking branch 'origin/develop' into XLC-AIX

tags/v0.3.25^2
Chip-Kerchner 2 years ago
parent
commit
48da98b2a7
35 changed files with 102 additions and 94 deletions
  1. +3
    -0
      .github/workflows/loongarch64.yml
  2. +1
    -1
      Makefile.system
  3. +8
    -8
      common_thread.h
  4. +5
    -5
      cpuid_x86.c
  5. +1
    -1
      driver/level3/gemm_thread_m.c
  6. +1
    -1
      driver/level3/gemm_thread_mn.c
  7. +1
    -1
      driver/level3/gemm_thread_n.c
  8. +1
    -1
      driver/level3/gemm_thread_variable.c
  9. +1
    -1
      driver/level3/syrk_thread.c
  10. +2
    -2
      driver/others/blas_l1_thread.c
  11. +1
    -1
      driver/others/blas_server.c
  12. +5
    -5
      driver/others/blas_server_omp.c
  13. +13
    -13
      driver/others/memory.c
  14. +9
    -9
      driver/others/openblas_env.c
  15. +1
    -1
      driver/others/openblas_error_handle.c
  16. +4
    -4
      driver/others/openblas_get_config.c
  17. +3
    -3
      driver/others/openblas_get_parallel.c
  18. +1
    -1
      driver/others/parameter.c
  19. +1
    -1
      interface/lapack/laswp.c
  20. +1
    -1
      interface/lapack/zlaswp.c
  21. +5
    -5
      kernel/loongarch64/dgemv_n_8_lasx.S
  22. +3
    -3
      kernel/loongarch64/dgemv_t_8_lasx.S
  23. +5
    -5
      kernel/loongarch64/sgemv_n_8_lasx.S
  24. +3
    -3
      kernel/loongarch64/sgemv_t_8_lasx.S
  25. +1
    -1
      kernel/x86_64/ddot.c
  26. +1
    -1
      kernel/x86_64/drot.c
  27. +1
    -1
      kernel/x86_64/srot.c
  28. +1
    -1
      kernel/x86_64/zdot.c
  29. +1
    -1
      lapack-netlib/LAPACKE/src/lapacke_nancheck.c
  30. +5
    -0
      lapack/laswp/loongarch64/Makefile
  31. +2
    -2
      lapack/lauum/lauum_L_parallel.c
  32. +2
    -2
      lapack/lauum/lauum_U_parallel.c
  33. +1
    -1
      lapack/potrf/potrf_L_parallel.c
  34. +1
    -1
      lapack/potrf/potrf_U_parallel.c
  35. +7
    -7
      utest/ctest.h

+ 3
- 0
.github/workflows/loongarch64.yml View File

@@ -18,6 +18,9 @@ jobs:
- target: LOONGSON2K1000
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 TARGET=LOONGSON2K1000
- target: DYNAMIC_ARCH
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC

steps:
- name: Checkout repository


+ 1
- 1
Makefile.system View File

@@ -1168,7 +1168,7 @@ endif
ifeq ($(F_COMPILER), IBM)
CCOMMON_OPT += -DF_INTERFACE_IBM
FEXTRALIB += -lxlf90
ifeq ($(C_COMPILER), GCC)
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG))
FCOMMON_OPT += -qextname
else ifeq ($(C_COMPILER), CLANG)
FCOMMON_OPT += -qextname


+ 8
- 8
common_thread.h View File

@@ -192,27 +192,27 @@ int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int threads);
void *c, BLASLONG ldc, int (*function)(void), int threads);

int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *, void *, BLASLONG);

int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG), void *, void *, BLASLONG);

int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG);

int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG, BLASLONG);

int trsm_thread(int mode, BLASLONG m, BLASLONG n,
double alpha_r, double alpha_i,
void *a, BLASLONG lda,
void *c, BLASLONG ldc, int (*function)(), void *buffer);
void *c, BLASLONG ldc, int (*function)(void), void *buffer);

int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void*, void*, BLASLONG);

int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
void *offsetA, BLASLONG lda,
void *offsetB, BLASLONG jb,
void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
void *ipiv, BLASLONG offset, int (*function)(void), void *buffer);

#endif /* ENDIF ASSEMBLER */



+ 5
- 5
cpuid_x86.c View File

@@ -194,7 +194,7 @@ static C_INLINE void xgetbv(int op, int * eax, int * edx){
}
#endif

int support_avx(){
int support_avx(void){
#ifndef NO_AVX
int eax, ebx, ecx, edx;
int ret=0;
@@ -212,7 +212,7 @@ int support_avx(){
#endif
}

int support_avx2(){
int support_avx2(void){
#ifndef NO_AVX2
int eax, ebx, ecx=0, edx;
int ret=0;
@@ -228,7 +228,7 @@ int support_avx2(){
#endif
}

int support_avx512(){
int support_avx512(void){
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@@ -250,7 +250,7 @@ int support_avx512(){
#endif
}

int support_avx512_bf16(){
int support_avx512_bf16(void){
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@@ -271,7 +271,7 @@ int support_avx512_bf16(){
#define BIT_AMX_BF16 0x00400000
#define BIT_AMX_ENBD 0x00060000

int support_amx_bf16() {
int support_amx_bf16(void) {
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;


+ 1
- 1
driver/level3/gemm_thread_m.c View File

@@ -40,7 +40,7 @@
#include <stdlib.h>
#include "common.h"

int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {

blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];


+ 1
- 1
driver/level3/gemm_thread_mn.c View File

@@ -60,7 +60,7 @@ static const int divide_rule[][2] =
{ 1, 61}, { 2, 31}, { 7, 9}, { 8, 8},
};

int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {

blas_queue_t queue[MAX_CPU_NUMBER];



+ 1
- 1
driver/level3/gemm_thread_n.c View File

@@ -40,7 +40,7 @@
#include <stdlib.h>
#include "common.h"

int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {

blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];


+ 1
- 1
driver/level3/gemm_thread_variable.c View File

@@ -42,7 +42,7 @@

int CNAME(int mode,
blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
int (*function)(), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {
int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {

blas_queue_t queue[MAX_CPU_NUMBER];



+ 1
- 1
driver/level3/syrk_thread.c View File

@@ -41,7 +41,7 @@
#include <math.h>
#include "common.h"

int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {

blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];


+ 2
- 2
driver/others/blas_l1_thread.c View File

@@ -43,7 +43,7 @@
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
void *c, BLASLONG ldc, int (*function)(void), int nthreads){

blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];
@@ -141,7 +141,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
void *c, BLASLONG ldc, int (*function)(void), int nthreads){

blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];


+ 1
- 1
driver/others/blas_server.c View File

@@ -93,7 +93,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif

extern unsigned int openblas_thread_timeout();
extern unsigned int openblas_thread_timeout(void);

#ifdef SMP_SERVER



+ 5
- 5
driver/others/blas_server_omp.c View File

@@ -70,7 +70,7 @@
int blas_server_avail = 0;
int blas_omp_number_max = 0;

extern int openblas_omp_adaptive_env();
extern int openblas_omp_adaptive_env(void);

static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
#ifdef HAVE_C11
@@ -79,7 +79,7 @@ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
#endif

static void adjust_thread_buffers() {
static void adjust_thread_buffers(void) {

int i=0, j=0;

@@ -124,9 +124,9 @@ void openblas_set_num_threads(int num_threads) {
}

int blas_thread_init(void){
if(blas_omp_number_max <= 0)
blas_omp_number_max = omp_get_max_threads();
if(blas_omp_number_max <= 0)
blas_omp_number_max = omp_get_max_threads();
blas_get_cpu_number();

adjust_thread_buffers();


+ 13
- 13
driver/others/memory.c View File

@@ -427,9 +427,9 @@ int goto_get_num_procs (void) {
return blas_cpu_number;
}

static void blas_memory_init();
static void blas_memory_init(void);

void openblas_fork_handler()
void openblas_fork_handler(void)
{
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
// built with "make USE_OPENMP=0".
@@ -446,9 +446,9 @@ void openblas_fork_handler()
#endif
}

extern int openblas_num_threads_env();
extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env();
extern int openblas_num_threads_env(void);
extern int openblas_goto_num_threads_env(void);
extern int openblas_omp_num_threads_env(void);

int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU)
@@ -592,7 +592,7 @@ static BLASULONG key_lock = 0UL;
#endif

/* Returns a pointer to the start of the per-thread memory allocation data */
static __inline struct alloc_t ** get_memory_table() {
static __inline struct alloc_t ** get_memory_table(void) {
#if defined(SMP)
LOCK_COMMAND(&key_lock);
lsk=local_storage_key;
@@ -1145,7 +1145,7 @@ static void blas_memory_cleanup(void* ptr){
}
}

static void blas_memory_init(){
static void blas_memory_init(void){
#if defined(SMP)
# if defined(OS_WINDOWS)
local_storage_key = TlsAlloc();
@@ -1502,7 +1502,7 @@ static void gotoblas_memory_init(void) {
/* Initialization for all function; this function should be called before main */

static int gotoblas_initialized = 0;
extern void openblas_read_env();
extern void openblas_read_env(void);

void CONSTRUCTOR gotoblas_init(void) {

@@ -1999,7 +1999,7 @@ int goto_get_num_procs (void) {
return blas_cpu_number;
}

void openblas_fork_handler()
void openblas_fork_handler(void)
{
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
// built with "make USE_OPENMP=0".
@@ -2016,9 +2016,9 @@ void openblas_fork_handler()
#endif
}

extern int openblas_num_threads_env();
extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env();
extern int openblas_num_threads_env(void);
extern int openblas_goto_num_threads_env(void);
extern int openblas_omp_num_threads_env(void);

int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU)
@@ -3339,7 +3339,7 @@ static void gotoblas_memory_init(void) {
/* Initialization for all function; this function should be called before main */

static int gotoblas_initialized = 0;
extern void openblas_read_env();
extern void openblas_read_env(void);

void CONSTRUCTOR gotoblas_init(void) {



+ 9
- 9
driver/others/openblas_env.c View File

@@ -41,15 +41,15 @@ static int openblas_env_goto_num_threads=0;
static int openblas_env_omp_num_threads=0;
static int openblas_env_omp_adaptive=0;

int openblas_verbose() { return openblas_env_verbose;}
unsigned int openblas_thread_timeout() { return openblas_env_thread_timeout;}
int openblas_block_factor() { return openblas_env_block_factor;}
int openblas_num_threads_env() { return openblas_env_openblas_num_threads;}
int openblas_goto_num_threads_env() { return openblas_env_goto_num_threads;}
int openblas_omp_num_threads_env() { return openblas_env_omp_num_threads;}
int openblas_omp_adaptive_env() { return openblas_env_omp_adaptive;}
void openblas_read_env() {
int openblas_verbose(void) { return openblas_env_verbose;}
unsigned int openblas_thread_timeout(void) { return openblas_env_thread_timeout;}
int openblas_block_factor(void) { return openblas_env_block_factor;}
int openblas_num_threads_env(void) { return openblas_env_openblas_num_threads;}
int openblas_goto_num_threads_env(void) { return openblas_env_goto_num_threads;}
int openblas_omp_num_threads_env(void) { return openblas_env_omp_num_threads;}
int openblas_omp_adaptive_env(void) { return openblas_env_omp_adaptive;}
void openblas_read_env(void) {
int ret=0;
env_var_t p;
if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p);


+ 1
- 1
driver/others/openblas_error_handle.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "common.h"

extern int openblas_verbose();
extern int openblas_verbose(void);

void openblas_warning(int verbose, const char * msg) {
int current_verbose;


+ 4
- 4
driver/others/openblas_get_config.c View File

@@ -69,13 +69,13 @@ static char* openblas_config_str=""
;

#ifdef DYNAMIC_ARCH
char *gotoblas_corename();
char *gotoblas_corename(void);
#endif

static char tmp_config_str[256];
int openblas_get_parallel();
int openblas_get_parallel(void);

char* CNAME() {
char* CNAME(void) {
char tmpstr[20];
strcpy(tmp_config_str, openblas_config_str);
#ifdef DYNAMIC_ARCH
@@ -90,7 +90,7 @@ char tmpstr[20];
}


char* openblas_get_corename() {
char* openblas_get_corename(void) {
#ifndef DYNAMIC_ARCH
return CHAR_CORENAME;
#else


+ 3
- 3
driver/others/openblas_get_parallel.c View File

@@ -42,17 +42,17 @@ static int parallel = 0;


#ifdef NEEDBUNDERSCORE
int CNAME() {
int CNAME(void) {
return parallel;
}

int NAME() {
int NAME(void) {
return parallel;
}

#else
//The CNAME and NAME are the same.
int NAME() {
int NAME(void) {
return parallel;
}
#endif


+ 1
- 1
driver/others/parameter.c View File

@@ -40,7 +40,7 @@
#include <string.h>
#include "common.h"

extern int openblas_block_factor();
extern int openblas_block_factor(void);
int get_L2_size(void);

#define DEFAULT_GEMM_P 128


+ 1
- 1
interface/lapack/laswp.c View File

@@ -97,7 +97,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *

blas_level1_thread(mode, n, k1, k2, dummyalpha,
a, lda, NULL, 0, ipiv, incx,
(int(*)())laswp[flag], nthreads);
(int(*)(void))laswp[flag], nthreads);
}
#endif



+ 1
- 1
interface/lapack/zlaswp.c View File

@@ -96,7 +96,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif

blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)())laswp[flag], nthreads);
blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)(void))laswp[flag], nthreads);
}
#endif



+ 5
- 5
kernel/loongarch64/dgemv_n_8_lasx.S View File

@@ -341,7 +341,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmadd.d $f10, $f12, $f2, $f10
.endm

.macro DGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
.macro DGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
@@ -541,13 +541,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.hword .L_GAP_1_0 - .L_GAP_TABLE
.hword .L_GAP_1_1 - .L_GAP_TABLE
.L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */
DGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
DGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
.L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */
DGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
DGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */
DGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
DGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
.L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */
DGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
DGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_END:
pop_if_used 17 + 7, 24 + 4
jirl $r0, $r1, 0x0


+ 3
- 3
kernel/loongarch64/dgemv_t_8_lasx.S View File

@@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
GMADD xvf, d, TP0, A0, X0, TP0, TP1, A2, X0, TP1
.endm

.macro DGEMV_T XW:req X8:req, X4:req
.macro DGEMV_T_LASX XW:req X8:req, X4:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
@@ -472,9 +472,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.hword .L_GAP_0 - .L_GAP_TABLE
.hword .L_GAP_1 - .L_GAP_TABLE
.L_GAP_0: /* if (incx == 1) */
DGEMV_T GAP_0, X8, X4
DGEMV_T_LASX GAP_0, X8, X4
.L_GAP_1: /* if (incx != 1) */
DGEMV_T GAP_1, X8_GAP, X4_GAP
DGEMV_T_LASX GAP_1, X8_GAP, X4_GAP
.L_END:
pop_if_used 17 + 8, 24 + 3
jirl $r0, $r1, 0x0


+ 5
- 5
kernel/loongarch64/sgemv_n_8_lasx.S View File

@@ -274,7 +274,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
GST f, s, Y0_F, Y, 0
.endm

.macro SGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
.macro SGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
@@ -450,13 +450,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.hword .L_GAP_1_0 - .L_GAP_TABLE
.hword .L_GAP_1_1 - .L_GAP_TABLE
.L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */
SGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
SGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
.L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */
SGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
SGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */
SGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
SGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
.L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */
SGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
SGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_END:
pop_if_used 17 + 7, 19
jirl $r0, $r1, 0x0


+ 3
- 3
kernel/loongarch64/sgemv_t_8_lasx.S View File

@@ -160,7 +160,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1
.endm

.macro SGEMV_T XW:req X8:req, X4:req
.macro SGEMV_T_LASX XW:req X8:req, X4:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
@@ -396,9 +396,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.hword .L_GAP_0 - .L_GAP_TABLE
.hword .L_GAP_1 - .L_GAP_TABLE
.L_GAP_0: /* if (incx == 1) */
SGEMV_T GAP_0, X8, X4
SGEMV_T_LASX GAP_0, X8, X4
.L_GAP_1: /* if (incx != 1) */
SGEMV_T GAP_1, X8_GAP, X4_GAP
SGEMV_T_LASX GAP_1, X8_GAP, X4_GAP
.L_END:
pop_if_used 17 + 8, 18
jirl $r0, $r1, 0x0


+ 1
- 1
kernel/x86_64/ddot.c View File

@@ -159,7 +159,7 @@ static int dot_thread_function(BLASLONG n, BLASLONG dummy0,

extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads);
void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif

FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)


+ 1
- 1
kernel/x86_64/drot.c View File

@@ -169,7 +169,7 @@ static int rot_thread_function(blas_arg_t *args)
return 0;
}

extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads);
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
{


+ 1
- 1
kernel/x86_64/srot.c View File

@@ -171,7 +171,7 @@ static int rot_thread_function(blas_arg_t *args)
return 0;
}

extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads);
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
{


+ 1
- 1
kernel/x86_64/zdot.c View File

@@ -92,7 +92,7 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
#if defined(SMP)
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads);
void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif


+ 1
- 1
lapack-netlib/LAPACKE/src/lapacke_nancheck.c View File

@@ -39,7 +39,7 @@ void LAPACKE_set_nancheck( int flag )
nancheck_flag = ( flag ) ? 1 : 0;
}

int LAPACKE_get_nancheck( )
int LAPACKE_get_nancheck( void )
{
char* env;
if ( nancheck_flag != -1 ) {


+ 5
- 0
lapack/laswp/loongarch64/Makefile View File

@@ -1,6 +1,11 @@
TOPDIR = ../../..
include ../../../Makefile.system

ifeq ($(DYNAMIC_ARCH), 1)
LASWP = ../generic/laswp_k_4.c
ZLASWP = ../generic/zlaswp_k_4.c
endif

ifndef LASWP
LASWP = ../generic/laswp_k.c
endif


+ 2
- 2
lapack/lauum/lauum_L_parallel.c View File

@@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.c = a;

syrk_thread(mode | BLAS_TRANSA_T | BLAS_TRANSB_N | BLAS_UPLO,
&newarg, NULL, NULL, (int (*)(void))HERK_LC, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG))HERK_LC, sa, sb, args -> nthreads);

newarg.m = bk;
newarg.n = i;
@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + (i ) * COMPSIZE;

gemm_thread_n(mode | BLAS_TRANSA_T,
&newarg, NULL, NULL, (int (*)(void))TRMM_LCLN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG))TRMM_LCLN, sa, sb, args -> nthreads);

newarg.m = bk;
newarg.n = bk;


+ 2
- 2
lapack/lauum/lauum_U_parallel.c View File

@@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.c = a;

syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T,
&newarg, NULL, NULL, (int (*)(void))HERK_UN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG))HERK_UN, sa, sb, args -> nthreads);

newarg.m = i;
newarg.n = bk;
@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + ( i * lda) * COMPSIZE;

gemm_thread_m(mode | BLAS_TRANSA_T | BLAS_RSIDE,
&newarg, NULL, NULL, (int (*)(void))TRMM_RCUN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG))TRMM_RCUN, sa, sb, args -> nthreads);

newarg.m = bk;
newarg.n = bk;


+ 1
- 1
lapack/potrf/potrf_L_parallel.c View File

@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + (i + bk + i * lda) * COMPSIZE;

gemm_thread_m(mode | BLAS_RSIDE | BLAS_TRANSA_T | BLAS_UPLO,
&newarg, NULL, NULL, (int (*)(void))TRSM_RCLN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG))TRSM_RCLN, sa, sb, args -> nthreads);

newarg.n = n - i - bk;
newarg.k = bk;


+ 1
- 1
lapack/potrf/potrf_U_parallel.c View File

@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + (i + (i + bk) * lda) * COMPSIZE;

gemm_thread_n(mode | BLAS_TRANSA_T,
&newarg, NULL, NULL, (int (*)(void))TRSM_LCUN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG))TRSM_LCUN, sa, sb, args -> nthreads);

newarg.n = n - i - bk;
newarg.k = bk;


+ 7
- 7
utest/ctest.h View File

@@ -41,7 +41,7 @@ typedef void (*RunWithDataFunc)(void*);
struct ctest {
const char* ssname; // suite name
const char* ttname; // test name
void (*run)();
void (*run)(void);
int skip;

void* data;
@@ -159,9 +159,9 @@ struct ctest {
void WEAK sname##_teardown(struct sname##_data* data)

#define __CTEST_INTERNAL(sname, tname, _skip) \
void __FNAME(sname, tname)(); \
void __FNAME(sname, tname)(void); \
__CTEST_STRUCT(sname, tname, _skip, NULL, NULL, NULL) \
void __FNAME(sname, tname)()
void __FNAME(sname, tname)(void)

#ifdef __CTEST_APPLE
#define SETUP_FNAME(sname) NULL
@@ -366,7 +366,7 @@ void __ctest_addTest(struct ctest *test)
#ifndef __CTEST_MSVC
/* Add all tests to linked list automatically.
*/
static void __ctest_linkTests()
static void __ctest_linkTests(void)
{
struct ctest ** test;
struct ctest ** ctest_begin = (struct ctest **)__PNAME(suite, test);
@@ -401,7 +401,7 @@ static void __ctest_linkTests()
__ctest_head_p = ctest_begin;
}
#else //for msvc
static void __ctest_linkTests()
static void __ctest_linkTests(void)
{
struct ctest ** ctest_start = __ctest_head_p;
struct ctest ** test;
@@ -450,7 +450,7 @@ static void msg_start(const char* color, const char* title) {
print_errormsg(" %s: ", title);
}

static void msg_end() {
static void msg_end(void) {
if (color_output) {
print_errormsg(ANSI_NORMAL);
}
@@ -634,7 +634,7 @@ static int suite_test_filter(struct ctest* t) {


#ifndef __CTEST_NO_TIME
static uint64_t getCurrentTime() {
static uint64_t getCurrentTime(void) {
struct timeval now;
gettimeofday(&now, NULL);
uint64_t now64 = (uint64_t) now.tv_sec;


Loading…
Cancel
Save