|
|
|
@@ -1,5 +1,5 @@ |
|
|
|
/***************************************************************************** |
|
|
|
Copyright (c) 2011-2020, The OpenBLAS Project |
|
|
|
Copyright (c) 2011-2024, The OpenBLAS Project |
|
|
|
All rights reserved. |
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without |
|
|
|
@@ -32,53 +32,299 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
**********************************************************************************/ |
|
|
|
|
|
|
|
#include <stdint.h> |
|
|
|
#include <sys/auxv.h> |
|
|
|
#include <stdio.h> |
|
|
|
#include <math.h> |
|
|
|
#include <string.h> |
|
|
|
#include <sys/auxv.h> |
|
|
|
|
|
|
|
/* If LASX extension instructions supported, |
|
|
|
* using core LOONGSON3R5 |
|
|
|
* If only LSX extension instructions supported, |
|
|
|
* using core LOONGSON2K1000 |
|
|
|
* If neither LASX nor LSX extension instructions supported, |
|
|
|
* using core LOONGSONGENERIC (As far as I know, there is no such |
|
|
|
* CPU yet) |
|
|
|
*/ |
|
|
|
#define CPU_LA64_GENERIC 0 |
|
|
|
#define CPU_LA264 1 |
|
|
|
#define CPU_LA364 2 |
|
|
|
#define CPU_LA464 3 |
|
|
|
#define CPU_LA664 4 |
|
|
|
|
|
|
|
#define CPU_GENERIC 0 |
|
|
|
#define CPU_LOONGSON3R5 1 |
|
|
|
#define CPU_LOONGSON2K1000 2 |
|
|
|
#define CORE_LA64_GENERIC 0 |
|
|
|
#define CORE_LA264 1 |
|
|
|
#define CORE_LA464 2 |
|
|
|
|
|
|
|
#define LA_HWCAP_LSX (1U << 4) |
|
|
|
#define LA_HWCAP_LASX (1U << 5) |
|
|
|
|
|
|
|
#define LOONGARCH_CFG0 0x00 |
|
|
|
#define LOONGARCH_CFG2 0x02 |
|
|
|
#define LOONGARCH_CFG10 0x10 |
|
|
|
#define LOONGARCH_CFG11 0x11 |
|
|
|
#define LOONGARCH_CFG12 0x12 |
|
|
|
#define LOONGARCH_CFG13 0x13 |
|
|
|
#define LOONGARCH_CFG14 0x14 |
|
|
|
#define LASX_MASK 1<<7 |
|
|
|
#define LSX_MASK 1<<6 |
|
|
|
#define PRID_SERIES_MASK 0xf000 |
|
|
|
#define PRID_SERIES_LA264 0xa000 |
|
|
|
#define PRID_SERIES_LA364 0xb000 |
|
|
|
#define PRID_SERIES_LA464 0xc000 |
|
|
|
#define PRID_SERIES_LA664 0xd000 |
|
|
|
|
|
|
|
#define CACHE_INFO_L1_IU 0 |
|
|
|
#define CACHE_INFO_L1_D 1 |
|
|
|
#define CACHE_INFO_L2_IU 2 |
|
|
|
#define CACHE_INFO_L2_D 3 |
|
|
|
#define CACHE_INFO_L3_IU 4 |
|
|
|
#define CACHE_INFO_L3_D 5 |
|
|
|
#define L1_IU_PRESENT_MASK 0x0001 |
|
|
|
#define L1_IU_UNITY_MASK 0x0002 |
|
|
|
#define L1_D_PRESENT_MASK 0x0004 |
|
|
|
#define L2_IU_PRESENT_MASK 0x0008 |
|
|
|
#define L2_IU_UNITY_MASK 0x0010 |
|
|
|
#define L2_D_PRESENT_MASK 0x0080 |
|
|
|
#define L3_IU_PRESENT_MASK 0x0400 |
|
|
|
#define L3_IU_UNITY_MASK 0x0800 |
|
|
|
#define L3_D_PRESENT_MASK 0x4000 |
|
|
|
#define CACHE_WAY_MINUS_1_MASK 0x0000ffff |
|
|
|
#define CACHE_INDEX_LOG2_MASK 0x00ff0000 |
|
|
|
#define CACHE_LINESIZE_LOG2_MASK 0x7f000000 |
|
|
|
|
|
|
|
typedef struct { |
|
|
|
int size; |
|
|
|
int associative; |
|
|
|
int linesize; |
|
|
|
int unify; |
|
|
|
int present; |
|
|
|
} cache_info_t; |
|
|
|
|
|
|
|
/* Using microarchitecture representation */ |
|
|
|
static char *cpuname[] = { |
|
|
|
"LOONGSONGENERIC", |
|
|
|
"LOONGSON3R5", |
|
|
|
"LOONGSON2K1000" |
|
|
|
"LA64_GENERIC", |
|
|
|
"LA264", /* Loongson 64bit, 2-issue, Like 2K1000LA */ |
|
|
|
"LA364", /* Loongson 64bit, 3-issue, Like 2K2000 */ |
|
|
|
"LA464", /* Loongson 64bit, 4-issue, Like 3A5000, 3C5000L, 3C5000 and 3D5000 */ |
|
|
|
"LA664" /* Loongson 64bit, 6-issue, Like 3A6000, 3C6000 and 3D6000 */ |
|
|
|
}; |
|
|
|
|
|
|
|
static char *cpuname_lower[] = { |
|
|
|
"loongsongeneric", |
|
|
|
"loongson3r5", |
|
|
|
"loongson2k1000" |
|
|
|
"la64_generic", |
|
|
|
"la264", |
|
|
|
"la364", |
|
|
|
"la464", |
|
|
|
"la664" |
|
|
|
}; |
|
|
|
|
|
|
|
static char *corename[] = { |
|
|
|
"LA64_GENERIC", /* Implies using scalar instructions for optimization */ |
|
|
|
"LA264", /* Implies using LSX instructions for optimization */ |
|
|
|
"LA464", /* Implies using LASX instructions for optimization */ |
|
|
|
}; |
|
|
|
|
|
|
|
static char *corename_lower[] = { |
|
|
|
"la64_generic", |
|
|
|
"la264", |
|
|
|
"la464", |
|
|
|
}; |
|
|
|
|
|
|
|
int detect(void) { |
|
|
|
#ifdef __linux |
|
|
|
/* |
|
|
|
* Obtain cache and processor identification |
|
|
|
* through the cpucfg command. |
|
|
|
*/ |
|
|
|
static void get_cacheinfo(int type, cache_info_t *cacheinfo) { |
|
|
|
cache_info_t cache_info; |
|
|
|
memset(&cache_info, 0, sizeof(cache_info)); |
|
|
|
uint32_t reg_10 = 0; |
|
|
|
__asm__ volatile ( |
|
|
|
"cpucfg %0, %1 \n\t" |
|
|
|
: "+&r"(reg_10) |
|
|
|
: "r"(LOONGARCH_CFG10) |
|
|
|
); |
|
|
|
|
|
|
|
switch (type) { |
|
|
|
case CACHE_INFO_L1_IU: |
|
|
|
if (reg_10 & L1_IU_PRESENT_MASK) { |
|
|
|
uint32_t reg_11 = 0; |
|
|
|
cache_info.present = reg_10 & L1_IU_PRESENT_MASK; |
|
|
|
cache_info.unify = reg_10 & L1_IU_UNITY_MASK; |
|
|
|
__asm__ volatile ( |
|
|
|
"cpucfg %0, %1 \n\t" |
|
|
|
: "+&r"(reg_11) |
|
|
|
: "r"(LOONGARCH_CFG11) |
|
|
|
); |
|
|
|
cache_info.associative = (reg_11 & CACHE_WAY_MINUS_1_MASK) + 1; |
|
|
|
cache_info.linesize = 1 << ((reg_11 & CACHE_LINESIZE_LOG2_MASK) >> 24); |
|
|
|
cache_info.size = cache_info.associative * cache_info.linesize * |
|
|
|
(1 << ((reg_11 & CACHE_INDEX_LOG2_MASK) >> 16)); |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case CACHE_INFO_L1_D: |
|
|
|
if (reg_10 & L1_D_PRESENT_MASK) { |
|
|
|
uint32_t reg_12 = 0; |
|
|
|
cache_info.present = reg_10 & L1_D_PRESENT_MASK; |
|
|
|
__asm__ volatile ( |
|
|
|
"cpucfg %0, %1 \n\t" |
|
|
|
: "+&r"(reg_12) |
|
|
|
: "r"(LOONGARCH_CFG12) |
|
|
|
); |
|
|
|
cache_info.associative = (reg_12 & CACHE_WAY_MINUS_1_MASK) + 1; |
|
|
|
cache_info.linesize = 1 << ((reg_12 & CACHE_LINESIZE_LOG2_MASK) >> 24); |
|
|
|
cache_info.size = cache_info.associative * cache_info.linesize * |
|
|
|
(1 << ((reg_12 & CACHE_INDEX_LOG2_MASK) >> 16)); |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case CACHE_INFO_L2_IU: |
|
|
|
if (reg_10 & L2_IU_PRESENT_MASK) { |
|
|
|
uint32_t reg_13 = 0; |
|
|
|
cache_info.present = reg_10 & L2_IU_PRESENT_MASK; |
|
|
|
cache_info.unify = reg_10 & L2_IU_UNITY_MASK; |
|
|
|
__asm__ volatile ( |
|
|
|
"cpucfg %0, %1 \n\t" |
|
|
|
: "+&r"(reg_13) |
|
|
|
: "r"(LOONGARCH_CFG13) |
|
|
|
); |
|
|
|
cache_info.associative = (reg_13 & CACHE_WAY_MINUS_1_MASK) + 1; |
|
|
|
cache_info.linesize = 1 << ((reg_13 & CACHE_LINESIZE_LOG2_MASK) >> 24); |
|
|
|
cache_info.size = cache_info.associative * cache_info.linesize * |
|
|
|
(1 << ((reg_13 & CACHE_INDEX_LOG2_MASK) >> 16)); |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case CACHE_INFO_L2_D: |
|
|
|
if (reg_10 & L2_D_PRESENT_MASK) { |
|
|
|
cache_info.present = reg_10 & L2_D_PRESENT_MASK; |
|
|
|
// No date fetch |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case CACHE_INFO_L3_IU: |
|
|
|
if (reg_10 & L3_IU_PRESENT_MASK) { |
|
|
|
uint32_t reg_14 = 0; |
|
|
|
cache_info.present = reg_10 & L3_IU_PRESENT_MASK; |
|
|
|
cache_info.unify = reg_10 & L3_IU_UNITY_MASK; |
|
|
|
__asm__ volatile ( |
|
|
|
"cpucfg %0, %1 \n\t" |
|
|
|
: "+&r"(reg_14) |
|
|
|
: "r"(LOONGARCH_CFG14) |
|
|
|
); |
|
|
|
cache_info.associative = (reg_14 & CACHE_WAY_MINUS_1_MASK) + 1; |
|
|
|
cache_info.linesize = 1 << ((reg_14 & CACHE_LINESIZE_LOG2_MASK) >> 24); |
|
|
|
cache_info.size = cache_info.associative * cache_info.linesize * |
|
|
|
(1 << ((reg_14 & CACHE_INDEX_LOG2_MASK) >> 16)); |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case CACHE_INFO_L3_D: |
|
|
|
if (reg_10 & L3_D_PRESENT_MASK) { |
|
|
|
cache_info.present = reg_10 & L3_D_PRESENT_MASK; |
|
|
|
// No data fetch |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
default: |
|
|
|
break; |
|
|
|
} |
|
|
|
*cacheinfo = cache_info; |
|
|
|
} |
|
|
|
|
|
|
|
static uint32_t get_prid() { |
|
|
|
uint32_t reg = 0; |
|
|
|
__asm__ volatile ( |
|
|
|
"cpucfg %0, %1 \n\t" |
|
|
|
: "+&r"(reg) |
|
|
|
: "r"(LOONGARCH_CFG0) |
|
|
|
); |
|
|
|
return reg; |
|
|
|
} |
|
|
|
|
|
|
|
static void get_cpucount(uint32_t *count) { |
|
|
|
uint32_t num = 0; |
|
|
|
FILE *f = fopen("/proc/cpuinfo", "r"); |
|
|
|
if (!f) return; |
|
|
|
char buf[200]; |
|
|
|
while (fgets(buf, sizeof(buf), f)) |
|
|
|
{ |
|
|
|
if (!strncmp("processor", buf, 9)) |
|
|
|
num ++; |
|
|
|
} |
|
|
|
fclose(f); |
|
|
|
*count = num; |
|
|
|
} |
|
|
|
|
|
|
|
/* Detect whether the OS supports the LASX instruction set */ |
|
|
|
static int os_support_lasx() { |
|
|
|
int hwcap = (int)getauxval(AT_HWCAP); |
|
|
|
|
|
|
|
if (hwcap & LA_HWCAP_LASX) |
|
|
|
return CPU_LOONGSON3R5; |
|
|
|
else if (hwcap & LA_HWCAP_LSX) |
|
|
|
return CPU_LOONGSON2K1000; |
|
|
|
return 1; |
|
|
|
else |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* Detect whether the OS supports the LSX instruction set */ |
|
|
|
static int os_support_lsx() { |
|
|
|
int hwcap = (int)getauxval(AT_HWCAP); |
|
|
|
|
|
|
|
if (hwcap & LA_HWCAP_LSX) |
|
|
|
return 1; |
|
|
|
else |
|
|
|
return CPU_GENERIC; |
|
|
|
#endif |
|
|
|
return CPU_GENERIC; |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
int get_coretype(void) { |
|
|
|
uint32_t prid = get_prid(); |
|
|
|
switch (prid & PRID_SERIES_MASK) { |
|
|
|
case (PRID_SERIES_LA464): |
|
|
|
case (PRID_SERIES_LA664): |
|
|
|
if (os_support_lasx()) |
|
|
|
return CORE_LA464; |
|
|
|
else if (os_support_lsx()) |
|
|
|
return CORE_LA264; |
|
|
|
else |
|
|
|
return CORE_LA64_GENERIC; |
|
|
|
break; |
|
|
|
|
|
|
|
case (PRID_SERIES_LA264): |
|
|
|
case (PRID_SERIES_LA364): |
|
|
|
if (os_support_lsx()) |
|
|
|
return CORE_LA264; |
|
|
|
else |
|
|
|
return CORE_LA64_GENERIC; |
|
|
|
break; |
|
|
|
|
|
|
|
default: |
|
|
|
return CORE_LA64_GENERIC; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
int get_cputype(void) { |
|
|
|
uint32_t prid = get_prid(); |
|
|
|
switch (prid & PRID_SERIES_MASK) { |
|
|
|
case (PRID_SERIES_LA264): |
|
|
|
return CPU_LA264; |
|
|
|
break; |
|
|
|
|
|
|
|
case (PRID_SERIES_LA364): |
|
|
|
return CPU_LA364; |
|
|
|
break; |
|
|
|
|
|
|
|
case (PRID_SERIES_LA464): |
|
|
|
return CPU_LA464; |
|
|
|
break; |
|
|
|
|
|
|
|
case (PRID_SERIES_LA664): |
|
|
|
return CPU_LA664; |
|
|
|
break; |
|
|
|
|
|
|
|
default: |
|
|
|
return CPU_LA64_GENERIC; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
char *get_corename(void) { |
|
|
|
return cpuname[detect()]; |
|
|
|
return corename[get_coretype()]; |
|
|
|
} |
|
|
|
|
|
|
|
void get_libname(void){ |
|
|
|
printf("%s", corename_lower[get_coretype()]); |
|
|
|
} |
|
|
|
|
|
|
|
void get_architecture(void) { |
|
|
|
@@ -86,8 +332,7 @@ void get_architecture(void) { |
|
|
|
} |
|
|
|
|
|
|
|
void get_subarchitecture(void) { |
|
|
|
int d = detect(); |
|
|
|
printf("%s", cpuname[d]); |
|
|
|
printf("%s", cpuname[get_cputype()]); |
|
|
|
} |
|
|
|
|
|
|
|
void get_subdirname(void) { |
|
|
|
@@ -95,50 +340,69 @@ void get_subdirname(void) { |
|
|
|
} |
|
|
|
|
|
|
|
void get_cpuconfig(void) { |
|
|
|
uint32_t hwcaps = 0; |
|
|
|
int d = detect(); |
|
|
|
|
|
|
|
switch (d) { |
|
|
|
case CPU_LOONGSON3R5: |
|
|
|
printf("#define LOONGSON3R5\n"); |
|
|
|
printf("#define L1_DATA_SIZE 65536\n"); |
|
|
|
printf("#define L1_DATA_LINESIZE 64\n"); |
|
|
|
printf("#define L2_SIZE 1048576\n"); |
|
|
|
printf("#define L2_LINESIZE 64\n"); |
|
|
|
printf("#define DTB_DEFAULT_ENTRIES 64\n"); |
|
|
|
printf("#define DTB_SIZE 4096\n"); |
|
|
|
printf("#define L2_ASSOCIATIVE 16\n"); |
|
|
|
break; |
|
|
|
cache_info_t info; |
|
|
|
uint32_t num_cores = 0; |
|
|
|
|
|
|
|
case CPU_LOONGSON2K1000: |
|
|
|
printf("#define LOONGSON2K1000\n"); |
|
|
|
printf("#define L1_DATA_SIZE 65536\n"); |
|
|
|
printf("#define L1_DATA_LINESIZE 64\n"); |
|
|
|
printf("#define L2_SIZE 262144\n"); |
|
|
|
printf("#define L2_LINESIZE 64\n"); |
|
|
|
printf("#define DTB_DEFAULT_ENTRIES 64\n"); |
|
|
|
printf("#define DTB_SIZE 4096\n"); |
|
|
|
printf("#define L2_ASSOCIATIVE 16\n"); |
|
|
|
break; |
|
|
|
printf("#define %s\n", corename[get_coretype()]); // Core name |
|
|
|
|
|
|
|
default: |
|
|
|
printf("#define LOONGSONGENERIC\n"); |
|
|
|
printf("#define L1_DATA_SIZE 65536\n"); |
|
|
|
printf("#define L1_DATA_LINESIZE 64\n"); |
|
|
|
printf("#define L2_SIZE 262144\n"); |
|
|
|
printf("#define L2_LINESIZE 64\n"); |
|
|
|
printf("#define DTB_DEFAULT_ENTRIES 64\n"); |
|
|
|
printf("#define DTB_SIZE 4096\n"); |
|
|
|
printf("#define L2_ASSOCIATIVE 16\n"); |
|
|
|
break; |
|
|
|
printf("#define CPU_NAME %s\n", cpuname[get_cputype()]); // Cpu microarchitecture name |
|
|
|
|
|
|
|
get_cacheinfo(CACHE_INFO_L1_IU, &info); |
|
|
|
if (info.present) { |
|
|
|
if (info.unify) { // Unified cache, without distinguishing between instructions and data |
|
|
|
printf("#define L1_SIZE %d\n", info.size); |
|
|
|
printf("#define L1_ASSOCIATIVE %d\n", info.associative); |
|
|
|
printf("#define L1_LINESIZE %d\n", info.linesize); |
|
|
|
} else { |
|
|
|
printf("#define L1_CODE_SIZE %d\n", info.size); |
|
|
|
printf("#define L1_CODE_ASSOCIATIVE %d\n", info.associative); |
|
|
|
printf("#define L1_CODE_LINESIZE %d\n", info.linesize); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
hwcaps = (uint32_t)getauxval( AT_HWCAP ); |
|
|
|
if (hwcaps & LA_HWCAP_LSX) printf("#define HAVE_LSX\n"); |
|
|
|
if (hwcaps & LA_HWCAP_LASX) printf("#define HAVE_LASX\n"); |
|
|
|
} |
|
|
|
if (!info.unify) { |
|
|
|
get_cacheinfo(CACHE_INFO_L1_D, &info); |
|
|
|
if (info.present) { |
|
|
|
printf("#define L1_DATA_SIZE %d\n", info.size); |
|
|
|
printf("#define L1_DATA_ASSOCIATIVE %d\n", info.associative); |
|
|
|
printf("#define L1_DATA_LINESIZE %d\n", info.linesize); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
void get_libname(void){ |
|
|
|
int d = detect(); |
|
|
|
printf("%s", cpuname_lower[d]); |
|
|
|
get_cacheinfo(CACHE_INFO_L2_IU, &info); |
|
|
|
if (info.present > 0) { |
|
|
|
if (info.unify) { |
|
|
|
printf("#define L2_SIZE %d\n", info.size); |
|
|
|
printf("#define L2_ASSOCIATIVE %d\n", info.associative); |
|
|
|
printf("#define L2_LINESIZE %d\n", info.linesize); |
|
|
|
} else { |
|
|
|
printf("#define L2_CODE_SIZE %d\n", info.size); |
|
|
|
printf("#define L2_CODE_ASSOCIATIVE %d\n", info.associative); |
|
|
|
printf("#define L2_CODE_LINESIZE %d\n", info.linesize); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
get_cacheinfo(CACHE_INFO_L3_IU, &info); |
|
|
|
if (info.present > 0) { |
|
|
|
if (info.unify) { |
|
|
|
printf("#define L3_SIZE %d\n", info.size); |
|
|
|
printf("#define L3_ASSOCIATIVE %d\n", info.associative); |
|
|
|
printf("#define L3_LINESIZE %d\n", info.linesize); |
|
|
|
} else { |
|
|
|
printf("#define L3_CODE_SIZE %d\n", info.size); |
|
|
|
printf("#define L3_CODE_ASSOCIATIVE %d\n", info.associative); |
|
|
|
printf("#define L3_CODE_LINESIZE %d\n", info.linesize); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if(os_support_lsx) printf("#define HAVE_LSX\n"); |
|
|
|
if(os_support_lasx) printf("#define HAVE_LASX\n"); |
|
|
|
|
|
|
|
get_cpucount(&num_cores); |
|
|
|
if (num_cores) |
|
|
|
printf("#define NUM_CORES %d\n", num_cores); |
|
|
|
|
|
|
|
//TODO: It’s unclear what this entry represents, but it is indeed necessary. |
|
|
|
//It has been set based on reference to other platforms. |
|
|
|
printf("#define DTB_DEFAULT_ENTRIES 64\n"); |
|
|
|
} |