Browse Source

Identify all cores, group by performance and report the fastest TARGET

tags/v0.3.29
Martin Kroeker GitHub 1 year ago
parent
commit
be807c98a6
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
1 changed files with 123 additions and 61 deletions
  1. +123
    -61
      cpuid_arm64.c

+ 123
- 61
cpuid_arm64.c View File

@@ -25,6 +25,7 @@
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdlib.h>
#include <string.h> #include <string.h>
#ifdef __APPLE__ #ifdef __APPLE__
#include <sys/sysctl.h> #include <sys/sysctl.h>
@@ -33,6 +34,20 @@ size_t length=sizeof(value);
int64_t value64; int64_t value64;
size_t length64=sizeof(value64); size_t length64=sizeof(value64);
#endif #endif
#if (defined OS_LINUX || defined OS_ANDROID)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif

/*
 * get_cpu_ftr(id, var): read a system register into `var`.
 *
 * `id` is stringified and pasted after the "mrs" mnemonic, so it must be a
 * register name the assembler accepts; `var` is bound as the "=r" output
 * operand and receives the value. The body is wrapped in a GNU statement
 * expression, so a GCC-compatible compiler is required.
 */
#define get_cpu_ftr(id, var) ({ \
__asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
})
#endif


#define CPU_UNKNOWN 0 #define CPU_UNKNOWN 0
#define CPU_ARMV8 1 #define CPU_ARMV8 1
@@ -42,11 +57,9 @@ size_t length64=sizeof(value64);
#define CPU_CORTEXA57 3 #define CPU_CORTEXA57 3
#define CPU_CORTEXA72 4 #define CPU_CORTEXA72 4
#define CPU_CORTEXA73 5 #define CPU_CORTEXA73 5
#define CPU_CORTEXA76 23
#define CPU_NEOVERSEN1 11 #define CPU_NEOVERSEN1 11
#define CPU_NEOVERSEV1 16 #define CPU_NEOVERSEV1 16
#define CPU_NEOVERSEN2 17 #define CPU_NEOVERSEN2 17
#define CPU_NEOVERSEV2 24
#define CPU_CORTEXX1 18 #define CPU_CORTEXX1 18
#define CPU_CORTEXX2 19 #define CPU_CORTEXX2 19
#define CPU_CORTEXA510 20 #define CPU_CORTEXA510 20
@@ -91,9 +104,7 @@ static char *cpuname[] = {
"CORTEXX2", "CORTEXX2",
"CORTEXA510", "CORTEXA510",
"CORTEXA710", "CORTEXA710",
"FT2000",
"CORTEXA76",
"NEOVERSEV2"
"FT2000"
}; };


static char *cpuname_lower[] = { static char *cpuname_lower[] = {
@@ -119,15 +130,17 @@ static char *cpuname_lower[] = {
"cortexx2", "cortexx2",
"cortexa510", "cortexa510",
"cortexa710", "cortexa710",
"ft2000",
"cortexa76",
"neoversev2"
"ft2000"
}; };


/*
 * Per-class core counters. detect() resets them to zero and increments one
 * of them for each core it classifies as low, mid or high performance;
 * get_cpucount() prints NUM_CORES_LP / NUM_CORES_MP / NUM_CORES_HP for each
 * counter that is greater than zero.
 */
static int cpulowperf=0;
static int cpumidperf=0;
static int cpuhiperf=0;

int get_feature(char *search) int get_feature(char *search)
{ {


#if defined( __linux ) || defined( __NetBSD__ )
#ifdef __linux
FILE *infile; FILE *infile;
char buffer[2048], *p,*t; char buffer[2048], *p,*t;
p = (char *) NULL ; p = (char *) NULL ;
@@ -158,33 +171,108 @@ int get_feature(char *search)
#endif #endif
return(0); return(0);
} }

/*
 * qsort() comparator that orders int values in descending order, so the
 * largest element ends up at index 0.
 *
 * model1, model2: pointers to the two int elements being compared.
 * Returns a negative value if *model1 should sort before *model2 (i.e. it is
 * larger), a positive value if it should sort after, and 0 if they are equal.
 */
static int cpusort(const void *model1, const void *model2)
{
    const int lhs = *(const int *)model1;
    const int rhs = *(const int *)model2;

    /* Compare instead of subtracting: a difference of two ints can overflow. */
    return (rhs > lhs) - (rhs < lhs);
}


int detect(void) int detect(void)
{ {


#if defined( __linux ) || defined( __NetBSD__ )

#ifdef __linux
int n,i,ii;
int midr_el1;
int implementer;
int cpucap[1024];
int cpucores[1024];
FILE *infile; FILE *infile;
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
char cpupart[6],cpuimpl[6];
char *cpu_impl=NULL,*cpu_pt=NULL;
char buffer[2048], *p, *cpu_part = NULL, *cpu_implementer = NULL;
p = (char *) NULL ; p = (char *) NULL ;

infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)) {
if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
break;
cpulowperf=cpumidperf=cpuhiperf=0;
for (i=0;i<1024;i++)cpucores[i]=0;
n=0;
infile = fopen("/sys/devices/system/cpu/possible", "r");
if (!infile) {
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)) {
if (!strncmp("processor", buffer, 9))
n++;
} }

if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
cpu_part = strchr(buffer, ':') + 2;
cpu_part = strdup(cpu_part);
} else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
cpu_implementer = strchr(buffer, ':') + 2;
cpu_implementer = strdup(cpu_implementer);
} else {
fgets(buffer, sizeof(buffer), infile);
sscanf(buffer,"0-%d",&n);
n++;
}
fclose(infile);

cpu_implementer=NULL;
for (i=0;i<n;i++){
sprintf(buffer,"/sys/devices/system/cpu/cpu%d/regs/identification/midr_el1",i);
infile= fopen(buffer,"r");
if (!infile) {
infile = fopen("/proc/cpuinfo", "r");
for (ii=0;ii<n;ii++){
cpu_part=NULL;cpu_implementer=NULL;
while (fgets(buffer, sizeof(buffer), infile)) {
if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
break;
}

if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
cpu_pt = strchr(buffer, ':') + 2;
cpu_part = strdup(cpu_pt);
cpucores[i]=strtol(cpu_part,NULL,0);

} else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
cpu_impl = strchr(buffer, ':') + 2;
cpu_implementer = strdup(cpu_impl);
}

}
if (strstr(cpu_implementer, "0x41")) {
if (cpucores[ii] >= 0xd4b) cpuhiperf++;
else
if (cpucores[ii] >= 0xd07) cpumidperf++;
else cpulowperf++;
}
else cpulowperf++;
}
fclose(infile);
break;
} else {
(void)fgets(buffer, sizeof(buffer), infile);
midr_el1=strtoul(buffer,NULL,16);
fclose(infile);
implementer = (midr_el1 >> 24) & 0xFF;
cpucores[i] = (midr_el1 >> 4) & 0xFFF;
sprintf(buffer,"/sys/devices/system/cpu/cpu%d/cpu_capacity",i);
infile= fopen(buffer,"r");
if (!infile) {
if (implementer== 65) {
if (cpucores[i] >= 0xd4b) cpuhiperf++;
else
if (cpucores[i] >= 0xd07) cpumidperf++;
else cpulowperf++;
}
else cpulowperf++;
} else {
(void)fgets(buffer, sizeof(buffer), infile);
sscanf(buffer,"%d",&cpucap[i]);
if (cpucap[i] >= 1000) cpuhiperf++;
else
if (cpucap[i] >= 500) cpumidperf++;
else cpulowperf++;
fclose(infile);
}
} }
sprintf(cpuimpl,"0x%2x",implementer);
cpu_implementer=strdup(cpuimpl);
} }

fclose(infile);
qsort(cpucores,1024,sizeof(int),cpusort);
sprintf(cpupart,"0x%3x",cpucores[0]);
cpu_part=strdup(cpupart);
if(cpu_part != NULL && cpu_implementer != NULL) { if(cpu_part != NULL && cpu_implementer != NULL) {
// Arm // Arm
if (strstr(cpu_implementer, "0x41")) { if (strstr(cpu_implementer, "0x41")) {
@@ -216,10 +304,6 @@ int detect(void)
return CPU_CORTEXX2; return CPU_CORTEXX2;
else if (strstr(cpu_part, "0xd4e")) //X3 else if (strstr(cpu_part, "0xd4e")) //X3
return CPU_CORTEXX2; return CPU_CORTEXX2;
else if (strstr(cpu_part, "0xd4f")) //NVIDIA Grace et al.
return CPU_NEOVERSEV2;
else if (strstr(cpu_part, "0xd0b"))
return CPU_CORTEXA76;
} }
// Qualcomm // Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
@@ -280,8 +364,6 @@ int detect(void)
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
if (value64 == 2271604202) return CPU_VORTEX; //A16/M3
if (value64 == 1867590060) return CPU_VORTEX; //M4
#endif #endif
return CPU_ARMV8; return CPU_ARMV8;
#endif #endif
@@ -314,7 +396,7 @@ void get_cpucount(void)
{ {
int n=0; int n=0;


#if defined( __linux ) || defined( __NetBSD__ )
#ifdef __linux
FILE *infile; FILE *infile;
char buffer[2048], *p,*t; char buffer[2048], *p,*t;
p = (char *) NULL ; p = (char *) NULL ;
@@ -331,6 +413,12 @@ int n=0;
fclose(infile); fclose(infile);


printf("#define NUM_CORES %d\n",n); printf("#define NUM_CORES %d\n",n);
if (cpulowperf >0)
printf("#define NUM_CORES_LP %d\n",cpulowperf);
if (cpumidperf >0)
printf("#define NUM_CORES_MP %d\n",cpumidperf);
if (cpuhiperf >0)
printf("#define NUM_CORES_HP %d\n",cpuhiperf);
#endif #endif
#ifdef __APPLE__ #ifdef __APPLE__
sysctlbyname("hw.physicalcpu_max",&value,&length,NULL,0); sysctlbyname("hw.physicalcpu_max",&value,&length,NULL,0);
@@ -347,7 +435,6 @@ void get_cpuconfig(void)
printf("#define ARMV8\n"); printf("#define ARMV8\n");
printf("#define HAVE_NEON\n"); // This shouldn't be necessary printf("#define HAVE_NEON\n"); // This shouldn't be necessary
printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary

int d = detect(); int d = detect();
switch (d) switch (d)
{ {
@@ -402,8 +489,6 @@ void get_cpuconfig(void)
break; break;


case CPU_NEOVERSEV1: case CPU_NEOVERSEV1:
printf("#define HAVE_SVE 1\n");
case CPU_CORTEXA76:
printf("#define %s\n", cpuname[d]); printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n"); printf("#define L1_CODE_LINESIZE 64\n");
@@ -431,32 +516,12 @@ void get_cpuconfig(void)
printf("#define L2_ASSOCIATIVE 8\n"); printf("#define L2_ASSOCIATIVE 8\n");
printf("#define DTB_DEFAULT_ENTRIES 48\n"); printf("#define DTB_DEFAULT_ENTRIES 48\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
printf("#define HAVE_SVE 1\n");
break;
case CPU_NEOVERSEV2:
printf("#define ARMV9\n");
printf("#define HAVE_SVE 1\n");
printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 4\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 4\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 8\n");
// L1 Data TLB = 48 entries
// L2 Data TLB = 2048 entries
printf("#define DTB_DEFAULT_ENTRIES 48\n");
printf("#define DTB_SIZE 4096\n"); // Set to 4096 for symmetry with other configs.
break; break;
case CPU_CORTEXA510: case CPU_CORTEXA510:
case CPU_CORTEXA710: case CPU_CORTEXA710:
case CPU_CORTEXX1: case CPU_CORTEXX1:
case CPU_CORTEXX2: case CPU_CORTEXX2:
printf("#define ARMV9\n"); printf("#define ARMV9\n");
printf("#define HAVE_SVE 1\n");
printf("#define %s\n", cpuname[d]); printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n"); printf("#define L1_CODE_LINESIZE 64\n");
@@ -559,8 +624,6 @@ void get_cpuconfig(void)
case CPU_VORTEX: case CPU_VORTEX:
printf("#define VORTEX \n"); printf("#define VORTEX \n");
#ifdef __APPLE__ #ifdef __APPLE__
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
if (value64 == 1867590060) printf("#define HAVE_SME 1\n");; //M4
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
printf("#define L1_CODE_SIZE %lld \n",value64); printf("#define L1_CODE_SIZE %lld \n",value64);
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
@@ -575,7 +638,6 @@ void get_cpuconfig(void)
break; break;
case CPU_A64FX: case CPU_A64FX:
printf("#define A64FX\n"); printf("#define A64FX\n");
printf("#define HAVE_SVE 1\n");
printf("#define L1_CODE_SIZE 65535\n"); printf("#define L1_CODE_SIZE 65535\n");
printf("#define L1_DATA_SIZE 65535\n"); printf("#define L1_DATA_SIZE 65535\n");
printf("#define L1_DATA_LINESIZE 256\n"); printf("#define L1_DATA_LINESIZE 256\n");
@@ -608,7 +670,7 @@ void get_libname(void)
void get_features(void) void get_features(void)
{ {


#if defined( __linux ) || defined( __NetBSD__ )
#ifdef __linux
FILE *infile; FILE *infile;
char buffer[2048], *p,*t; char buffer[2048], *p,*t;
p = (char *) NULL ; p = (char *) NULL ;


Loading…
Cancel
Save