Co-Authored-By: Ralph Campbell <ralph.campbell@broadcom.com>
tags/v0.2.16.rc1
| @@ -4,4 +4,8 @@ CCOMMON_OPT += -march=armv8-a | |||||
| FCOMMON_OPT += -march=armv8-a | FCOMMON_OPT += -march=armv8-a | ||||
| endif | endif | ||||
| ifeq ($(CORE), CORTEXA57) | |||||
| CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
| FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
| endif | |||||
| @@ -74,3 +74,5 @@ ARMV5 | |||||
| 7.ARM 64-bit CPU: | 7.ARM 64-bit CPU: | ||||
| ARMV8 | ARMV8 | ||||
| CORTEXA57 | |||||
| @@ -89,8 +89,10 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| #if defined(ASSEMBLER) && !defined(NEEDPARAM) | #if defined(ASSEMBLER) && !defined(NEEDPARAM) | ||||
| #define PROLOGUE \ | #define PROLOGUE \ | ||||
| .text ;\ | |||||
| .align 4 ;\ | |||||
| .global REALNAME ;\ | .global REALNAME ;\ | ||||
| .func REALNAME ;\ | |||||
| .type REALNAME, %function ;\ | |||||
| REALNAME: | REALNAME: | ||||
| #define EPILOGUE | #define EPILOGUE | ||||
| @@ -107,7 +109,11 @@ REALNAME: | |||||
| #endif | #endif | ||||
| #define HUGE_PAGESIZE ( 4 << 20) | #define HUGE_PAGESIZE ( 4 << 20) | ||||
| #if defined(CORTEXA57) | |||||
| #define BUFFER_SIZE (128 << 20) | |||||
| #else | |||||
| #define BUFFER_SIZE (16 << 20) | #define BUFFER_SIZE (16 << 20) | ||||
| #endif | |||||
| #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | ||||
| @@ -29,12 +29,19 @@ | |||||
| #define CPU_UNKNOWN 0 | #define CPU_UNKNOWN 0 | ||||
| #define CPU_ARMV8 1 | #define CPU_ARMV8 1 | ||||
| #define CPU_CORTEXA57 2 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKOWN", | |||||
| "ARMV8" | |||||
| "UNKNOWN", | |||||
| "ARMV8" , | |||||
| "CORTEXA57" | |||||
| }; | }; | ||||
| static char *cpuname_lower[] = { | |||||
| "unknown", | |||||
| "armv8" , | |||||
| "cortexa57" | |||||
| }; | |||||
| int get_feature(char *search) | int get_feature(char *search) | ||||
| { | { | ||||
| @@ -53,13 +60,13 @@ int get_feature(char *search) | |||||
| { | { | ||||
| p = strchr(buffer, ':') + 2; | p = strchr(buffer, ':') + 2; | ||||
| break; | break; | ||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| fclose(infile); | |||||
| if( p == NULL ) return; | |||||
| if( p == NULL ) return 0; | |||||
| t = strtok(p," "); | t = strtok(p," "); | ||||
| while( t = strtok(NULL," ")) | while( t = strtok(NULL," ")) | ||||
| @@ -82,11 +89,30 @@ int detect(void) | |||||
| p = (char *) NULL ; | p = (char *) NULL ; | ||||
| infile = fopen("/proc/cpuinfo", "r"); | infile = fopen("/proc/cpuinfo", "r"); | ||||
| while (fgets(buffer, sizeof(buffer), infile)) | |||||
| { | |||||
| if (!strncmp("CPU part", buffer, 8)) | |||||
| { | |||||
| p = strchr(buffer, ':') + 2; | |||||
| break; | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| if(p != NULL) { | |||||
| if (strstr(p, "0xd07")) { | |||||
| return CPU_CORTEXA57; | |||||
| } | |||||
| } | |||||
| p = (char *) NULL ; | |||||
| infile = fopen("/proc/cpuinfo", "r"); | |||||
| while (fgets(buffer, sizeof(buffer), infile)) | while (fgets(buffer, sizeof(buffer), infile)) | ||||
| { | { | ||||
| if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) | |||||
| if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) || | |||||
| (!strncmp("CPU architecture", buffer, 16))) | |||||
| { | { | ||||
| p = strchr(buffer, ':') + 2; | p = strchr(buffer, ':') + 2; | ||||
| break; | break; | ||||
| @@ -100,7 +126,7 @@ int detect(void) | |||||
| if (strstr(p, "AArch64")) | if (strstr(p, "AArch64")) | ||||
| { | { | ||||
| return CPU_ARMV8; | |||||
| return CPU_ARMV8; | |||||
| } | } | ||||
| @@ -118,23 +144,13 @@ char *get_corename(void) | |||||
| void get_architecture(void) | void get_architecture(void) | ||||
| { | { | ||||
| printf("ARM"); | |||||
| printf("ARM64"); | |||||
| } | } | ||||
| void get_subarchitecture(void) | void get_subarchitecture(void) | ||||
| { | { | ||||
| int d = detect(); | int d = detect(); | ||||
| switch (d) | |||||
| { | |||||
| case CPU_ARMV8: | |||||
| printf("ARMV8"); | |||||
| break; | |||||
| default: | |||||
| printf("UNKNOWN"); | |||||
| break; | |||||
| } | |||||
| printf("%s", cpuname[d]); | |||||
| } | } | ||||
| void get_subdirname(void) | void get_subdirname(void) | ||||
| @@ -160,26 +176,32 @@ void get_cpuconfig(void) | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | printf("#define L2_ASSOCIATIVE 4\n"); | ||||
| break; | break; | ||||
| case CPU_CORTEXA57: | |||||
| printf("#define CORTEXA57\n"); | |||||
| printf("#define HAVE_VFP\n"); | |||||
| printf("#define HAVE_VFPV3\n"); | |||||
| printf("#define HAVE_NEON\n"); | |||||
| printf("#define HAVE_VFPV4\n"); | |||||
| printf("#define L1_CODE_SIZE 49152\n"); | |||||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||||
| printf("#define L1_CODE_ASSOCIATIVE 3\n"); | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L1_DATA_ASSOCIATIVE 2\n"); | |||||
| printf("#define L2_SIZE 2097152\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
| break; | |||||
| } | } | ||||
| } | } | ||||
| void get_libname(void) | void get_libname(void) | ||||
| { | { | ||||
| int d = detect(); | int d = detect(); | ||||
| switch (d) | |||||
| { | |||||
| case CPU_ARMV8: | |||||
| printf("armv8\n"); | |||||
| break; | |||||
| } | |||||
| printf("%s", cpuname_lower[d]); | |||||
| } | } | ||||
| void get_features(void) | void get_features(void) | ||||
| { | { | ||||
| @@ -819,10 +819,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " | "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " | ||||
| #define LIBNAME "armv8" | #define LIBNAME "armv8" | ||||
| #define CORENAME "XGENE1" | |||||
| #else | |||||
| #define CORENAME "ARMV8" | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA57 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "ARMV8" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA57 " \ | |||||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||||
| #define LIBNAME "cortexa57" | |||||
| #define CORENAME "CORTEXA57" | |||||
| #else | |||||
| #endif | |||||
| #ifndef FORCE | #ifndef FORCE | ||||
| @@ -0,0 +1,2 @@ | |||||
| include $(KERNELDIR)/KERNEL.ARMV8 | |||||
| @@ -2214,6 +2214,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ZGEMM_DEFAULT_R 4096 | #define ZGEMM_DEFAULT_R 4096 | ||||
| #define SYMV_P 16 | |||||
| #endif | |||||
| #if defined(CORTEXA57) | |||||
| #define SNUMOPT 2 | |||||
| #define DNUMOPT 2 | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define SGEMM_DEFAULT_P 128 | |||||
| #define DGEMM_DEFAULT_P 512 | |||||
| #define CGEMM_DEFAULT_P 96 | |||||
| #define ZGEMM_DEFAULT_P 64 | |||||
| #define SGEMM_DEFAULT_Q 240 | |||||
| #define DGEMM_DEFAULT_Q 480 | |||||
| #define CGEMM_DEFAULT_Q 120 | |||||
| #define ZGEMM_DEFAULT_Q 120 | |||||
| #define SGEMM_DEFAULT_R 12288 | |||||
| #define DGEMM_DEFAULT_R 8192 | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 4096 | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #endif | #endif | ||||