| @@ -9,3 +9,8 @@ CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
| FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | ||||
| endif | endif | ||||
| ifeq ($(CORE), VULCAN) | |||||
| CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
| FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
| endif | |||||
| @@ -80,4 +80,5 @@ ARMV5 | |||||
| 8.ARM 64-bit CPU: | 8.ARM 64-bit CPU: | ||||
| ARMV8 | ARMV8 | ||||
| CORTEXA57 | CORTEXA57 | ||||
| VULCAN | |||||
| @@ -2193,7 +2193,7 @@ | |||||
| #endif | #endif | ||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) | |||||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) | |||||
| extern BLASLONG gemm_offset_a; | extern BLASLONG gemm_offset_a; | ||||
| extern BLASLONG gemm_offset_b; | extern BLASLONG gemm_offset_b; | ||||
| extern BLASLONG sgemm_p; | extern BLASLONG sgemm_p; | ||||
| @@ -30,17 +30,20 @@ | |||||
| #define CPU_UNKNOWN 0 | #define CPU_UNKNOWN 0 | ||||
| #define CPU_ARMV8 1 | #define CPU_ARMV8 1 | ||||
| #define CPU_CORTEXA57 2 | #define CPU_CORTEXA57 2 | ||||
| #define CPU_VULCAN 3 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| "ARMV8" , | "ARMV8" , | ||||
| "CORTEXA57" | |||||
| /* Entry order follows the CPU_* ids defined above (UNKNOWN=0 ... VULCAN=3). The comma after CORTEXA57 is required: adjacent string literals are concatenated, which would leave the table one entry short. */ | |||||
| "CORTEXA57", | |||||
| "VULCAN" | |||||
| }; | }; | ||||
| static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
| "unknown", | "unknown", | ||||
| "armv8" , | "armv8" , | ||||
| "cortexa57" | |||||
| /* Lower-case names in the same order as the CPU_* ids defined above. The comma after cortexa57 is required: adjacent string literals are concatenated, which would leave the table one entry short. */ | |||||
| "cortexa57", | |||||
| "vulcan" | |||||
| }; | }; | ||||
| int get_feature(char *search) | int get_feature(char *search) | ||||
| @@ -85,25 +88,27 @@ int detect(void) | |||||
| #ifdef linux | #ifdef linux | ||||
| FILE *infile; | FILE *infile; | ||||
| char buffer[512], *p; | |||||
| /* The two /proc/cpuinfo fields are copied into their own arrays: every fgets() call overwrites buffer, so pointers into it would go stale. An empty string means "not seen yet". */ | |||||
| char buffer[512], *p, *sep; | |||||
| char cpu_part[32] = "", cpu_implementer[32] = ""; | |||||
| p = (char *) NULL ; | p = (char *) NULL ; | ||||
| infile = fopen("/proc/cpuinfo", "r"); | infile = fopen("/proc/cpuinfo", "r"); | ||||
| while (fgets(buffer, sizeof(buffer), infile)) | |||||
| { | |||||
| /* If /proc/cpuinfo cannot be opened, skip the scan; both fields stay empty and no CPU is matched here. */ | |||||
| if (infile != NULL) { | |||||
| while (fgets(buffer, sizeof(buffer), infile)) { | |||||
| if (!strncmp("CPU part", buffer, 8)) | |||||
| { | |||||
| p = strchr(buffer, ':') + 2; | |||||
| if (!strncmp("CPU part", buffer, 8)) { | |||||
| sep = strchr(buffer, ':'); | |||||
| if (sep != NULL) snprintf(cpu_part, sizeof(cpu_part), "%s", sep + 1); | |||||
| } else if (!strncmp("CPU implementer", buffer, 15)) { | |||||
| sep = strchr(buffer, ':'); | |||||
| if (sep != NULL) snprintf(cpu_implementer, sizeof(cpu_implementer), "%s", sep + 1); | |||||
| } | |||||
| /* The two fields are on separate lines, so stop only once both have been captured. */ | |||||
| if (cpu_part[0] != '\0' && cpu_implementer[0] != '\0') { | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| fclose(infile); | fclose(infile); | ||||
| } | |||||
| if(p != NULL) { | |||||
| if (strstr(p, "0xd07")) { | |||||
| return CPU_CORTEXA57; | |||||
| } | |||||
| /* A core is identified by the (implementer, part) pair, so both values must be present. */ | |||||
| if (cpu_part[0] != '\0' && cpu_implementer[0] != '\0') { | |||||
| if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41")) | |||||
| return CPU_CORTEXA57; | |||||
| else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42")) | |||||
| return CPU_VULCAN; | |||||
| } | } | ||||
| p = (char *) NULL ; | p = (char *) NULL ; | ||||
| @@ -176,6 +181,28 @@ void get_cpuconfig(void) | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | printf("#define L2_ASSOCIATIVE 4\n"); | ||||
| break; | break; | ||||
| case CPU_VULCAN: | |||||
| /* VULCAN: print the core name, the VFP/NEON feature flags, and the L1/L2/L3 cache and DTB parameters as #define lines. */ | |||||
| printf("#define VULCAN \n"); | |||||
| printf("#define HAVE_VFP \n"); | |||||
| printf("#define HAVE_VFPV3 \n"); | |||||
| printf("#define HAVE_NEON \n"); | |||||
| printf("#define HAVE_VFPV4 \n"); | |||||
| printf("#define L1_CODE_SIZE 32768 \n"); | |||||
| printf("#define L1_CODE_LINESIZE 64 \n"); | |||||
| printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | |||||
| printf("#define L1_DATA_SIZE 32768 \n"); | |||||
| printf("#define L1_DATA_LINESIZE 64 \n"); | |||||
| printf("#define L1_DATA_ASSOCIATIVE 8 \n"); | |||||
| printf("#define L2_SIZE 262144 \n"); | |||||
| printf("#define L2_LINESIZE 64 \n"); | |||||
| printf("#define L2_ASSOCIATIVE 8 \n"); | |||||
| printf("#define L3_SIZE 33554432 \n"); | |||||
| printf("#define L3_LINESIZE 64 \n"); | |||||
| printf("#define L3_ASSOCIATIVE 32 \n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||||
| printf("#define DTB_SIZE 4096 \n"); | |||||
| break; | |||||
| case CPU_CORTEXA57: | case CPU_CORTEXA57: | ||||
| printf("#define CORTEXA57\n"); | printf("#define CORTEXA57\n"); | ||||
| printf("#define HAVE_VFP\n"); | printf("#define HAVE_VFP\n"); | ||||
| @@ -191,8 +218,8 @@ void get_cpuconfig(void) | |||||
| printf("#define L2_SIZE 2097152\n"); | printf("#define L2_SIZE 2097152\n"); | ||||
| printf("#define L2_LINESIZE 64\n"); | printf("#define L2_LINESIZE 64\n"); | ||||
| printf("#define L2_ASSOCIATIVE 16\n"); | printf("#define L2_ASSOCIATIVE 16\n"); | ||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| @@ -995,7 +995,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number(); | if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number(); | ||||
| #endif | #endif | ||||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) | |||||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) | |||||
| #ifndef DYNAMIC_ARCH | #ifndef DYNAMIC_ARCH | ||||
| blas_set_parameter(); | blas_set_parameter(); | ||||
| #endif | #endif | ||||
| @@ -727,3 +727,16 @@ void blas_set_parameter(void){ | |||||
| } | } | ||||
| #endif | #endif | ||||
| #if defined(ARCH_ARM64) | |||||
| /* ARM64 variant of blas_set_parameter(). When VULCAN is defined it assigns dgemm_p, dgemm_q and dgemm_r; for any other ARM64 build the body is empty. */ | |||||
| void blas_set_parameter(void) | |||||
| { | |||||
| #if defined(VULCAN) | |||||
| dgemm_p = 160; | |||||
| dgemm_q = 128; | |||||
| dgemm_r = 4096; | |||||
| #endif | |||||
| } | |||||
| #endif | |||||
| @@ -897,6 +897,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #else | #else | ||||
| #endif | #endif | ||||
| /* Forced-target description for VULCAN, active when FORCE_VULCAN is defined. The cache and DTB figures in ARCHCONFIG are the same values the CPU_VULCAN case of get_cpuconfig() prints. */ | |||||
| #ifdef FORCE_VULCAN | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "ARMV8" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DVULCAN " \ | |||||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||||
| "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||||
| #define LIBNAME "vulcan" | |||||
| #define CORENAME "VULCAN" | |||||
| #else | |||||
| #endif | |||||
| #ifndef FORCE | #ifndef FORCE | ||||
| #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | ||||
| @@ -0,0 +1,2 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -2303,6 +2303,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ZGEMM_DEFAULT_R 4096 | #define ZGEMM_DEFAULT_R 4096 | ||||
| #define SYMV_P 16 | |||||
| #endif | |||||
| #if defined(VULCAN) | |||||
| /* GEMM tuning parameters for VULCAN. The DGEMM P/Q/R defaults expand to the variables dgemm_p, dgemm_q and dgemm_r rather than to constants; the ARM64 blas_set_parameter() assigns them 160, 128 and 4096 for this target. */ | |||||
| #define SNUMOPT 2 | |||||
| #define DNUMOPT 2 | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 8 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define SGEMM_DEFAULT_P 512 | |||||
| #define DGEMM_DEFAULT_P dgemm_p | |||||
| #define CGEMM_DEFAULT_P 256 | |||||
| #define ZGEMM_DEFAULT_P 128 | |||||
| #define SGEMM_DEFAULT_Q 1024 | |||||
| #define DGEMM_DEFAULT_Q dgemm_q | |||||
| #define CGEMM_DEFAULT_Q 512 | |||||
| #define ZGEMM_DEFAULT_Q 512 | |||||
| #define SGEMM_DEFAULT_R 4096 | |||||
| #define DGEMM_DEFAULT_R dgemm_r | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 2048 | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #endif | #endif | ||||