| @@ -101,10 +101,13 @@ static void INLINE blas_lock(volatile unsigned long *address){ | |||||
| static inline unsigned int rpcc(void){ | static inline unsigned int rpcc(void){ | ||||
| unsigned long ret; | unsigned long ret; | ||||
| #if defined(LOONGSON3A) | |||||
| #if defined(LOONGSON3A) | |||||
| unsigned long long tmp; | unsigned long long tmp; | ||||
| __asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); | __asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); | ||||
| ret=tmp; | ret=tmp; | ||||
| #elif defined(LOONGSON3B) | |||||
| //Temp Implementation. | |||||
| return 1; | |||||
| #else | #else | ||||
| __asm__ __volatile__(".set push \n" | __asm__ __volatile__(".set push \n" | ||||
| ".set mips32r2\n" | ".set mips32r2\n" | ||||
| @@ -234,6 +237,11 @@ REALNAME: ;\ | |||||
| #define FIXED_PAGESIZE (16UL << 10) | #define FIXED_PAGESIZE (16UL << 10) | ||||
| #endif | #endif | ||||
| #if defined(LOONGSON3B) /* LOONGSON3B: set both page-size macros to 16 KiB (16UL << 10 == 16384). */ | |||||
| #define PAGESIZE (16UL << 10) /* defined here, so the #ifndef PAGESIZE fallback of (64UL << 10) that follows is not taken */ | |||||
| #define FIXED_PAGESIZE (16UL << 10) | |||||
| #endif /* LOONGSON3B */ | |||||
| #ifndef PAGESIZE | #ifndef PAGESIZE | ||||
| #define PAGESIZE (64UL << 10) | #define PAGESIZE (64UL << 10) | ||||
| #endif | #endif | ||||
| @@ -245,7 +253,7 @@ REALNAME: ;\ | |||||
| #define MAP_ANONYMOUS MAP_ANON | #define MAP_ANONYMOUS MAP_ANON | ||||
| #endif | #endif | ||||
| #if defined(LOONGSON3A) | |||||
| #if defined(LOONGSON3A) || defined(LOONGSON3B) | |||||
| #define PREFETCHD_(x) ld $0, x | #define PREFETCHD_(x) ld $0, x | ||||
| #define PREFETCHD(x) PREFETCHD_(x) | #define PREFETCHD(x) PREFETCHD_(x) | ||||
| #else | #else | ||||
| @@ -72,11 +72,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CPU_UNKNOWN 0 | #define CPU_UNKNOWN 0 | ||||
| #define CPU_SICORTEX 1 | #define CPU_SICORTEX 1 | ||||
| #define CPU_LOONGSON3A 2 | #define CPU_LOONGSON3A 2 | ||||
| #define CPU_LOONGSON3B 3 /* new CPU id; equals the position of the LOONGSON3B entry in cpuname[] */ | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKOWN", | "UNKOWN", | ||||
| "SICORTEX", | "SICORTEX", | ||||
| "LOONGSON3A" | |||||
| "LOONGSON3A", /* the comma is required: without it this literal and the next would be concatenated into a single entry */ | |||||
| "LOONGSON3B" /* index 3 == CPU_LOONGSON3B. NOTE(review): entry 0 is spelled UNKOWN (sic); left as-is because it is a runtime string */ | |||||
| }; | }; | ||||
| int detect(void){ | int detect(void){ | ||||
| @@ -101,6 +103,8 @@ int detect(void){ | |||||
| if (strstr(p, "Loongson-3A")){ | if (strstr(p, "Loongson-3A")){ | ||||
| return CPU_LOONGSON3A; | return CPU_LOONGSON3A; | ||||
| }else if(strstr(p, "Loongson-3B")){ | |||||
| return CPU_LOONGSON3B; | |||||
| }else if (strstr(p, "Loongson-3")){ | }else if (strstr(p, "Loongson-3")){ | ||||
| infile = fopen("/proc/cpuinfo", "r"); | infile = fopen("/proc/cpuinfo", "r"); | ||||
| while (fgets(buffer, sizeof(buffer), infile)){ | while (fgets(buffer, sizeof(buffer), infile)){ | ||||
| @@ -130,6 +134,8 @@ void get_architecture(void){ | |||||
| void get_subarchitecture(void){ | void get_subarchitecture(void){ | ||||
| if(detect()==CPU_LOONGSON3A) { | if(detect()==CPU_LOONGSON3A) { | ||||
| printf("LOONGSON3A"); | printf("LOONGSON3A"); | ||||
| }else if(detect()==CPU_LOONGSON3B){ | |||||
| printf("LOONGSON3B"); | |||||
| }else{ | }else{ | ||||
| printf("SICORTEX"); | printf("SICORTEX"); | ||||
| } | } | ||||
| @@ -149,6 +155,15 @@ void get_cpuconfig(void){ | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | printf("#define DTB_DEFAULT_ENTRIES 64\n"); | ||||
| printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
| printf("#define L2_ASSOCIATIVE 4\n"); | printf("#define L2_ASSOCIATIVE 4\n"); | ||||
| }else if(detect()==CPU_LOONGSON3B){ | |||||
| printf("#define LOONGSON3B\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 32\n"); | |||||
| printf("#define L2_SIZE 512488\n"); | |||||
| printf("#define L2_LINESIZE 32\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||||
| }else{ | }else{ | ||||
| printf("#define SICORTEX\n"); | printf("#define SICORTEX\n"); | ||||
| printf("#define L1_DATA_SIZE 32768\n"); | printf("#define L1_DATA_SIZE 32768\n"); | ||||
| @@ -164,6 +179,8 @@ void get_cpuconfig(void){ | |||||
| void get_libname(void){ | void get_libname(void){ | ||||
| if(detect()==CPU_LOONGSON3A) { | if(detect()==CPU_LOONGSON3A) { | ||||
| printf("loongson3a\n"); | printf("loongson3a\n"); | ||||
| }else if(detect()==CPU_LOONGSON3B) { | |||||
| printf("loongson3b\n"); | |||||
| }else{ | }else{ | ||||
| #ifdef __mips64 | #ifdef __mips64 | ||||
| printf("mips64\n"); | printf("mips64\n"); | ||||
| @@ -683,7 +683,7 @@ void blas_set_parameter(void){ | |||||
| #if defined(ARCH_MIPS64) | #if defined(ARCH_MIPS64) | ||||
| void blas_set_parameter(void){ | void blas_set_parameter(void){ | ||||
| #if defined(LOONGSON3A) | |||||
| #if defined(LOONGSON3A) || defined(LOONGSON3B) | |||||
| #ifdef SMP | #ifdef SMP | ||||
| if(blas_num_threads == 1){ | if(blas_num_threads == 1){ | ||||
| #endif | #endif | ||||
| @@ -117,6 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /* #define FORCE_CELL */ | /* #define FORCE_CELL */ | ||||
| /* #define FORCE_SICORTEX */ | /* #define FORCE_SICORTEX */ | ||||
| /* #define FORCE_LOONGSON3A */ | /* #define FORCE_LOONGSON3A */ | ||||
| /* #define FORCE_LOONGSON3B */ | |||||
| /* #define FORCE_ITANIUM2 */ | /* #define FORCE_ITANIUM2 */ | ||||
| /* #define FORCE_GENERIC */ | /* #define FORCE_GENERIC */ | ||||
| /* #define FORCE_SPARC */ | /* #define FORCE_SPARC */ | ||||
| @@ -548,6 +549,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #else | #else | ||||
| #endif | #endif | ||||
| #ifdef FORCE_LOONGSON3B /* When FORCE_LOONGSON3B is defined: set FORCE and the fixed architecture, config, library and core name strings for LOONGSON3B. */ | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "MIPS" | |||||
| #define SUBARCHITECTURE "LOONGSON3B" | |||||
| #define SUBDIRNAME "mips64" | |||||
| #define ARCHCONFIG "-DLOONGSON3B " /* same macro names and values that get_cpuconfig() prints for CPU_LOONGSON3B; keep the two in sync */ \ | |||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " /* NOTE(review): 512488 is not a power of two; 512 KiB would be 524288 -- possible digit transposition, confirm before changing (get_cpuconfig() prints the same value) */ \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||||
| #define LIBNAME "loongson3b" | |||||
| #define CORENAME "LOONGSON3B" | |||||
| #else /* intentionally empty */ | |||||
| #endif /* FORCE_LOONGSON3B */ | |||||
| #ifdef FORCE_ITANIUM2 | #ifdef FORCE_ITANIUM2 | ||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "IA64" | #define ARCHITECTURE "IA64" | ||||
| @@ -0,0 +1,68 @@ | |||||
# Kernel source selection for LOONGSON3B, as listed below:
#   - AXPY, GEMV and the D/C/Z GEMM kernels name *_loongson3a* sources;
#     SGEMM uses sgemm_kernel_8x4_ps.S.
#   - every GEMM copy routine and every TRSM kernel comes from ../generic.
# NOTE(review): no 3B-specific source file is named here -- presumably the 3A
# kernels are reused on purpose; confirm.
# Comments are kept on their own lines: an inline comment after a value would
# leave trailing whitespace in the make variable.
| SAXPYKERNEL=axpy_loongson3a.S | |||||
| DAXPYKERNEL=daxpy_loongson3a_simd.S | |||||
| SGEMVNKERNEL = gemv_n_loongson3a.c | |||||
| SGEMVTKERNEL = gemv_t_loongson3a.c | |||||
| DGEMVNKERNEL = gemv_n_loongson3a.c | |||||
| DGEMVTKERNEL = gemv_t_loongson3a.c | |||||
| CGEMVNKERNEL = zgemv_n_loongson3a.c | |||||
| CGEMVTKERNEL = zgemv_t_loongson3a.c | |||||
| ZGEMVNKERNEL = zgemv_n_loongson3a.c | |||||
| ZGEMVTKERNEL = zgemv_t_loongson3a.c | |||||
| SGEMMKERNEL = sgemm_kernel_8x4_ps.S | |||||
| SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| CGEMMKERNEL = cgemm_kernel_loongson3a_4x2_ps.S | |||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| @@ -1513,6 +1513,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #endif | #endif | ||||
| #ifdef LOONGSON3B /* Definitions active only when LOONGSON3B is defined: SNUMOPT/DNUMOPT, the GEMM default offsets, alignment, unroll, P/Q/R values, GEMM_OFFSET_A1/B1 and SYMV_P. */ | |||||
| #define SNUMOPT 2 | |||||
| #define DNUMOPT 2 | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL /* 0x3fff == 16384 - 1, i.e. a mask of the low 14 bits */ | |||||
| #define SGEMM_DEFAULT_UNROLL_M 8 /* UNROLL_M x UNROLL_N per precision below: S 8x4, D 4x4, C 4x2, Z 2x2. NOTE(review): presumably must match the tile shape of the selected GEMM kernels -- confirm */ | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define SGEMM_DEFAULT_P 64 | |||||
| #define DGEMM_DEFAULT_P 44 | |||||
| #define CGEMM_DEFAULT_P 64 | |||||
| #define ZGEMM_DEFAULT_P 32 | |||||
| #define SGEMM_DEFAULT_Q 192 | |||||
| #define DGEMM_DEFAULT_Q 92 | |||||
| #define CGEMM_DEFAULT_Q 128 | |||||
| #define ZGEMM_DEFAULT_Q 80 | |||||
| #define SGEMM_DEFAULT_R 1024 | |||||
| #define DGEMM_DEFAULT_R dgemm_r /* the only R value given as an identifier rather than a literal; dgemm_r is not defined in this block */ | |||||
| #define CGEMM_DEFAULT_R 1024 | |||||
| #define ZGEMM_DEFAULT_R 1024 | |||||
| #define GEMM_OFFSET_A1 0x10000 | |||||
| #define GEMM_OFFSET_B1 0x100000 | |||||
| #define SYMV_P 16 | |||||
| #endif /* LOONGSON3B */ | |||||
| #ifdef GENERIC | #ifdef GENERIC | ||||
| #define SNUMOPT 2 | #define SNUMOPT 2 | ||||