| @@ -101,10 +101,13 @@ static void INLINE blas_lock(volatile unsigned long *address){ | |||
| static inline unsigned int rpcc(void){ | |||
| unsigned long ret; | |||
| #if defined(LOONGSON3A) | |||
| #if defined(LOONGSON3A) | |||
| unsigned long long tmp; | |||
| __asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); | |||
| ret=tmp; | |||
| #elif defined(LOONGSON3B) | |||
| // Temporary implementation: no cycle-counter read for LOONGSON3B yet, so a constant is returned. | |||
| return 1; | |||
| #else | |||
| __asm__ __volatile__(".set push \n" | |||
| ".set mips32r2\n" | |||
| @@ -234,6 +237,11 @@ REALNAME: ;\ | |||
| #define FIXED_PAGESIZE (16UL << 10) | |||
| #endif | |||
| #if defined(LOONGSON3B) /* LOONGSON3B target: both page-size macros are set to 16UL << 10 = 16384 bytes (16 KiB). */ | |||
| #define PAGESIZE (16UL << 10) | |||
| #define FIXED_PAGESIZE (16UL << 10) | |||
| #endif | |||
| #ifndef PAGESIZE /* Fallback when PAGESIZE is still undefined at this point: 64UL << 10 = 65536 bytes (64 KiB). */ | |||
| #define PAGESIZE (64UL << 10) | |||
| #endif | |||
| @@ -245,7 +253,7 @@ REALNAME: ;\ | |||
| #define MAP_ANONYMOUS MAP_ANON | |||
| #endif | |||
| #if defined(LOONGSON3A) | |||
| #if defined(LOONGSON3A) || defined(LOONGSON3B) | |||
| #define PREFETCHD_(x) ld $0, x | |||
| #define PREFETCHD(x) PREFETCHD_(x) | |||
| #else | |||
| @@ -72,11 +72,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CPU_UNKNOWN 0 /* CPU identifier codes; detect() returns the LOONGSON3A/3B values from its strstr checks. */ | |||
| #define CPU_SICORTEX 1 | |||
| #define CPU_LOONGSON3A 2 | |||
| #define CPU_LOONGSON3B 3 | |||
| static char *cpuname[] = { /* Names listed in the same order as the CPU_* codes; presumably indexed by them -- TODO confirm. */ | |||
| "UNKOWN", /* NOTE(review): spelling (sic) differs from CPU_UNKNOWN; runtime string, left unchanged. */ | |||
| "SICORTEX", | |||
| "LOONGSON3A" /* NOTE(review): no trailing comma, so as plain C this literal would concatenate with the next one; looks like the pre-change line of the diff -- confirm. */ | |||
| "LOONGSON3A", | |||
| "LOONGSON3B" | |||
| }; | |||
| int detect(void){ | |||
| @@ -101,6 +103,8 @@ int detect(void){ | |||
| if (strstr(p, "Loongson-3A")){ | |||
| return CPU_LOONGSON3A; | |||
| }else if(strstr(p, "Loongson-3B")){ | |||
| return CPU_LOONGSON3B; | |||
| }else if (strstr(p, "Loongson-3")){ | |||
| infile = fopen("/proc/cpuinfo", "r"); | |||
| while (fgets(buffer, sizeof(buffer), infile)){ | |||
| @@ -130,6 +134,8 @@ void get_architecture(void){ | |||
| void get_subarchitecture(void){ | |||
| if(detect()==CPU_LOONGSON3A) { | |||
| printf("LOONGSON3A"); | |||
| }else if(detect()==CPU_LOONGSON3B){ | |||
| printf("LOONGSON3B"); | |||
| }else{ | |||
| printf("SICORTEX"); | |||
| } | |||
| @@ -149,6 +155,15 @@ void get_cpuconfig(void){ | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||
| }else if(detect()==CPU_LOONGSON3B){ | |||
| printf("#define LOONGSON3B\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 32\n"); | |||
| printf("#define L2_SIZE 512488\n"); | |||
| printf("#define L2_LINESIZE 32\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||
| }else{ | |||
| printf("#define SICORTEX\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| @@ -164,6 +179,8 @@ void get_cpuconfig(void){ | |||
| void get_libname(void){ | |||
| if(detect()==CPU_LOONGSON3A) { | |||
| printf("loongson3a\n"); | |||
| }else if(detect()==CPU_LOONGSON3B) { | |||
| printf("loongson3b\n"); | |||
| }else{ | |||
| #ifdef __mips64 | |||
| printf("mips64\n"); | |||
| @@ -683,7 +683,7 @@ void blas_set_parameter(void){ | |||
| #if defined(ARCH_MIPS64) | |||
| void blas_set_parameter(void){ | |||
| #if defined(LOONGSON3A) | |||
| #if defined(LOONGSON3A) || defined(LOONGSON3B) | |||
| #ifdef SMP | |||
| if(blas_num_threads == 1){ | |||
| #endif | |||
| @@ -117,6 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /* #define FORCE_CELL */ | |||
| /* #define FORCE_SICORTEX */ | |||
| /* #define FORCE_LOONGSON3A */ | |||
| /* #define FORCE_LOONGSON3B */ | |||
| /* #define FORCE_ITANIUM2 */ | |||
| /* #define FORCE_GENERIC */ | |||
| /* #define FORCE_SPARC */ | |||
| @@ -548,6 +549,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_LOONGSON3B /* Forced-target settings, active only when FORCE_LOONGSON3B is defined. */ | |||
| #define FORCE | |||
| #define ARCHITECTURE "MIPS" | |||
| #define SUBARCHITECTURE "LOONGSON3B" | |||
| #define SUBDIRNAME "mips64" | |||
| #define ARCHCONFIG "-DLOONGSON3B " /* -D flags carrying the same cache/TLB values that get_cpuconfig() prints for CPU_LOONGSON3B. */ \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " /* NOTE(review): 512488 is not a power of two; 512 KiB would be 524288 -- confirm the intended L2 size. */ \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
| #define LIBNAME "loongson3b" | |||
| #define CORENAME "LOONGSON3B" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_ITANIUM2 | |||
| #define FORCE | |||
| #define ARCHITECTURE "IA64" | |||
| @@ -0,0 +1,68 @@ | |||
# Kernel source selection (presumably the LOONGSON3B kernel list added by this
# change -- confirm against the file name). No entry below names a
# loongson3b-specific file: the list uses loongson3a kernels,
# sgemm_kernel_8x4_ps.S, and sources under ../generic.

# AXPY kernels (single and double precision).
| SAXPYKERNEL=axpy_loongson3a.S | |||
| DAXPYKERNEL=daxpy_loongson3a_simd.S | |||
# GEMV kernels: S and D share one C source per variant; C and Z share another.
| SGEMVNKERNEL = gemv_n_loongson3a.c | |||
| SGEMVTKERNEL = gemv_t_loongson3a.c | |||
| DGEMVNKERNEL = gemv_n_loongson3a.c | |||
| DGEMVTKERNEL = gemv_t_loongson3a.c | |||
| CGEMVNKERNEL = zgemv_n_loongson3a.c | |||
| CGEMVTKERNEL = zgemv_t_loongson3a.c | |||
| ZGEMVNKERNEL = zgemv_n_loongson3a.c | |||
| ZGEMVTKERNEL = zgemv_t_loongson3a.c | |||
# GEMM kernels with their generic copy routines and the object names for them.
# SGEMM and CGEMM list IN/IT and ON/OT copies; DGEMM and ZGEMM list only ON/OT.
| SGEMMKERNEL = sgemm_kernel_8x4_ps.S | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| CGEMMKERNEL = cgemm_kernel_loongson3a_4x2_ps.S | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
# TRSM kernels: all four precisions point at the same four generic C sources.
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| @@ -1513,6 +1513,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #ifdef LOONGSON3B /* Tuning parameters defined only when LOONGSON3B is set. */ | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL /* 0x3fff = 16384 - 1. */ | |||
| #define SGEMM_DEFAULT_UNROLL_M 8 /* Unroll M x N pairs below: S 8x4, D 4x4, C 4x2, Z 2x2. */ | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||
| #define CGEMM_DEFAULT_UNROLL_M 4 | |||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||
| #define SGEMM_DEFAULT_P 64 /* Per-precision P values. */ | |||
| #define DGEMM_DEFAULT_P 44 | |||
| #define CGEMM_DEFAULT_P 64 | |||
| #define ZGEMM_DEFAULT_P 32 | |||
| #define SGEMM_DEFAULT_Q 192 /* Per-precision Q values. */ | |||
| #define DGEMM_DEFAULT_Q 92 | |||
| #define CGEMM_DEFAULT_Q 128 | |||
| #define ZGEMM_DEFAULT_Q 80 | |||
| #define SGEMM_DEFAULT_R 1024 /* Per-precision R values. */ | |||
| #define DGEMM_DEFAULT_R dgemm_r /* Only R value given as an identifier rather than a literal; presumably set at run time -- TODO confirm. */ | |||
| #define CGEMM_DEFAULT_R 1024 | |||
| #define ZGEMM_DEFAULT_R 1024 | |||
| #define GEMM_OFFSET_A1 0x10000 /* 0x10000 = 65536. */ | |||
| #define GEMM_OFFSET_B1 0x100000 /* 0x100000 = 1048576. */ | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #ifdef GENERIC | |||
| #define SNUMOPT 2 | |||