Browse Source

Refs #47. On Loongson 3A, set the DGEMM_R parameter according to the number of threads. This should improve double-precision BLAS3 performance in multi-threaded runs.

tags/v0.1.0^2
Xianyi Zhang 14 years ago
parent
commit
4727fe8abf
6 changed files with 44 additions and 3 deletions
  1. +3
    -1
      common_macro.h
  2. +5
    -0
      driver/others/blas_server.c
  3. +5
    -0
      driver/others/blas_server_omp.c
  4. +1
    -1
      driver/others/memory.c
  5. +28
    -0
      driver/others/parameter.c
  6. +2
    -1
      param.h

+ 3
- 1
common_macro.h View File

@@ -2127,7 +2127,9 @@
#endif #endif


#ifndef ASSEMBLER #ifndef ASSEMBLER
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b;
extern BLASLONG sgemm_p; extern BLASLONG sgemm_p;
extern BLASLONG sgemm_q; extern BLASLONG sgemm_q;
extern BLASLONG sgemm_r; extern BLASLONG sgemm_r;


+ 5
- 0
driver/others/blas_server.c View File

@@ -797,6 +797,11 @@ void goto_set_num_threads(int num_threads) {


blas_cpu_number = num_threads; blas_cpu_number = num_threads;


#if defined(ARCH_MIPS64)
//set parameters for different number of threads.
blas_set_parameter();
#endif

} }


void openblas_set_num_threads(int num_threads) { void openblas_set_num_threads(int num_threads) {


+ 5
- 0
driver/others/blas_server_omp.c View File

@@ -63,6 +63,11 @@ void goto_set_num_threads(int num_threads) {


omp_set_num_threads(blas_cpu_number); omp_set_num_threads(blas_cpu_number);
#if defined(ARCH_MIPS64)
//set parameters for different number of threads.
blas_set_parameter();
#endif

} }
void openblas_set_num_threads(int num_threads) { void openblas_set_num_threads(int num_threads) {




+ 1
- 1
driver/others/memory.c View File

@@ -884,7 +884,7 @@ void *blas_memory_alloc(int procpos){
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number(); if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
#endif #endif
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64)
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
#ifndef DYNAMIC_ARCH #ifndef DYNAMIC_ARCH
blas_set_parameter(); blas_set_parameter();
#endif #endif


+ 28
- 0
driver/others/parameter.c View File

@@ -45,8 +45,22 @@ int get_L2_size(void);
#define DEFAULT_GEMM_P 128 #define DEFAULT_GEMM_P 128
#define DEFAULT_GEMM_Q 128 #define DEFAULT_GEMM_Q 128
#define DEFAULT_GEMM_R 128 #define DEFAULT_GEMM_R 128
#define DEFAULT_GEMM_OFFSET_A 0
#define DEFAULT_GEMM_OFFSET_B 0


/* Global Parameter */ /* Global Parameter */
/*
 * Definitions of the GEMM offset globals.
 *
 * In a #if expression every identifier that is still present after macro
 * expansion evaluates to 0.  The comparison below is therefore true when
 * GEMM_OFFSET_A expands to the identifier gemm_offset_a itself (or to 0, or
 * is not defined at all); in that case the variable starts from
 * DEFAULT_GEMM_OFFSET_A.  When GEMM_OFFSET_A expands to a non-zero constant,
 * that constant becomes the initial value instead.
 * NOTE(review): where GEMM_OFFSET_A / GEMM_OFFSET_B are defined is not
 * visible here -- confirm against param.h.
 */
#if GEMM_OFFSET_A == gemm_offset_a
BLASLONG gemm_offset_a = DEFAULT_GEMM_OFFSET_A;
#else
BLASLONG gemm_offset_a = GEMM_OFFSET_A;
#endif

/* Same selection as above, applied to GEMM_OFFSET_B / gemm_offset_b. */
#if GEMM_OFFSET_B == gemm_offset_b
BLASLONG gemm_offset_b = DEFAULT_GEMM_OFFSET_B;
#else
BLASLONG gemm_offset_b = GEMM_OFFSET_B;
#endif

#if SGEMM_P == sgemm_p #if SGEMM_P == sgemm_p
BLASLONG sgemm_p = DEFAULT_GEMM_P; BLASLONG sgemm_p = DEFAULT_GEMM_P;
#else #else
@@ -666,3 +680,17 @@ void blas_set_parameter(void){
#endif #endif


#endif #endif

#if defined(ARCH_MIPS64)
/*
 * blas_set_parameter (MIPS64 variant)
 *
 * Re-tunes the dgemm_r global from the current value of blas_num_threads.
 * Only builds with LOONGSON3A defined do anything; on other MIPS64 targets
 * the function body is empty.
 *
 *   blas_num_threads == 1  ->  dgemm_r = 1000
 *   any other value        ->  dgemm_r = 300
 *
 * NOTE(review): the goto_set_num_threads callers shown in this change assign
 * blas_cpu_number right before calling this function, while the test below
 * reads blas_num_threads -- confirm that this is the intended variable.
 */
void blas_set_parameter(void){
#if defined(LOONGSON3A)
  /* single-thread value first, multi-thread value second */
  dgemm_r = (blas_num_threads == 1) ? 1000 : 300;
#endif
}
#endif

+ 2
- 1
param.h View File

@@ -1507,7 +1507,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//#define DGEMM_DEFAULT_R 200 //#define DGEMM_DEFAULT_R 200
//#define DGEMM_DEFAULT_R 400 //#define DGEMM_DEFAULT_R 400
//#define DGEMM_DEFAULT_R 192 //#define DGEMM_DEFAULT_R 192
#define DGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R dgemm_r
//1000
//#define DGEMM_DEFAULT_R 160 //#define DGEMM_DEFAULT_R 160
//#define DGEMM_DEFAULT_R 270 //#define DGEMM_DEFAULT_R 270
#define CGEMM_DEFAULT_R 1000 #define CGEMM_DEFAULT_R 1000


Loading…
Cancel
Save