| @@ -304,9 +304,47 @@ int support_avx(){ | |||||
| #endif | #endif | ||||
| } | } | ||||
/*
 * Report whether the AVX2 kernels may be selected.
 *
 * Returns 1 when support_avx() succeeds and CPUID leaf 7 reports the AVX2
 * feature flag; returns 0 otherwise. Always returns 0 when the build
 * defines NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  /* ecx is zeroed in case the cpuid() helper forwards it as the sub-leaf
     index (helper not visible here -- TODO confirm). */
  int eax, ebx, ecx=0, edx;

  /* support_avx must be *called*: the bare function name is a non-null
     pointer, so "!support_avx" is always false and the guard never fired. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=07H,ECX=0):EBX bit 5 is the AVX2 flag; bit 7 is SMEP. */
  return (ebx & (1<<5)) != 0;
#else
  return 0;
#endif
}
/*
 * Report whether the AVX-512 kernels may be selected.
 *
 * Returns 1 only when support_avx() succeeds and CPUID leaf 7 reports both
 * the AVX2 flag and the AVX512VL flag; returns 0 otherwise. Always returns
 * 0 when the build defines NO_AVX512.
 *
 * NOTE(review): only CPUID feature flags are examined here. Whether the OS
 * has enabled the AVX-512 register state is not checked in this function --
 * confirm whether support_avx() or the caller covers that.
 */
int support_avx512(){
#ifndef NO_AVX512
  /* ecx is zeroed in case the cpuid() helper forwards it as the sub-leaf
     index (helper not visible here -- TODO confirm). */
  int eax, ebx, ecx=0, edx;

  /* support_avx must be *called*: the bare function name is a non-null
     pointer, so "!support_avx" is always false and the guard never fired. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=07H,ECX=0):EBX bit 5 is AVX2. A masked bit is either 0 or
     the mask value, so it has to be compared against 0, not 1. */
  if ((ebx & (1<<5)) == 0)
    return 0;  // AVX2 is not even reported

  /* EBX bit 31 is AVX512VL. Use an unsigned mask: 1<<31 overflows int. */
  if (((unsigned int)ebx & (1u<<31)) != 0)
    return 1;  // AVX512VL is reported

  return 0;
#else
  return 0;
#endif
}
/* Warning hook defined elsewhere; takes a verbosity value and a message. */
extern void openblas_warning(int verbose, const char * msg);

/* Verbosity value passed along with every fallback message below. */
#define FALLBACK_VERBOSE 1

/*
 * Messages for the case where a lower kernel set is chosen than the
 * detected CPU model would normally get. Adjacent string literals are
 * concatenated by the compiler, so each macro still expands to one string.
 */
#define NEHALEM_FALLBACK \
  "OpenBLAS : Your OS does not support AVX instructions. " \
  "OpenBLAS is using Nehalem kernels as a fallback, " \
  "which may give poorer performance.\n"
#define SANDYBRIDGE_FALLBACK \
  "OpenBLAS : Your OS does not support AVX2 instructions. " \
  "OpenBLAS is using Sandybridge kernels as a fallback, " \
  "which may give poorer performance.\n"
#define HASWELL_FALLBACK \
  "OpenBLAS : Your OS does not support AVX512 instructions. " \
  "OpenBLAS is using Haswell kernels as a fallback, " \
  "which may give poorer performance.\n"
#define BARCELONA_FALLBACK \
  "OpenBLAS : Your OS does not support AVX instructions. " \
  "OpenBLAS is using Barcelona kernels as a fallback, " \
  "which may give poorer performance.\n"
| static int get_vendor(void){ | static int get_vendor(void){ | ||||
| @@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){ | |||||
| } | } | ||||
| //Intel Haswell | //Intel Haswell | ||||
| if (model == 12 || model == 15) { | if (model == 12 || model == 15) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| } | } | ||||
| //Intel Broadwell | //Intel Broadwell | ||||
| if (model == 13) { | if (model == 13) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| @@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){ | |||||
| case 4: | case 4: | ||||
| //Intel Haswell | //Intel Haswell | ||||
| if (model == 5 || model == 6) { | if (model == 5 || model == 6) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| } | } | ||||
| //Intel Broadwell | //Intel Broadwell | ||||
| if (model == 7 || model == 15) { | if (model == 7 || model == 15) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| } | } | ||||
| //Intel Skylake | //Intel Skylake | ||||
| if (model == 14) { | if (model == 14) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| @@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){ | |||||
| case 5: | case 5: | ||||
| //Intel Broadwell | //Intel Broadwell | ||||
| if (model == 6) { | if (model == 6) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| } | } | ||||
| if (model == 5) { | if (model == 5) { | ||||
| // Intel Skylake X | // Intel Skylake X | ||||
| #ifndef NO_AVX512 | |||||
| return &gotoblas_SKYLAKEX; | |||||
| #else | |||||
| if(support_avx()) | |||||
| if (support_avx512()) | |||||
| return &gotoblas_SKYLAKEX; | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||||
| return &gotoblas_NEHALEM; | |||||
| } | |||||
| #endif | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||||
| return &gotoblas_NEHALEM; | |||||
| } | |||||
| } | } | ||||
| //Intel Skylake | //Intel Skylake | ||||
| if (model == 14) { | if (model == 14) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| } | } | ||||
| //Intel Phi Knights Landing | //Intel Phi Knights Landing | ||||
| if (model == 7) { | if (model == 7) { | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| @@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){ | |||||
| case 6: | case 6: | ||||
| if (model == 6) { | if (model == 6) { | ||||
| // Cannon Lake | // Cannon Lake | ||||
| #ifndef NO_AVX512 | |||||
| return &gotoblas_SKYLAKEX; | |||||
| #else | |||||
| if(support_avx()) | |||||
| #ifndef NO_AVX2 | |||||
| return &gotoblas_HASWELL; | |||||
| #else | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| #endif | |||||
| else | |||||
| return &gotoblas_NEHALEM; | |||||
| #endif | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||||
| return &gotoblas_NEHALEM; | |||||
| } | |||||
| } | } | ||||
| return NULL; | return NULL; | ||||
| case 9: | case 9: | ||||
| case 8: | case 8: | ||||
| if (model == 14 ) { // Kaby Lake | if (model == 14 ) { // Kaby Lake | ||||
| if(support_avx()) | |||||
| if(support_avx2()) | |||||
| return &gotoblas_HASWELL; | return &gotoblas_HASWELL; | ||||
| else{ | |||||
| if(support_avx()) { | |||||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||||
| return &gotoblas_SANDYBRIDGE; | |||||
| } else { | |||||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | ||||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||