| @@ -304,9 +304,47 @@ int support_avx(){ | |||
| #endif | |||
| } | |||
/*
 * Report whether AVX2 kernels may be selected at run time.
 *
 * Returns 1 when support_avx() reports success and CPUID leaf 7 advertises
 * AVX2 in EBX; returns 0 otherwise. Always returns 0 when the library is
 * built with NO_AVX2 defined.
 */
int support_avx2(){
#ifndef NO_AVX2
  /* CPUID leaf 7 is indexed by a sub-leaf in ECX; keep ecx zeroed in case
     cpuid() forwards it as an input (its definition is not visible here). */
  int eax, ebx, ecx=0, edx;

  /* This must be a call: the bare name "support_avx" decays to a non-null
     function pointer, so "!support_avx" was always false and the AVX
     prerequisite was never actually checked. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=7,ECX=0):EBX bit 5 is the AVX2 feature flag.
     Bit 7, which was tested previously, is SMEP and unrelated to AVX2. */
  return (ebx & (1<<5)) != 0;
#else
  return 0;
#endif
}
/*
 * Report whether AVX512 kernels may be selected at run time.
 *
 * Returns 1 when support_avx() reports success and CPUID leaf 7 advertises
 * both AVX2 and AVX512VL in EBX; returns 0 otherwise. Always returns 0 when
 * the library is built with NO_AVX512 defined.
 *
 * NOTE(review): only the CPUID feature flags are examined here. Whether the
 * OS has enabled the AVX512 register state is not checked in this function;
 * confirm that support_avx() or the callers cover that.
 */
int support_avx512(){
#ifndef NO_AVX512
  /* Zero ecx, matching support_avx2(): CPUID leaf 7 is indexed by a sub-leaf
     in ECX, and cpuid() may forward it as an input (not visible here). */
  int eax, ebx, ecx=0, edx;

  /* This must be a call: the bare name "support_avx" decays to a non-null
     function pointer, so "!support_avx" was always false. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* AVX2 (EBX bit 5) is a prerequisite. The previous test compared the
     masked value against 1, which is always unequal, and then only assigned
     ret=0 again, so the prerequisite was never enforced. */
  if ((ebx & (1<<5)) == 0)
    return 0;

  /* AVX512VL is EBX bit 31. Use an unsigned constant: shifting a signed 1
     into the sign bit is undefined behaviour in C. */
  return ((unsigned int)ebx & (1u<<31)) != 0;
#else
  return 0;
#endif
}
/* Warning reporter defined in another translation unit. */
extern void openblas_warning(int verbose, const char *msg);

/* Verbosity value passed as the first argument of openblas_warning()
   together with the fallback messages below. */
#define FALLBACK_VERBOSE 1

/* Messages emitted when a kernel set older than the detected CPU model is
   chosen. Adjacent string literals are concatenated by the compiler, so each
   macro still expands to one single-line message ending in a newline. */
#define NEHALEM_FALLBACK \
  "OpenBLAS : Your OS does not support AVX instructions. " \
  "OpenBLAS is using Nehalem kernels as a fallback, " \
  "which may give poorer performance.\n"

#define SANDYBRIDGE_FALLBACK \
  "OpenBLAS : Your OS does not support AVX2 instructions. " \
  "OpenBLAS is using Sandybridge kernels as a fallback, " \
  "which may give poorer performance.\n"

#define HASWELL_FALLBACK \
  "OpenBLAS : Your OS does not support AVX512 instructions. " \
  "OpenBLAS is using Haswell kernels as a fallback, " \
  "which may give poorer performance.\n"

#define BARCELONA_FALLBACK \
  "OpenBLAS : Your OS does not support AVX instructions. " \
  "OpenBLAS is using Barcelona kernels as a fallback, " \
  "which may give poorer performance.\n"
| static int get_vendor(void){ | |||
| @@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){ | |||
| } | |||
| //Intel Haswell | |||
| if (model == 12 || model == 15) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| } | |||
| //Intel Broadwell | |||
| if (model == 13) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| @@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){ | |||
| case 4: | |||
| //Intel Haswell | |||
| if (model == 5 || model == 6) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| } | |||
| //Intel Broadwell | |||
| if (model == 7 || model == 15) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| } | |||
| //Intel Skylake | |||
| if (model == 14) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| @@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){ | |||
| case 5: | |||
| //Intel Broadwell | |||
| if (model == 6) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| } | |||
| if (model == 5) { | |||
| // Intel Skylake X | |||
| #ifndef NO_AVX512 | |||
| return &gotoblas_SKYLAKEX; | |||
| #else | |||
| if(support_avx()) | |||
| if (support_avx512()) | |||
| return &gotoblas_SKYLAKEX; | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| #endif | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| } | |||
| //Intel Skylake | |||
| if (model == 14) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| } | |||
| //Intel Phi Knights Landing | |||
| if (model == 7) { | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| @@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){ | |||
| case 6: | |||
| if (model == 6) { | |||
| // Cannon Lake | |||
| #ifndef NO_AVX512 | |||
| return &gotoblas_SKYLAKEX; | |||
| #else | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| return &gotoblas_HASWELL; | |||
| #else | |||
| return &gotoblas_SANDYBRIDGE; | |||
| #endif | |||
| else | |||
| return &gotoblas_NEHALEM; | |||
| #endif | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| } | |||
| return NULL; | |||
| case 9: | |||
| case 8: | |||
| if (model == 14 ) { // Kaby Lake | |||
| if(support_avx()) | |||
| if(support_avx2()) | |||
| return &gotoblas_HASWELL; | |||
| else{ | |||
| if(support_avx()) { | |||
| openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); | |||
| return &gotoblas_SANDYBRIDGE; | |||
| } else { | |||
| openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); | |||
| return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. | |||
| } | |||