| @@ -54,6 +54,7 @@ | |||
| #define VENDOR_TRANSMETA 9 | |||
| #define VENDOR_NSC 10 | |||
| #define VENDOR_HYGON 11 | |||
| #define VENDOR_ZHAOXIN 12 | |||
| #define VENDOR_UNKNOWN 99 | |||
| #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) | |||
| @@ -283,7 +283,7 @@ int get_vendor(void){ | |||
| if (!strcmp(vendor, "CyrixInstead")) return VENDOR_CYRIX; | |||
| if (!strcmp(vendor, "NexGenDriven")) return VENDOR_NEXGEN; | |||
| if (!strcmp(vendor, "CentaurHauls")) return VENDOR_CENTAUR; | |||
| if (!strcmp(vendor, " Shanghai ")) return VENDOR_CENTAUR; | |||
| if (!strcmp(vendor, " Shanghai ")) return VENDOR_ZHAOXIN; | |||
| if (!strcmp(vendor, "RiseRiseRise")) return VENDOR_RISE; | |||
| if (!strcmp(vendor, " SiS SiS SiS")) return VENDOR_SIS; | |||
| if (!strcmp(vendor, "GenuineTMx86")) return VENDOR_TRANSMETA; | |||
| @@ -1067,7 +1067,8 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | |||
| if ((get_vendor() == VENDOR_AMD) || | |||
| (get_vendor() == VENDOR_HYGON) || | |||
| (get_vendor() == VENDOR_CENTAUR)) { | |||
| (get_vendor() == VENDOR_CENTAUR) || | |||
| (get_vendor() == VENDOR_ZHAOXIN)) { | |||
| cpuid(0x80000005, &eax, &ebx, &ecx, &edx); | |||
| LDTB.size = 4096; | |||
| @@ -1190,7 +1191,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | |||
| int get_cpuname(void){ | |||
| int family, exfamily, model, vendor, exmodel; | |||
| int family, exfamily, model, vendor, exmodel, stepping; | |||
| if (!have_cpuid()) return CPUTYPE_80386; | |||
| @@ -1198,6 +1199,7 @@ int get_cpuname(void){ | |||
| exfamily = get_cputype(GET_EXFAMILY); | |||
| model = get_cputype(GET_MODEL); | |||
| exmodel = get_cputype(GET_EXMODEL); | |||
| stepping = get_cputype(GET_STEPPING); | |||
| vendor = get_vendor(); | |||
| @@ -1628,15 +1630,20 @@ int get_cpuname(void){ | |||
| switch (family) { | |||
| case 0x5: | |||
| return CPUTYPE_CENTAURC6; | |||
| break; | |||
| case 0x6: | |||
| return CPUTYPE_NANO; | |||
| break; | |||
| case 0x7: | |||
| if (model == 0xf && stepping < 0xe) | |||
| return CPUTYPE_NANO; | |||
| return CPUTYPE_NEHALEM; | |||
| break; | |||
| default: | |||
| if (family >= 0x7) | |||
| return CPUTYPE_NEHALEM; | |||
| else | |||
| return CPUTYPE_VIAC3; | |||
| } | |||
| return CPUTYPE_VIAC3; | |||
| } | |||
| if (vendor == VENDOR_ZHAOXIN){ | |||
| return CPUTYPE_NEHALEM; | |||
| } | |||
| if (vendor == VENDOR_RISE){ | |||
| @@ -1869,7 +1876,7 @@ char *get_lower_cpunamechar(void){ | |||
| int get_coretype(void){ | |||
| int family, exfamily, model, exmodel, vendor; | |||
| int family, exfamily, model, exmodel, vendor, stepping; | |||
| if (!have_cpuid()) return CORE_80486; | |||
| @@ -1877,6 +1884,7 @@ int get_coretype(void){ | |||
| exfamily = get_cputype(GET_EXFAMILY); | |||
| model = get_cputype(GET_MODEL); | |||
| exmodel = get_cputype(GET_EXMODEL); | |||
| stepping = get_cputype(GET_STEPPING); | |||
| vendor = get_vendor(); | |||
| @@ -2286,13 +2294,19 @@ int get_coretype(void){ | |||
| if (vendor == VENDOR_CENTAUR) { | |||
| switch (family) { | |||
| case 0x6: | |||
| return CORE_NANO; | |||
| break; | |||
| case 0x7: | |||
| if (model == 0xf && stepping < 0xe) | |||
| return CORE_NANO; | |||
| return CORE_NEHALEM; | |||
| break; | |||
| default: | |||
| if (family >= 0x7) | |||
| return CORE_NEHALEM; | |||
| else | |||
| return CORE_VIAC3; | |||
| } | |||
| return CORE_VIAC3; | |||
| } | |||
| if (vendor == VENDOR_ZHAOXIN) { | |||
| return CORE_NEHALEM; | |||
| } | |||
| return CORE_UNKNOWN; | |||
| @@ -292,6 +292,7 @@ extern gotoblas_t gotoblas_COOPERLAKE; | |||
| #define VENDOR_AMD 2 | |||
| #define VENDOR_CENTAUR 3 | |||
| #define VENDOR_HYGON 4 | |||
| #define VENDOR_ZHAOXIN 5 | |||
| #define VENDOR_UNKNOWN 99 | |||
| #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) | |||
| @@ -404,7 +405,7 @@ static int get_vendor(void){ | |||
| if (!strcmp(vendor.vchar, "GenuineIntel")) return VENDOR_INTEL; | |||
| if (!strcmp(vendor.vchar, "AuthenticAMD")) return VENDOR_AMD; | |||
| if (!strcmp(vendor.vchar, "CentaurHauls")) return VENDOR_CENTAUR; | |||
| if (!strcmp(vendor.vchar, " Shanghai ")) return VENDOR_CENTAUR; | |||
| if (!strcmp(vendor.vchar, " Shanghai ")) return VENDOR_ZHAOXIN; | |||
| if (!strcmp(vendor.vchar, "HygonGenuine")) return VENDOR_HYGON; | |||
| if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL; | |||
| @@ -415,7 +416,7 @@ static int get_vendor(void){ | |||
| static gotoblas_t *get_coretype(void){ | |||
| int eax, ebx, ecx, edx; | |||
| int family, exfamily, model, vendor, exmodel; | |||
| int family, exfamily, model, vendor, exmodel, stepping; | |||
| cpuid(1, &eax, &ebx, &ecx, &edx); | |||
| @@ -423,6 +424,7 @@ static gotoblas_t *get_coretype(void){ | |||
| exfamily = BITMASK(eax, 20, 0xff); | |||
| model = BITMASK(eax, 4, 0x0f); | |||
| exmodel = BITMASK(eax, 16, 0x0f); | |||
| stepping = BITMASK(eax, 0, 0x0f); | |||
| vendor = get_vendor(); | |||
| @@ -824,13 +826,19 @@ static gotoblas_t *get_coretype(void){ | |||
| if (vendor == VENDOR_CENTAUR) { | |||
| switch (family) { | |||
| case 0x6: | |||
| return &gotoblas_NANO; | |||
| break; | |||
| case 0x7: | |||
| if (model == 0xf && stepping < 0xe) | |||
| return &gotoblas_NANO; | |||
| return &gotoblas_NEHALEM; | |||
| default: | |||
| if (family >= 0x7) | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| } | |||
| if (vendor == VENDOR_ZHAOXIN) { | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| return NULL; | |||
| } | |||
| @@ -203,7 +203,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| if (alpha == ZERO) return; | |||
| #if 0 | |||
| /* this optimization causes stack corruption on x86_64 under OSX, Windows and FreeBSD */ | |||
| /* this optimization causes stack corruption on x86_64 under OSX, Windows and FreeBSD */ | |||
| if (trans == 0 && incx == 1 && incy == 1 && m*n < 2304 *GEMM_MULTITHREAD_THRESHOLD) { | |||
| GEMV_N(m, n, 0, alpha, a, lda, x, incx, y, incy, NULL); | |||
| return; | |||
| @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include "sgemv_t_microk_haswell-4.c" | |||
| #elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||
| #include "sgemv_t_microk_haswell-4.c" | |||
| #include "sgemv_t_microk_skylakex.c" | |||
| /*#include "sgemv_t_microk_skylakex.c"*/ | |||
| #endif | |||
| #if defined(STEAMROLLER) || defined(EXCAVATOR) | |||
| @@ -99,6 +99,8 @@ typedef int blasint; | |||
| /* Inclusion of Linux-specific header is needed for definition of cpu_set_t. */ | |||
| #ifdef OPENBLAS_OS_LINUX | |||
| #define _GNU_SOURCE | |||
| #ifndef _GNU_SOURCE | |||
| #define _GNU_SOURCE | |||
| #endif | |||
| #include <sched.h> | |||
| #endif | |||
| @@ -2502,7 +2502,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 2048 | |||
| #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define SGEMM_DEFAULT_UNROLL_N 8 | |||
| @@ -2534,7 +2534,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 2048 | |||
| #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||