| @@ -1,4 +1,12 @@ | |||||
| OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
| ==================================================================== | |||||
| Version 0.2.1 | |||||
| 30-Jun-2012 | |||||
| common: | |||||
| x86/x86-64: | |||||
| * Fixed the SEGFAULT bug about hyper-threading | |||||
| * Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes | |||||
| ==================================================================== | ==================================================================== | ||||
| Version 0.2.0 | Version 0.2.0 | ||||
| 26-Jun-2012 | 26-Jun-2012 | ||||
| @@ -3,7 +3,7 @@ | |||||
| # | # | ||||
| # This library's version | # This library's version | ||||
| VERSION = 0.2.0 | |||||
| VERSION = 0.2.1 | |||||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
| @@ -44,6 +44,7 @@ Please read GotoBLAS_01Readme.txt | |||||
| - **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes. | - **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes. | ||||
| - **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64. | - **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64. | ||||
| - **AMD Bobcat**: Used GotoBLAS2 Barcelona codes. | - **AMD Bobcat**: Used GotoBLAS2 Barcelona codes. | ||||
| - **AMD Bulldozer**: Used GotoBLAS2 Barcelona codes. | |||||
| #### MIPS64: | #### MIPS64: | ||||
| - **ICT Loongson 3A**: Optimized Level-3 BLAS and parts of Level-1 and Level-2. | - **ICT Loongson 3A**: Optimized Level-3 BLAS and parts of Level-1 and Level-2. | ||||
| @@ -105,6 +105,7 @@ | |||||
| #define CORE_NANO 19 | #define CORE_NANO 19 | ||||
| #define CORE_SANDYBRIDGE 20 | #define CORE_SANDYBRIDGE 20 | ||||
| #define CORE_BOBCAT 21 | #define CORE_BOBCAT 21 | ||||
| #define CORE_BULLDOZER 22 | |||||
| #define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
| #define HAVE_SSE2 (1 << 1) | #define HAVE_SSE2 (1 << 1) | ||||
| @@ -193,4 +194,5 @@ typedef struct { | |||||
| #define CPUTYPE_NANO 43 | #define CPUTYPE_NANO 43 | ||||
| #define CPUTYPE_SANDYBRIDGE 44 | #define CPUTYPE_SANDYBRIDGE 44 | ||||
| #define CPUTYPE_BOBCAT 45 | #define CPUTYPE_BOBCAT 45 | ||||
| #define CPUTYPE_BULLDOZER 46 | |||||
| #endif | #endif | ||||
| @@ -1027,6 +1027,7 @@ int get_cpuname(void){ | |||||
| return CPUTYPE_OPTERON; | return CPUTYPE_OPTERON; | ||||
| case 1: | case 1: | ||||
| case 10: | case 10: | ||||
| case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series | |||||
| return CPUTYPE_BARCELONA; | return CPUTYPE_BARCELONA; | ||||
| case 5: | case 5: | ||||
| return CPUTYPE_BOBCAT; | return CPUTYPE_BOBCAT; | ||||
| @@ -1151,6 +1152,7 @@ static char *cpuname[] = { | |||||
| "NANO", | "NANO", | ||||
| "SANDYBRIDGE", | "SANDYBRIDGE", | ||||
| "BOBCAT", | "BOBCAT", | ||||
| "BULLDOZER", | |||||
| }; | }; | ||||
| static char *lowercpuname[] = { | static char *lowercpuname[] = { | ||||
| @@ -1199,6 +1201,7 @@ static char *lowercpuname[] = { | |||||
| "nano", | "nano", | ||||
| "sandybridge", | "sandybridge", | ||||
| "bobcat", | "bobcat", | ||||
| "bulldozer", | |||||
| }; | }; | ||||
| static char *corename[] = { | static char *corename[] = { | ||||
| @@ -1224,6 +1227,7 @@ static char *corename[] = { | |||||
| "NANO", | "NANO", | ||||
| "SANDYBRIDGE", | "SANDYBRIDGE", | ||||
| "BOBCAT", | "BOBCAT", | ||||
| "BULLDOZER", | |||||
| }; | }; | ||||
| static char *corename_lower[] = { | static char *corename_lower[] = { | ||||
| @@ -1249,6 +1253,7 @@ static char *corename_lower[] = { | |||||
| "nano", | "nano", | ||||
| "sandybridge", | "sandybridge", | ||||
| "bobcat", | "bobcat", | ||||
| "bulldozer", | |||||
| }; | }; | ||||
| @@ -1359,6 +1364,7 @@ int get_coretype(void){ | |||||
| if (family == 0xf){ | if (family == 0xf){ | ||||
| if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON; | if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON; | ||||
| else if (exfamily == 5) return CORE_BOBCAT; | else if (exfamily == 5) return CORE_BOBCAT; | ||||
| else if (exfamily == 6) return CORE_BARCELONA; //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series | |||||
| else return CORE_BARCELONA; | else return CORE_BARCELONA; | ||||
| } | } | ||||
| } | } | ||||
| @@ -447,6 +447,9 @@ static void disable_hyperthread(void) { | |||||
| //When the shared CPUs are in different elements of the share & avail arrays, this may be a bug. | //When the shared CPUs are in different elements of the share & avail arrays, this may be a bug. | ||||
| for (i = 0; i < count ; i++){ | for (i = 0; i < count ; i++){ | ||||
| share[i] &= common->avail[i]; | |||||
| if (popcount(share[i]) > 1) { | if (popcount(share[i]) > 1) { | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| @@ -102,6 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /* #define FORCE_BARCELONA */ | /* #define FORCE_BARCELONA */ | ||||
| /* #define FORCE_SHANGHAI */ | /* #define FORCE_SHANGHAI */ | ||||
| /* #define FORCE_ISTANBUL */ | /* #define FORCE_ISTANBUL */ | ||||
| /* #define FORCE_BULLDOZER */ | |||||
| /* #define FORCE_BOBCAT */ | /* #define FORCE_BOBCAT */ | ||||
| /* #define FORCE_SSE_GENERIC */ | /* #define FORCE_SSE_GENERIC */ | ||||
| /* #define FORCE_VIAC3 */ | /* #define FORCE_VIAC3 */ | ||||
| @@ -349,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "OPTERON" | #define CORENAME "OPTERON" | ||||
| #endif | #endif | ||||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) | |||||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER) | |||||
| #define FORCE | #define FORCE | ||||
| #define FORCE_INTEL | #define FORCE_INTEL | ||||
| #define ARCHITECTURE "X86" | #define ARCHITECTURE "X86" | ||||
| @@ -357,8 +358,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ARCHCONFIG "-DBARCELONA " \ | #define ARCHCONFIG "-DBARCELONA " \ | ||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ | "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ | ||||
| "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \ | "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ | |||||
| "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ | |||||
| "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" | "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" | ||||
| #define LIBNAME "barcelona" | #define LIBNAME "barcelona" | ||||
| #define CORENAME "BARCELONA" | #define CORENAME "BARCELONA" | ||||