| @@ -117,5 +117,9 @@ In chronological order: | |||
| * Isaac Dunham <https://github.com/idunham> | |||
| * [2014-08-03] Fixed link error on Linux/musl | |||
| * Dave Nuechterlein | |||
| * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | |||
| ARMv8 support. | |||
| * [Your name or handle] <[email or website]> | |||
| * [Date] [Brief summary of your changes] | |||
| @@ -1,4 +1,25 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.2.13 | |||
| 3-Dec-2014 | |||
| common: | |||
| * Add SYMBOLPREFIX and SYMBOLSUFFIX makefile options | |||
| for adding a prefix or suffix to all exported symbol names | |||
| in the shared library. (#459, Thanks Tony Kelman) | |||
| * Provide OpenBLASConfig.cmake at installation. | |||
| * Fix Fortran compiler detection on FreeBSD. | |||
| (#470, Thanks Mike Nolta) | |||
| x86/x86-64: | |||
| * Add generic kernel files for x86-64. make TARGET=GENERIC | |||
| * Fix a bug in the sgemm kernel on Intel Sandy Bridge. | |||
| * Fix c_check bug on some amd64 systems. (#471, Thanks Mike Nolta) | |||
| ARM: | |||
| * Support APM's X-Gene 1 AArch64 processors. | |||
| Optimize trmm and sgemm. (#465, Thanks Dave Nuechterlein) | |||
| ==================================================================== | |||
| Version 0.2.12 | |||
| 13-Oct-2014 | |||
| @@ -1,4 +1,4 @@ | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -12,9 +12,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -9,6 +9,8 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include | |||
| OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib | |||
| OPENBLAS_BINARY_DIR := $(PREFIX)/bin | |||
| OPENBLAS_BUILD_DIR := $(CURDIR) | |||
| OPENBLAS_CMAKE_DIR := $(PREFIX)/cmake | |||
| OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | |||
| .PHONY : install | |||
| .NOTPARALLEL : install | |||
| @@ -21,6 +23,7 @@ install : lib.grd | |||
| @-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||
| @-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||
| @-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
| @-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | |||
| @echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||
| #for inc | |||
| @echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||
| @@ -90,6 +93,23 @@ ifeq ($(OSNAME), CYGWIN_NT) | |||
| @-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) | |||
| endif | |||
| endif | |||
| # Generate $(OPENBLAS_CMAKE_CONFIG): a small CMake script that sets | |||
| # OpenBLAS_INCLUDE_DIRS and OpenBLAS_LIBRARIES for consumers of the install. | |||
| # The file is written under $(DESTDIR), but the paths recorded inside it are | |||
| # the plain OPENBLAS_*_DIR values (no DESTDIR prefix). | |||
| @echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | |||
| @echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||
| ifndef NO_SHARED | |||
| # Shared build: record the platform-specific shared library. | |||
| # $(filter ...) yields OSNAME only when it is one of the listed names, so each | |||
| # ifeq below acts as a logical OR over that list. | |||
| ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD)) | |||
| @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||
| endif | |||
| # Windows / Cygwin: the DLL is referenced from the binary directory. | |||
| ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) | |||
| @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||
| endif | |||
| # NOTE(review): no OpenBLAS_LIBRARIES line is written for a shared build on an | |||
| # OSNAME outside the three lists handled here - confirm this is intended. | |||
| ifeq ($(OSNAME), Darwin) | |||
| @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||
| endif | |||
| else | |||
| # NO_SHARED is set: record the static library instead. | |||
| @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||
| endif | |||
| @echo Install OK! | |||
| @@ -3,7 +3,7 @@ | |||
| # | |||
| # This library's version | |||
| VERSION = 0.2.12 | |||
| VERSION = 0.2.13 | |||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
| @@ -186,6 +186,8 @@ LD = $(CROSS_SUFFIX)ld | |||
| RANLIB = $(CROSS_SUFFIX)ranlib | |||
| NM = $(CROSS_SUFFIX)nm | |||
| DLLWRAP = $(CROSS_SUFFIX)dllwrap | |||
| OBJCOPY = $(CROSS_SUFFIX)objcopy | |||
| OBJCONV = $(CROSS_SUFFIX)objconv | |||
| # | |||
| # OS dependent settings | |||
| @@ -845,6 +847,14 @@ else | |||
| LIBPREFIX = libopenblas_$(LIBNAMESUFFIX) | |||
| endif | |||
| # Optional prefix/suffix for the exported symbol names. Both default to the | |||
| # empty string when the user has not supplied a value, so later references | |||
| # always see a defined variable. | |||
| ifndef SYMBOLPREFIX | |||
| SYMBOLPREFIX = | |||
| endif | |||
| ifndef SYMBOLSUFFIX | |||
| SYMBOLSUFFIX = | |||
| endif | |||
| KERNELDIR = $(TOPDIR)/kernel/$(ARCH) | |||
| include $(TOPDIR)/Makefile.$(ARCH) | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -27,7 +28,6 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /*********************************************************************/ | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -27,7 +28,6 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /*********************************************************************/ | |||
| @@ -119,9 +119,9 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||
| } | |||
| #if defined(DOUBLE) | |||
| #define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory") | |||
| #define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory") | |||
| #else | |||
| #define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory") | |||
| #define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory") | |||
| #endif | |||
| #define GET_IMAGE_CANCEL | |||
| @@ -138,7 +138,6 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||
| #if defined(ASSEMBLER) && !defined(NEEDPARAM) | |||
| #define PROLOGUE \ | |||
| .arm ;\ | |||
| .global REALNAME ;\ | |||
| .func REALNAME ;\ | |||
| REALNAME: | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -27,7 +28,6 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /*********************************************************************/ | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -0,0 +1,217 @@ | |||
| /************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <string.h> | |||
| /* CPU identifiers returned by detect(). Each value is also used as the | |||
|    index of the matching entry in cpuname[] (see get_corename()). */ | |||
| #define CPU_UNKNOWN 0 | |||
| #define CPU_ARMV8 1 | |||
| /* Printable core names, indexed by the CPU_* identifiers above. | |||
|    NOTE(review): "UNKOWN" looks like a misspelling of "UNKNOWN"; it is a | |||
|    runtime string, so confirm nothing matches on it before correcting. */ | |||
| static char *cpuname[] = { | |||
| "UNKOWN", | |||
| "ARMV8" | |||
| }; | |||
| /* | |||
|  * Report whether the CPU advertises the feature flag named by `search`. | |||
|  * | |||
|  * On Linux the first line of /proc/cpuinfo that starts with "Features" is | |||
|  * split on whitespace and every token is compared with `search`. | |||
|  * | |||
|  * Returns 1 when the flag is listed, 0 otherwise. 0 is also returned when | |||
|  * `search` is NULL, when /proc/cpuinfo cannot be opened, when no usable | |||
|  * "Features" line is found, and on non-Linux builds. | |||
|  */ | |||
| int get_feature(char *search) | |||
| { | |||
| #if defined(linux) || defined(__linux__) | |||
|     FILE *infile; | |||
|     char buffer[2048], *p, *t; | |||
|     p = (char *) NULL; | |||
|     if (search == NULL) return(0); | |||
|     infile = fopen("/proc/cpuinfo", "r"); | |||
|     /* /proc may be unavailable (e.g. in a chroot): treat as "no features". */ | |||
|     if (infile == NULL) return(0); | |||
|     while (fgets(buffer, sizeof(buffer), infile)) | |||
|     { | |||
|         if (!strncmp("Features", buffer, 8)) | |||
|         { | |||
|             /* Step over the ':' only when it exists; the tokenizer below | |||
|                skips the blanks that follow it. */ | |||
|             p = strchr(buffer, ':'); | |||
|             if (p != NULL) p++; | |||
|             break; | |||
|         } | |||
|     } | |||
|     fclose(infile); | |||
|     if (p == NULL) return(0); | |||
|     /* Examine every token, including the first one, and strip the trailing | |||
|        newline so the last flag on the line can match as well. */ | |||
|     for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n")) | |||
|     { | |||
|         if (!strcmp(t, search)) { return(1); } | |||
|     } | |||
| #endif | |||
|     return(0); | |||
| } | |||
| /* | |||
|  * Identify the CPU core from /proc/cpuinfo. | |||
|  * | |||
|  * On Linux the first line starting with "model name" or "Processor" is | |||
|  * located; CPU_ARMV8 is returned when the text after its ':' contains | |||
|  * "AArch64". | |||
|  * | |||
|  * Returns CPU_UNKNOWN in every other case: no such line, no match, | |||
|  * /proc/cpuinfo cannot be opened, or a non-Linux build. | |||
|  */ | |||
| int detect(void) | |||
| { | |||
| #if defined(linux) || defined(__linux__) | |||
|     FILE *infile; | |||
|     char buffer[512], *p; | |||
|     p = (char *) NULL; | |||
|     infile = fopen("/proc/cpuinfo", "r"); | |||
|     /* /proc may be unavailable: report an unknown CPU instead of crashing. */ | |||
|     if (infile == NULL) return CPU_UNKNOWN; | |||
|     while (fgets(buffer, sizeof(buffer), infile)) | |||
|     { | |||
|         if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) | |||
|         { | |||
|             /* Keep the pointer at the ':' itself (or NULL when absent). | |||
|                Searching from there is equivalent for the substring test and | |||
|                never steps past the end of a short line. */ | |||
|             p = strchr(buffer, ':'); | |||
|             break; | |||
|         } | |||
|     } | |||
|     fclose(infile); | |||
|     if (p != NULL && strstr(p, "AArch64")) | |||
|     { | |||
|         return CPU_ARMV8; | |||
|     } | |||
| #endif | |||
|     return CPU_UNKNOWN; | |||
| } | |||
| /* Return the printable name of the detected core: the cpuname[] entry | |||
|    selected by the identifier that detect() reports. */ | |||
| char *get_corename(void) | |||
| { | |||
|     int core = detect(); | |||
|     return cpuname[core]; | |||
| } | |||
| /* Write the architecture name "ARM" to stdout (no trailing newline). */ | |||
| void get_architecture(void) | |||
| { | |||
|     fputs("ARM", stdout); | |||
| } | |||
| /* Write the sub-architecture name to stdout without a trailing newline: | |||
|    "ARMV8" when detect() reports CPU_ARMV8, "UNKNOWN" for anything else. */ | |||
| void get_subarchitecture(void) | |||
| { | |||
|     const char *label = (detect() == CPU_ARMV8) ? "ARMV8" : "UNKNOWN"; | |||
|     fputs(label, stdout); | |||
| } | |||
| /* Write the sub-directory name "arm64" to stdout (no trailing newline). */ | |||
| void get_subdirname(void) | |||
| { | |||
|     fputs("arm64", stdout); | |||
| } | |||
| /* Write the configuration #define lines for the detected core to stdout. | |||
|    Only CPU_ARMV8 produces output; any other detect() result prints nothing. */ | |||
| void get_cpuconfig(void) | |||
| { | |||
|     if (detect() != CPU_ARMV8) return; | |||
|     /* Adjacent literals are concatenated by the compiler, so this emits the | |||
|        same lines, in the same order, as one call per line would. */ | |||
|     fputs("#define ARMV8\n" | |||
|           "#define L1_DATA_SIZE 32768\n" | |||
|           "#define L1_DATA_LINESIZE 64\n" | |||
|           "#define L2_SIZE 262144\n" | |||
|           "#define L2_LINESIZE 64\n" | |||
|           "#define DTB_DEFAULT_ENTRIES 64\n" | |||
|           "#define DTB_SIZE 4096\n" | |||
|           "#define L2_ASSOCIATIVE 4\n", | |||
|           stdout); | |||
| } | |||
| /* Write the library name for the detected core to stdout: "armv8" followed | |||
|    by a newline for CPU_ARMV8, nothing for any other detect() result. */ | |||
| void get_libname(void) | |||
| { | |||
|     if (detect() == CPU_ARMV8) | |||
|     { | |||
|         fputs("armv8\n", stdout); | |||
|     } | |||
| } | |||
| /* | |||
|  * Walk the "Features" line of /proc/cpuinfo. | |||
|  * | |||
|  * On Linux the first line starting with "Features" is located and split on | |||
|  * whitespace. The loop body is empty, so the function currently prints | |||
|  * nothing. It returns quietly when /proc/cpuinfo cannot be opened or no | |||
|  * usable "Features" line exists, and is a no-op on non-Linux builds. | |||
|  */ | |||
| void get_features(void) | |||
| { | |||
| #if defined(linux) || defined(__linux__) | |||
|     FILE *infile; | |||
|     char buffer[2048], *p, *t; | |||
|     p = (char *) NULL; | |||
|     infile = fopen("/proc/cpuinfo", "r"); | |||
|     /* /proc may be unavailable: there is nothing to report in that case. */ | |||
|     if (infile == NULL) return; | |||
|     while (fgets(buffer, sizeof(buffer), infile)) | |||
|     { | |||
|         if (!strncmp("Features", buffer, 8)) | |||
|         { | |||
|             /* Step over the ':' only when it exists. */ | |||
|             p = strchr(buffer, ':'); | |||
|             if (p != NULL) p++; | |||
|             break; | |||
|         } | |||
|     } | |||
|     fclose(infile); | |||
|     if (p == NULL) return; | |||
|     for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n")) | |||
|     { | |||
|         /* Placeholder: no feature flag is acted on yet. */ | |||
|     } | |||
| #endif | |||
|     return; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -853,11 +853,24 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | |||
| if (get_vendor() == VENDOR_INTEL) { | |||
| cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); | |||
| if (cpuid_level >= 0x80000006) { | |||
| cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | |||
| if(L2.size<=0){ | |||
| //If we didn't detect L2 correctly before, | |||
| cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | |||
| L2.size = BITMASK(ecx, 16, 0xffff); | |||
| L2.associative = BITMASK(ecx, 12, 0x0f); | |||
| switch (L2.associative){ | |||
| case 0x06: | |||
| L2.associative = 8; | |||
| break; | |||
| case 0x08: | |||
| L2.associative = 16; | |||
| break; | |||
| } | |||
| L2.size = BITMASK(ecx, 16, 0xffff); | |||
| L2.associative = BITMASK(ecx, 12, 0x0f); | |||
| L2.linesize = BITMASK(ecx, 0, 0xff); | |||
| L2.linesize = BITMASK(ecx, 0, 0xff); | |||
| } | |||
| } | |||
| } | |||
| @@ -916,10 +929,22 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | |||
| if (L2ITB.associative == 0xff) L2ITB.associative = 0; | |||
| L2ITB.linesize = BITMASK(ebx, 0, 0xff); | |||
| L2.size = BITMASK(ecx, 16, 0xffff); | |||
| L2.associative = BITMASK(ecx, 12, 0xf); | |||
| if (L2.associative == 0xff) L2.associative = 0; | |||
| L2.linesize = BITMASK(ecx, 0, 0xff); | |||
| if(L2.size <= 0){ | |||
| //If we didn't detect L2 correctly before, | |||
| L2.size = BITMASK(ecx, 16, 0xffff); | |||
| L2.associative = BITMASK(ecx, 12, 0xf); | |||
| switch (L2.associative){ | |||
| case 0x06: | |||
| L2.associative = 8; | |||
| break; | |||
| case 0x08: | |||
| L2.associative = 16; | |||
| break; | |||
| } | |||
| if (L2.associative == 0xff) L2.associative = 0; | |||
| L2.linesize = BITMASK(ecx, 0, 0xff); | |||
| } | |||
| L3.size = BITMASK(edx, 18, 0x3fff) * 512; | |||
| L3.associative = BITMASK(edx, 12, 0xf); | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -13,7 +13,7 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| 3. Neither the name of the OpenBLAS project nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| @@ -40,6 +40,8 @@ static int parallel = 1; | |||
| static int parallel = 0; | |||
| #endif | |||
| #ifdef NEEDBUNDERSCORE | |||
| int CNAME() { | |||
| return parallel; | |||
| } | |||
| @@ -48,5 +50,10 @@ int NAME() { | |||
| return parallel; | |||
| } | |||
| #else | |||
| //The CNAME and NAME are the same. | |||
| int NAME() { | |||
| return parallel; | |||
| } | |||
| #endif | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -88,12 +88,18 @@ dll : ../$(LIBDLLNAME) | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) | |||
| libopenblas.def : gensymbol | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||
| libgoto_hpl.def : gensymbol | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||
| ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX)) | |||
| $(LIBDYNNAME) : ../$(LIBNAME) osx.def | |||
| else | |||
| ../$(LIBNAME).renamed : ../$(LIBNAME) objconv.def | |||
| $(OBJCONV) @objconv.def ../$(LIBNAME) ../$(LIBNAME).renamed | |||
| $(LIBDYNNAME) : ../$(LIBNAME).renamed osx.def | |||
| endif | |||
| $(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
| dllinit.$(SUFFIX) : dllinit.c | |||
| @@ -103,16 +109,22 @@ ifeq ($(OSNAME), Linux) | |||
| so : ../$(LIBSONAME) | |||
| ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX)) | |||
| ../$(LIBSONAME) : ../$(LIBNAME) linktest.c | |||
| else | |||
| ../$(LIBNAME).renamed : ../$(LIBNAME) objcopy.def | |||
| $(OBJCOPY) --redefine-syms objcopy.def ../$(LIBNAME) ../$(LIBNAME).renamed | |||
| ../$(LIBSONAME) : ../$(LIBNAME).renamed linktest.c | |||
| endif | |||
| ifneq ($(C_COMPILER), LSB) | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | |||
| -Wl,--whole-archive $< -Wl,--no-whole-archive \ | |||
| -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| else | |||
| #for LSB | |||
| env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | |||
| -Wl,--whole-archive $< -Wl,--no-whole-archive \ | |||
| -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) | |||
| $(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| endif | |||
| @@ -125,9 +137,15 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD)) | |||
| so : ../$(LIBSONAME) | |||
| ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX)) | |||
| ../$(LIBSONAME) : ../$(LIBNAME) linktest.c | |||
| else | |||
| ../$(LIBNAME).renamed : ../$(LIBNAME) objcopy.def | |||
| $(OBJCOPY) --redefine-syms objcopy.def ../$(LIBNAME) ../$(LIBNAME).renamed | |||
| ../$(LIBSONAME) : ../$(LIBNAME).renamed linktest.c | |||
| endif | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | |||
| -Wl,--whole-archive $< -Wl,--no-whole-archive \ | |||
| $(FEXTRALIB) $(EXTRALIB) | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| rm -f linktest | |||
| @@ -178,17 +196,23 @@ static : ../$(LIBNAME) | |||
| rm -f goto.$(SUFFIX) | |||
| osx.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) | |||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||
| aix.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) | |||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||
| objcopy.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||
| objconv.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F) | |||
| test : linktest.c | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | |||
| rm -f linktest | |||
| linktest.c : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > linktest.c | |||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > linktest.c | |||
| clean :: | |||
| @rm -f *.def *.dylib __.SYMDEF* | |||
| @@ -2784,22 +2784,26 @@ $bu = $ARGV[2]; | |||
| $bu = "" if (($bu eq "0") || ($bu eq "1")); | |||
| $symbolprefix = $ARGV[9]; | |||
| $symbolsuffix = $ARGV[10]; | |||
| if ($ARGV[0] eq "osx"){ | |||
| @underscore_objs = (@underscore_objs, @misc_common_objs); | |||
| @no_underscore_objs = (@no_underscore_objs, @misc_common_objs); | |||
| foreach $objs (@underscore_objs) { | |||
| print "_", $objs, $bu, "\n"; | |||
| print "_", $symbolprefix, $objs, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print "_", $objs, $bu, $bu, "\n"; | |||
| print "_", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print "_", $objs, "\n"; | |||
| print "_", $symbolprefix, $objs, $symbolsuffix, "\n"; | |||
| } | |||
| # } | |||
| exit(0); | |||
| @@ -2811,16 +2815,58 @@ if ($ARGV[0] eq "aix"){ | |||
| @no_underscore_objs = (@no_underscore_objs, @misc_common_objs); | |||
| foreach $objs (@underscore_objs) { | |||
| print $objs, $bu, "\n"; | |||
| print $symbolprefix, $objs, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print $symbolprefix, $objs, $symbolsuffix, "\n"; | |||
| } | |||
| # } | |||
| exit(0); | |||
| } | |||
| # "objcopy" mode: print one "<old name> <new name>" pair per line, where the | |||
| # new name is the old name wrapped in $symbolprefix / $symbolsuffix. | |||
| # NOTE(review): presumably consumed as an objcopy --redefine-syms file; confirm against the Makefile. | |||
| if ($ARGV[0] eq "objcopy"){ | |||
| # @misc_common_objs is appended to BOTH lists, so those names are emitted | |||
| # once with the $bu suffix and once without it. | |||
| @underscore_objs = (@underscore_objs, @misc_common_objs); | |||
| @no_underscore_objs = (@no_underscore_objs, @misc_common_objs); | |||
| # Names that carry a single $bu suffix. | |||
| foreach $objs (@underscore_objs) { | |||
| print $objs, $bu, " ", $symbolprefix, $objs, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| # Names that carry the $bu suffix twice. | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print $objs, $bu, $bu, " ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| # Names emitted without any $bu suffix (the guard on $ARGV[4] is disabled). | |||
| foreach $objs (@no_underscore_objs) { | |||
| print $objs, " ", $symbolprefix, $objs, $symbolsuffix, "\n"; | |||
| } | |||
| # } | |||
| # This mode is complete; stop before any later mode is examined. | |||
| exit(0); | |||
| } | |||
| if ($ARGV[0] eq "objconv"){ | |||
| @underscore_objs = (@underscore_objs, @misc_common_objs); | |||
| @no_underscore_objs = (@no_underscore_objs, @misc_common_objs); | |||
| foreach $objs (@underscore_objs) { | |||
| print "-nr:_", $objs, $bu, ":_", $symbolprefix, $objs, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print $objs, $bu, $bu, "\n"; | |||
| print "-nr:_", $objs, $bu, $bu, ":_", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print $objs, "\n"; | |||
| print "-nr:_", $objs, ":_", $symbolprefix, $objs, $symbolsuffix, "\n"; | |||
| } | |||
| # } | |||
| exit(0); | |||
| @@ -2835,22 +2881,22 @@ if ($ARGV[0] eq "win2k"){ | |||
| foreach $objs (@underscore_objs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t$objs=$objs","_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","_ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t",$objs, "_=$objs","_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, "_", $symbolsuffix, "=$objs","_ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t$uppercase=$objs", "_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "_ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t$objs=$objs","__ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t",$objs, "__=$objs","__ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, "__", $symbolsuffix, "=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t$uppercase=$objs", "__ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "__ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| @@ -2859,15 +2905,15 @@ if ($ARGV[0] eq "win2k"){ | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t",$objs, "_=$objs","_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, "_", $symbolsuffix, "=$objs","_ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t$uppercase=$objs", "_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "_ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| foreach $objs (@no_underscore_objs) { | |||
| print "\t",$objs,"=$objs"," \@", $count, "\n"; | |||
| print "\t",$symbolprefix,$objs,$symbolsuffix,"=$objs"," \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| @@ -2880,11 +2926,11 @@ if ($ARGV[0] eq "win2khpl"){ | |||
| foreach $objs (@hplobjs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t$objs=$objs","_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","_ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t",$objs, "_=$objs","_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, "_", $symbolsuffix, "=$objs","_ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t$uppercase=$objs", "_ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "_ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| @@ -2905,24 +2951,24 @@ if ($ARGV[0] eq "microsoft"){ | |||
| foreach $objs (@underscore_objs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t$objs = $objs","_\n"; | |||
| print "\t",$symbolprefix, $objs, $symbolsuffix, " = $objs","_\n"; | |||
| $count ++; | |||
| print "\t$objs\_ = $objs","_\n"; | |||
| print "\t",$symbolprefix, $objs, "\_", $symbolsuffix, " = $objs","_\n"; | |||
| $count ++; | |||
| print "\t$uppercase = $objs","_\n"; | |||
| print "\t",$symbolprefix, $uppercase, $symbolsuffix, " = $objs","_\n"; | |||
| $count ++; | |||
| print "\t$uppercase\_ = $objs","_\n"; | |||
| print "\t",$symbolprefix, $uppercase, "\_", $symbolsuffix, " = $objs","_\n"; | |||
| $count ++; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t$objs=$objs","__ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t",$objs, "__=$objs","__ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $objs, "__", $symbolsuffix, "=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t$uppercase=$objs", "__ \@", $count, "\n"; | |||
| print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "__ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| @@ -2936,16 +2982,16 @@ if ($ARGV[0] eq "linktest"){ | |||
| print "int main(void){\n"; | |||
| foreach $objs (@underscore_objs) { | |||
| print $objs, $bu, "();\n" if $objs ne "xerbla"; | |||
| print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print $objs, $bu, $bu, "();\n"; | |||
| print $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print $objs, "();\n"; | |||
| print $symbolprefix, $objs, $symbolsuffix, "();\n"; | |||
| } | |||
| # } | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -746,12 +747,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SUBARCHITECTURE "ARMV8" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DARMV8 " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
| "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4" | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " | |||
| #define LIBNAME "armv8" | |||
| #define CORENAME "ARMV8" | |||
| #define CORENAME "XGENE1" | |||
| #else | |||
| #endif | |||
| @@ -801,6 +801,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef __aarch64__ /* AArch64 build: textually include the ARM64 CPU-id source and mark the platform supported */ | |||
| #include "cpuid_arm64.c" | |||
| #define OPENBLAS_SUPPORTED /* keeps the "#ifndef OPENBLAS_SUPPORTED / #error" check that follows from firing */ | |||
| #endif | |||
| #ifndef OPENBLAS_SUPPORTED | |||
| #error "This arch/CPU is not supported by OpenBLAS." | |||
| @@ -856,7 +861,7 @@ int main(int argc, char *argv[]){ | |||
| #ifdef FORCE | |||
| printf("CORE=%s\n", CORENAME); | |||
| #else | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) | |||
| printf("CORE=%s\n", get_corename()); | |||
| #endif | |||
| #endif | |||
| @@ -956,7 +961,7 @@ int main(int argc, char *argv[]){ | |||
| #ifdef FORCE | |||
| printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | |||
| #else | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) | |||
| printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | |||
| #endif | |||
| #endif | |||
| @@ -28,6 +28,10 @@ ifeq ($(TARGET), LOONGSON3B) | |||
| USE_TRMM = 1 | |||
| endif | |||
| # Enable USE_TRMM when building for TARGET=GENERIC, mirroring the LOONGSON3B case above. | |||
| # NOTE(review): presumably selects the dedicated TRMM kernel sources; confirm where USE_TRMM is consumed. | |||
| ifeq ($(TARGET), GENERIC) | |||
| USE_TRMM = 1 | |||
| endif | |||
| SKERNELOBJS += \ | |||
| @@ -80,14 +80,14 @@ DGEMVTKERNEL = ../arm/gemv_t.c | |||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||
| ZGEMVTKERNEL = ../arm/zgemv_t.c | |||
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMKERNEL = sgemm_kernel_4x4.S | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| @@ -0,0 +1,875 @@ | |||
| #include "common.h" | |||
| #include <stdbool.h> | |||
| int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc ,BLASLONG offset) | |||
| { /* Tiled kernel: for each tile of C it stores alpha * sum over k of (ba value * bb value), overwriting C; always returns 0. NOTE(review): presumably the generic 4x4 TRMM micro-kernel working on packed panels - confirm against the caller. */ | |||
| BLASLONG i,j,k; /* i: row-tile counter, j: column-panel counter, k: inner-product step (i and k are also reused as scratch at the end of each panel) */ | |||
| FLOAT *C0,*C1,*C2,*C3,*ptrba,*ptrbb; /* C0..C3: output cursors spaced ldc elements apart; ptrba/ptrbb: read cursors into ba/bb */ | |||
| FLOAT res0_0; /* resX_Y accumulates the value that is finally stored to CX[Y] */ | |||
| FLOAT res0_1; | |||
| FLOAT res0_2; | |||
| FLOAT res0_3; | |||
| FLOAT res1_0; | |||
| FLOAT res1_1; | |||
| FLOAT res1_2; | |||
| FLOAT res1_3; | |||
| FLOAT res2_0; | |||
| FLOAT res2_1; | |||
| FLOAT res2_2; | |||
| FLOAT res2_3; | |||
| FLOAT res3_0; | |||
| FLOAT res3_1; | |||
| FLOAT res3_2; | |||
| FLOAT res3_3; | |||
| FLOAT a0; /* a0/a1: values loaded from ptrba (alternated while walking one k-step) */ | |||
| FLOAT a1; | |||
| FLOAT b0; /* b0..b3: values loaded from ptrbb for one k-step */ | |||
| FLOAT b1; | |||
| FLOAT b2; | |||
| FLOAT b3; | |||
| BLASLONG off, temp; /* off: running k offset derived from `offset`; temp: loop trip count, later the number of k-steps to skip */ | |||
| bool left; /* runtime mirror of the LEFT macro */ | |||
| bool transposed; /* runtime mirror of the TRANSA macro */ | |||
| bool backwards; | |||
| #ifdef LEFT | |||
| left = true; | |||
| #else | |||
| left = false; | |||
| #endif | |||
| #ifdef TRANSA | |||
| transposed = true; | |||
| #else | |||
| transposed = false; | |||
| #endif | |||
| backwards = left != transposed; /* true: skip the first `off` k-steps and run the remaining bk-off; false: run the first off+tile-size steps and skip the rest afterwards */ | |||
| if (!left) { | |||
| off = -offset; /* non-LEFT case: start from the negated offset; it grows by the panel width after each column panel */ | |||
| } | |||
| for (j=0; j<bn/4; j+=1) // do blocks of the Mx4 loops | |||
| { | |||
| C0 = C; /* four output vectors of this panel, ldc elements apart */ | |||
| C1 = C0+ldc; | |||
| C2 = C1+ldc; | |||
| C3 = C2+ldc; | |||
| if (left) { | |||
| off = offset; /* LEFT case: the offset restarts for every column panel */ | |||
| } | |||
| ptrba = ba; /* every column panel re-reads ba from its start */ | |||
| for (i=0; i<bm/4; i+=1) // do blocks of 4x4 | |||
| { | |||
| ptrbb = bb; | |||
| if (backwards) | |||
| { | |||
| ptrba += off*4; // number of values in A | |||
| ptrbb += off*4; // number of values in B | |||
| } | |||
| res0_0 = 0; | |||
| res0_1 = 0; | |||
| res0_2 = 0; | |||
| res0_3 = 0; | |||
| res1_0 = 0; | |||
| res1_1 = 0; | |||
| res1_2 = 0; | |||
| res1_3 = 0; | |||
| res2_0 = 0; | |||
| res2_1 = 0; | |||
| res2_2 = 0; | |||
| res2_3 = 0; | |||
| res3_0 = 0; | |||
| res3_1 = 0; | |||
| res3_2 = 0; | |||
| res3_3 = 0; | |||
| temp = backwards ? bk-off : /* number of k-steps accumulated for this tile */ | |||
| left ? off + 4 : // number of values in A | |||
| off + 4; // number of values in B | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; /* each k-step reads 4 values from bb and 4 from ba and updates all 16 accumulators */ | |||
| b1 = ptrbb[1]; | |||
| b2 = ptrbb[2]; | |||
| b3 = ptrbb[3]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| res1_0 += a0*b1; | |||
| res2_0 += a0*b2; | |||
| res3_0 += a0*b3; | |||
| a1 = ptrba[1]; | |||
| res0_1 += a1*b0; | |||
| res1_1 += a1*b1; | |||
| res2_1 += a1*b2; | |||
| res3_1 += a1*b3; | |||
| a0 = ptrba[2]; | |||
| res0_2 += a0*b0; | |||
| res1_2 += a0*b1; | |||
| res2_2 += a0*b2; | |||
| res3_2 += a0*b3; | |||
| a1 = ptrba[3]; | |||
| res0_3 += a1*b0; | |||
| res1_3 += a1*b1; | |||
| res2_3 += a1*b2; | |||
| res3_3 += a1*b3; | |||
| ptrba = ptrba+4; | |||
| ptrbb = ptrbb+4; | |||
| } | |||
| res0_0 *= alpha; /* scale every accumulator by alpha before it is stored */ | |||
| res0_1 *= alpha; | |||
| res0_2 *= alpha; | |||
| res0_3 *= alpha; | |||
| res1_0 *= alpha; | |||
| res1_1 *= alpha; | |||
| res1_2 *= alpha; | |||
| res1_3 *= alpha; | |||
| res2_0 *= alpha; | |||
| res2_1 *= alpha; | |||
| res2_2 *= alpha; | |||
| res2_3 *= alpha; | |||
| res3_0 *= alpha; | |||
| res3_1 *= alpha; | |||
| res3_2 *= alpha; | |||
| res3_3 *= alpha; | |||
| C0[0] = res0_0; /* plain stores: the previous contents of C are overwritten, not accumulated into */ | |||
| C0[1] = res0_1; | |||
| C0[2] = res0_2; | |||
| C0[3] = res0_3; | |||
| C1[0] = res1_0; | |||
| C1[1] = res1_1; | |||
| C1[2] = res1_2; | |||
| C1[3] = res1_3; | |||
| C2[0] = res2_0; | |||
| C2[1] = res2_1; | |||
| C2[2] = res2_2; | |||
| C2[3] = res2_3; | |||
| C3[0] = res3_0; | |||
| C3[1] = res3_1; | |||
| C3[2] = res3_2; | |||
| C3[3] = res3_3; | |||
| if (!backwards) { /* skip the bk-off-4 k-steps that were not accumulated, so ptrba has moved a full bk steps for this tile */ | |||
| temp = bk-off; | |||
| temp = left ? temp - 4 : // number of values in A | |||
| temp - 4; // number of values in B | |||
| ptrba += temp*4; // number of values in A | |||
| ptrbb += temp*4; // number of values in B | |||
| } | |||
| #ifdef LEFT | |||
| off += 4; // number of values in A | |||
| #endif | |||
| C0 = C0+4; | |||
| C1 = C1+4; | |||
| C2 = C2+4; | |||
| C3 = C3+4; | |||
| } | |||
| if ( bm & 2 ) // do any 2x4 loop | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) /* compile-time form of !backwards; from here on the tiles use macros instead of the runtime flags */ | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*2; | |||
| ptrbb = bb + off*4; | |||
| #endif | |||
| res0_0 = 0; | |||
| res0_1 = 0; | |||
| res1_0 = 0; | |||
| res1_1 = 0; | |||
| res2_0 = 0; | |||
| res2_1 = 0; | |||
| res3_0 = 0; | |||
| res3_1 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) /* compile-time form of backwards */ | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+2; // number of values in A | |||
| #else | |||
| temp = off+4; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| b1 = ptrbb[1]; | |||
| b2 = ptrbb[2]; | |||
| b3 = ptrbb[3]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| res1_0 += a0*b1; | |||
| res2_0 += a0*b2; | |||
| res3_0 += a0*b3; | |||
| a1 = ptrba[1]; | |||
| res0_1 += a1*b0; | |||
| res1_1 += a1*b1; | |||
| res2_1 += a1*b2; | |||
| res3_1 += a1*b3; | |||
| ptrba = ptrba+2; | |||
| ptrbb = ptrbb+4; | |||
| } | |||
| res0_0 *= alpha; | |||
| res0_1 *= alpha; | |||
| res1_0 *= alpha; | |||
| res1_1 *= alpha; | |||
| res2_0 *= alpha; | |||
| res2_1 *= alpha; | |||
| res3_0 *= alpha; | |||
| res3_1 *= alpha; | |||
| C0[0] = res0_0; | |||
| C0[1] = res0_1; | |||
| C1[0] = res1_0; | |||
| C1[1] = res1_1; | |||
| C2[0] = res2_0; | |||
| C2[1] = res2_1; | |||
| C3[0] = res3_0; | |||
| C3[1] = res3_1; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 2; // number of values in A | |||
| #else | |||
| temp -= 4; // number of values in B | |||
| #endif | |||
| ptrba += temp*2; | |||
| ptrbb += temp*4; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 2; // number of values in A | |||
| #endif | |||
| C0 = C0+2; | |||
| C1 = C1+2; | |||
| C2 = C2+2; | |||
| C3 = C3+2; | |||
| } | |||
| if ( bm & 1 ) // do any 1x4 loop | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*1; | |||
| ptrbb = bb + off*4; | |||
| #endif | |||
| res0_0 = 0; | |||
| res1_0 = 0; | |||
| res2_0 = 0; | |||
| res3_0 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+1; // number of values in A | |||
| #else | |||
| temp = off+4; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| b1 = ptrbb[1]; | |||
| b2 = ptrbb[2]; | |||
| b3 = ptrbb[3]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| res1_0 += a0*b1; | |||
| res2_0 += a0*b2; | |||
| res3_0 += a0*b3; | |||
| ptrba = ptrba+1; | |||
| ptrbb = ptrbb+4; | |||
| } | |||
| res0_0 *= alpha; | |||
| res1_0 *= alpha; | |||
| res2_0 *= alpha; | |||
| res3_0 *= alpha; | |||
| C0[0] = res0_0; | |||
| C1[0] = res1_0; | |||
| C2[0] = res2_0; | |||
| C3[0] = res3_0; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 1; // number of values in A | |||
| #else | |||
| temp -= 4; // number of values in B | |||
| #endif | |||
| ptrba += temp*1; | |||
| ptrbb += temp*4; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 1; // number of values in A | |||
| #endif | |||
| C0 = C0+1; | |||
| C1 = C1+1; | |||
| C2 = C2+1; | |||
| C3 = C3+1; | |||
| } | |||
| #if defined(TRMMKERNEL) && !defined(LEFT) /* NOTE(review): this update is guarded by TRMMKERNEL while the initial `off = -offset` above is not - confirm TRMMKERNEL is always defined when this file is built */ | |||
| off += 4; | |||
| #endif | |||
| k = (bk<<2); /* k reused as scratch: bk*4 elements, the amount bb advances per 4-wide panel */ | |||
| bb = bb+k; | |||
| i = (ldc<<2); /* i reused as scratch: ldc*4 elements, the amount C advances per 4-wide panel */ | |||
| C = C+i; | |||
| } | |||
| for (j=0; j<(bn&2); j+=2) // do the Mx2 loops (body runs at most once, when bit 1 of bn is set) | |||
| { | |||
| C0 = C; | |||
| C1 = C0+ldc; | |||
| #if defined(TRMMKERNEL) && defined(LEFT) | |||
| off = offset; | |||
| #endif | |||
| ptrba = ba; | |||
| for (i=0; i<bm/4; i+=1) // do blocks of 4x2 | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*4; | |||
| ptrbb = bb + off*2; | |||
| #endif | |||
| res0_0 = 0; | |||
| res0_1 = 0; | |||
| res0_2 = 0; | |||
| res0_3 = 0; | |||
| res1_0 = 0; | |||
| res1_1 = 0; | |||
| res1_2 = 0; | |||
| res1_3 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+4; // number of values in A | |||
| #else | |||
| temp = off+2; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| b1 = ptrbb[1]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| res1_0 += a0*b1; | |||
| a1 = ptrba[1]; | |||
| res0_1 += a1*b0; | |||
| res1_1 += a1*b1; | |||
| a0 = ptrba[2]; | |||
| res0_2 += a0*b0; | |||
| res1_2 += a0*b1; | |||
| a1 = ptrba[3]; | |||
| res0_3 += a1*b0; | |||
| res1_3 += a1*b1; | |||
| ptrba = ptrba+4; | |||
| ptrbb = ptrbb+2; | |||
| } | |||
| res0_0 *= alpha; | |||
| res0_1 *= alpha; | |||
| res0_2 *= alpha; | |||
| res0_3 *= alpha; | |||
| res1_0 *= alpha; | |||
| res1_1 *= alpha; | |||
| res1_2 *= alpha; | |||
| res1_3 *= alpha; | |||
| C0[0] = res0_0; | |||
| C0[1] = res0_1; | |||
| C0[2] = res0_2; | |||
| C0[3] = res0_3; | |||
| C1[0] = res1_0; | |||
| C1[1] = res1_1; | |||
| C1[2] = res1_2; | |||
| C1[3] = res1_3; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 4; // number of values in A | |||
| #else | |||
| temp -= 2; // number of values in B | |||
| #endif | |||
| ptrba += temp*4; | |||
| ptrbb += temp*2; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 4; // number of values in A | |||
| #endif | |||
| C0 = C0+4; | |||
| C1 = C1+4; | |||
| } | |||
| if ( bm & 2 ) // do any 2x2 loop | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*2; | |||
| ptrbb = bb + off*2; | |||
| #endif | |||
| res0_0 = 0; | |||
| res0_1 = 0; | |||
| res1_0 = 0; | |||
| res1_1 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+2; // number of values in A | |||
| #else | |||
| temp = off+2; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| b1 = ptrbb[1]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| res1_0 += a0*b1; | |||
| a1 = ptrba[1]; | |||
| res0_1 += a1*b0; | |||
| res1_1 += a1*b1; | |||
| ptrba = ptrba+2; | |||
| ptrbb = ptrbb+2; | |||
| } | |||
| res0_0 *= alpha; | |||
| res0_1 *= alpha; | |||
| res1_0 *= alpha; | |||
| res1_1 *= alpha; | |||
| C0[0] = res0_0; | |||
| C0[1] = res0_1; | |||
| C1[0] = res1_0; | |||
| C1[1] = res1_1; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 2; // number of values in A | |||
| #else | |||
| temp -= 2; // number of values in B | |||
| #endif | |||
| ptrba += temp*2; | |||
| ptrbb += temp*2; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 2; // number of values in A | |||
| #endif | |||
| C0 = C0+2; | |||
| C1 = C1+2; | |||
| } | |||
| if ( bm & 1 ) // do any 1x2 loop | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*1; | |||
| ptrbb = bb + off*2; | |||
| #endif | |||
| res0_0 = 0; | |||
| res1_0 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+1; // number of values in A | |||
| #else | |||
| temp = off+2; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| b1 = ptrbb[1]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| res1_0 += a0*b1; | |||
| ptrba = ptrba+1; | |||
| ptrbb = ptrbb+2; | |||
| } | |||
| res0_0 *= alpha; | |||
| res1_0 *= alpha; | |||
| C0[0] = res0_0; | |||
| C1[0] = res1_0; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 1; // number of values in A | |||
| #else | |||
| temp -= 2; // number of values in B | |||
| #endif | |||
| ptrba += temp*1; | |||
| ptrbb += temp*2; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 1; // number of values in A | |||
| #endif | |||
| C0 = C0+1; | |||
| C1 = C1+1; | |||
| } | |||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||
| off += 2; | |||
| #endif | |||
| k = (bk<<1); /* bk*2 elements: bb advance for the 2-wide panel */ | |||
| bb = bb+k; | |||
| i = (ldc<<1); /* ldc*2 elements: C advance for the 2-wide panel */ | |||
| C = C+i; | |||
| } | |||
| for (j=0; j<(bn&1); j+=1) // do the Mx1 loops (body runs at most once, when bn is odd) | |||
| { | |||
| C0 = C; | |||
| #if defined(TRMMKERNEL) && defined(LEFT) | |||
| off = offset; | |||
| #endif | |||
| ptrba = ba; | |||
| for (i=0; i<bm/4; i+=1) // do blocks of 4x1 loops | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*4; | |||
| ptrbb = bb + off*1; | |||
| #endif | |||
| res0_0 = 0; | |||
| res0_1 = 0; | |||
| res0_2 = 0; | |||
| res0_3 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+4; // number of values in A | |||
| #else | |||
| temp = off+1; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| a1 = ptrba[1]; | |||
| res0_1 += a1*b0; | |||
| a0 = ptrba[2]; | |||
| res0_2 += a0*b0; | |||
| a1 = ptrba[3]; | |||
| res0_3 += a1*b0; | |||
| ptrba = ptrba+4; | |||
| ptrbb = ptrbb+1; | |||
| } | |||
| res0_0 *= alpha; | |||
| res0_1 *= alpha; | |||
| res0_2 *= alpha; | |||
| res0_3 *= alpha; | |||
| C0[0] = res0_0; | |||
| C0[1] = res0_1; | |||
| C0[2] = res0_2; | |||
| C0[3] = res0_3; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 4; // number of values in A | |||
| #else | |||
| temp -= 1; // number of values in B | |||
| #endif | |||
| ptrba += temp*4; | |||
| ptrbb += temp*1; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 4; // number of values in A | |||
| #endif | |||
| C0 = C0+4; | |||
| } | |||
| if ( bm & 2 ) // do any 2x1 loop | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*2; | |||
| ptrbb = bb + off*1; | |||
| #endif | |||
| res0_0 = 0; | |||
| res0_1 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+2; // number of values in A | |||
| #else | |||
| temp = off+1; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| a1 = ptrba[1]; | |||
| res0_1 += a1*b0; | |||
| ptrba = ptrba+2; | |||
| ptrbb = ptrbb+1; | |||
| } | |||
| res0_0 *= alpha; | |||
| res0_1 *= alpha; | |||
| C0[0] = res0_0; | |||
| C0[1] = res0_1; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 2; // number of values in A | |||
| #else | |||
| temp -= 1; // number of values in B | |||
| #endif | |||
| ptrba += temp*2; | |||
| ptrbb += temp*1; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 2; // number of values in A | |||
| #endif | |||
| C0 = C0+2; | |||
| } | |||
| if ( bm & 1 ) // do any 1x1 loop | |||
| { | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| ptrbb = bb; | |||
| #else | |||
| ptrba += off*1; | |||
| ptrbb = bb + off*1; | |||
| #endif | |||
| res0_0 = 0; | |||
| #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) | |||
| temp = bk-off; | |||
| #elif defined(LEFT) | |||
| temp = off+1; // number of values in A | |||
| #else | |||
| temp = off+1; // number of values in B | |||
| #endif | |||
| for (k=0; k<temp; k++) | |||
| { | |||
| b0 = ptrbb[0]; | |||
| a0 = ptrba[0]; | |||
| res0_0 += a0*b0; | |||
| ptrba = ptrba+1; | |||
| ptrbb = ptrbb+1; | |||
| } | |||
| res0_0 *= alpha; | |||
| C0[0] = res0_0; | |||
| #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| temp = bk - off; | |||
| #ifdef LEFT | |||
| temp -= 1; // number of values in A | |||
| #else | |||
| temp -= 1; // number of values in B | |||
| #endif | |||
| ptrba += temp*1; | |||
| ptrbb += temp*1; | |||
| #endif | |||
| #ifdef LEFT | |||
| off += 1; // number of values in A | |||
| #endif | |||
| C0 = C0+1; | |||
| } | |||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||
| off += 1; | |||
| #endif | |||
| k = (bk<<0); /* bk elements: bb advance for the 1-wide panel */ | |||
| bb = bb+k; | |||
| C = C+ldc; | |||
| } | |||
| return 0; /* no error path: the kernel always reports success */ | |||
| } | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -0,0 +1,52 @@ | |||
| # Kernel source selection: each variable names the source file used for one kernel of one precision (S, D, C, Z prefixes). | |||
| # TRMM: S and D share trmmkernel_2x2.c; C and Z share ztrmmkernel_2x2.c. | |||
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| # GEMM: per precision, the kernel source, the ONCOPY/OTCOPY sources, and the matching *OBJ object-file names. S and D share the gemm* files; C and Z share the zgemm* files. | |||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| # TRSM: all four precisions use the same four generic sources, one per LN/LT/RN/RT variant. | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| # TODO: CGEMM3MKERNEL should use a 4x4 block size; the file named below is an 8x4 kernel. | |||
| # NOTE(review): unlike the entries above, these two are .S files with "sse3" in the name and no ../generic/ prefix - confirm how the path is resolved and that SSE3 code is acceptable for this target. | |||
| CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,21 +1,22 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -2039,8 +2040,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| /* NOTE(review): SGEMM_DEFAULT_UNROLL_M/N are defined twice below (2, then 4) with no #undef in between. This looks like the old and new lines of a diff hunk shown without +/- markers - confirm which pair is live. */ | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||
| @@ -2122,25 +2123,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x0ffffUL | |||
| /* NOTE(review): several macros in this section are defined twice with different values and no #undef (SGEMM/DGEMM UNROLL_N here; SGEMM, DGEMM, CGEMM and ZGEMM UNROLL_M in the branches below). In every such pair the second value is 2. This looks like old and new diff lines shown without +/- markers - confirm which definitions are live. */ | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||
| #define QGEMM_DEFAULT_UNROLL_N 2 | |||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||
| #define XGEMM_DEFAULT_UNROLL_N 1 | |||
| /* The UNROLL_M values are selected per architecture: the first set applies when ARCH_X86 is defined, the second set otherwise. */ | |||
| #ifdef ARCH_X86 | |||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||
| #else | |||
| #define SGEMM_DEFAULT_UNROLL_M 8 | |||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||
| #define CGEMM_DEFAULT_UNROLL_M 4 | |||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||
| #endif | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011-2012, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2014, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -120,4 +121,4 @@ void test_fork_safety(void) | |||
| CU_ASSERT(WEXITSTATUS (child_status) == 0); | |||
| } | |||
| } | |||
| #endif | |||
| #endif | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,10 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||