| @@ -12,7 +12,7 @@ VERSION = 0.2.8 | |||
| # You can specify the target architecture, otherwise it's | |||
| # automatically detected. | |||
| # TARGET = PENRYN | |||
| TARGET = ARMV7 | |||
| # If you want to support multiple architectures in one binary | |||
| # DYNAMIC_ARCH = 1 | |||
| @@ -25,20 +25,20 @@ VERSION = 0.2.8 | |||
| # FC = gfortran | |||
| # You can even specify a cross compiler. In that case, please set HOSTCC as well. | |||
| # CC = x86_64-w64-mingw32-gcc | |||
| # FC = x86_64-w64-mingw32-gfortran | |||
| CC = arm-linux-gnueabihf-gcc | |||
| FC = arm-linux-gnueabihf-gfortran | |||
| # If you use the cross compiler, please set this host compiler. | |||
| # HOSTCC = gcc | |||
| HOSTCC = gcc | |||
| # If you need 32bit binary, define BINARY=32, otherwise define BINARY=64 | |||
| # BINARY=64 | |||
| #BINARY=32 | |||
| # About threaded BLAS. It will be automatically detected if you don't | |||
| # specify it. | |||
| # To force a single-threaded build, specify USE_THREAD = 0 | |||
| # To force a multi-threaded build, specify USE_THREAD = 1 | |||
| # USE_THREAD = 0 | |||
| USE_THREAD = 0 | |||
| # If you're going to use this library with OpenMP, please comment it in. | |||
| # USE_OPENMP = 1 | |||
| @@ -46,7 +46,7 @@ VERSION = 0.2.8 | |||
| # You can define the maximum number of threads. Basically it should be | |||
| # less than the actual number of cores. If you don't specify one, it's | |||
| # automatically detected by the script. | |||
| # NUM_THREADS = 24 | |||
| NUM_THREADS = 4 | |||
| # If you don't need to generate the shared library, please comment it in. | |||
| # NO_SHARED = 1 | |||
| @@ -54,16 +54,12 @@ VERSION = 0.2.8 | |||
| # If you don't need CBLAS interface, please comment it in. | |||
| # NO_CBLAS = 1 | |||
| # If you only want CBLAS interface without installing Fortran compiler, | |||
| # please comment it in. | |||
| # ONLY_CBLAS = 1 | |||
| # If you don't need LAPACK, please comment it in. | |||
| # If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1. | |||
| # NO_LAPACK = 1 | |||
| #NO_LAPACK = 1 | |||
| # If you don't need LAPACKE (C Interface to LAPACK), please comment it in. | |||
| # NO_LAPACKE = 1 | |||
| #NO_LAPACKE = 1 | |||
| # If you want to use legacy threaded Level 3 implementation. | |||
| # USE_SIMPLE_THREADED_LEVEL3 = 1 | |||
| @@ -76,10 +72,10 @@ VERSION = 0.2.8 | |||
| # Unfortunately most kernels won't give us a high quality buffer. | |||
| # BLAS tries to find the best region before entering the main function, | |||
| # but that takes time. If you don't like it, you can disable it. | |||
| # NO_WARMUP = 1 | |||
| NO_WARMUP = 1 | |||
| # If you want to disable CPU/Memory affinity on Linux. | |||
| # NO_AFFINITY = 1 | |||
| NO_AFFINITY = 1 | |||
| # Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers | |||
| # and OS. However, the performance is low. | |||
| @@ -127,13 +123,13 @@ VERSION = 0.2.8 | |||
| # Common Optimization Flag; | |||
| # The default -O2 is enough. | |||
| # COMMON_OPT = -O2 | |||
| COMMON_OPT = -O0 -marm -mfpu=vfpv3 -fno-omit-frame-pointer | |||
| # Profiling flags | |||
| COMMON_PROF = -pg | |||
| # Build Debug version | |||
| # DEBUG = 1 | |||
| DEBUG = 1 | |||
| # | |||
| # End of user configuration | |||
| @@ -82,19 +82,12 @@ ifeq ($(HOSTCC), loongcc) | |||
| GETARCH_FLAGS += -static | |||
| endif | |||
| # If you don't use Fortran, only CBLAS will be compiled. | |||
| ifeq ($(ONLY_CBLAS), 1) | |||
| NO_LAPACK = 1 | |||
| else | |||
| ONLY_CBLAS = 0 | |||
| endif | |||
| # This operation is expensive, so execution should be once. | |||
| ifndef GOTOBLAS_MAKEFILE | |||
| export GOTOBLAS_MAKEFILE = 1 | |||
| # Generating Makefile.conf and config.h | |||
| DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all) | |||
| DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all) | |||
| ifndef TARGET_CORE | |||
| include $(TOPDIR)/Makefile.conf | |||
| @@ -331,14 +324,16 @@ ifeq ($(ARCH), x86) | |||
| DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | |||
| CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
| DYNAMIC_CORE += SANDYBRIDGE | |||
| #BULLDOZER PILEDRIVER | |||
| endif | |||
| endif | |||
| ifeq ($(ARCH), x86_64) | |||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
| DYNAMIC_CORE += SANDYBRIDGE | |||
| #BULLDOZER PILEDRIVER | |||
| endif | |||
| endif | |||
| @@ -368,6 +363,10 @@ NO_BINARY_MODE = 1 | |||
| BINARY_DEFINED = 1 | |||
| endif | |||
| ifeq ($(ARCH), arm) # ARM gets the same two settings as the preceding architecture block; NOTE(review): presumably bypasses the BINARY=32/64 selection - confirm where these flags are read | |||
| NO_BINARY_MODE = 1 | |||
| BINARY_DEFINED = 1 | |||
| endif | |||
| # | |||
| # C Compiler dependent settings | |||
| # | |||
| @@ -892,23 +891,6 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip) | |||
| LIBS = $(TOPDIR)/$(LIBNAME) | |||
| LIBS_P = $(TOPDIR)/$(LIBNAME_P) | |||
| LIB_COMPONENTS = BLAS | |||
| ifneq ($(NO_CBLAS), 1) | |||
| LIB_COMPONENTS += CBLAS | |||
| endif | |||
| ifneq ($(NO_LAPACK), 1) | |||
| LIB_COMPONENTS += LAPACK | |||
| ifneq ($(NO_LAPACKE), 1) | |||
| LIB_COMPONENTS += LAPACKE | |||
| endif | |||
| endif | |||
| ifeq ($(ONLY_CBLAS), 1) | |||
| LIB_COMPONENTS = CBLAS | |||
| endif | |||
| export OSNAME | |||
| export ARCH | |||
| export CORE | |||
| @@ -935,7 +917,6 @@ export USE_OPENMP | |||
| export CROSS | |||
| export CROSS_SUFFIX | |||
| export NOFORTRAN | |||
| export NO_FBLAS | |||
| export EXTRALIB | |||
| export CEXTRALIB | |||
| export FEXTRALIB | |||
| @@ -363,6 +363,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||
| #include "common_mips64.h" | |||
| #endif | |||
| #ifdef ARCH_ARM | |||
| #include "common_arm.h" | |||
| #endif | |||
| #ifdef OS_LINUX | |||
| #include "common_linux.h" | |||
| #endif | |||
| @@ -574,10 +578,9 @@ typedef struct { | |||
| #include "common_level2.h" | |||
| #include "common_level3.h" | |||
| #include "common_lapack.h" | |||
| #ifdef CBLAS | |||
| # define OPENBLAS_CONST /* see comment in cblas.h */ | |||
| # include "cblas.h" | |||
| /* This header file is generated from "cblas.h" (see Makefile.prebuild). */ | |||
| #include "cblas_noconst.h" | |||
| #endif | |||
| #ifndef ASSEMBLER | |||
| @@ -0,0 +1,163 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #ifndef COMMON_ARM | |||
| #define COMMON_ARM | |||
| /* Memory barriers. ARM is weakly ordered, so empty barriers are unsafe in threaded builds. | |||
| /* __sync_synchronize() is the GCC full-barrier builtin; it is used for both MB and WMB. */ | |||
| #define MB __sync_synchronize() | |||
| #define WMB __sync_synchronize() | |||
| /* Spelling of the inline keyword used by the helpers in this header. */ | |||
| #define INLINE inline | |||
| /* Flag macro defined with no value. NOTE(review): its consumer is not visible in this header. */ | |||
| #define RETURN_BY_COMPLEX | |||
| #ifndef ASSEMBLER | |||
| /* Acquire the spin lock stored at *address. | |||
| /* Spins (yielding) while the word is non-zero, then atomically swaps in 1; | |||
| /* the swap returns the previous value, so a non-zero result means another | |||
| /* thread won the race and the wait is repeated. | |||
| /* __sync_lock_test_and_set is a GCC builtin with acquire semantics. | |||
| /* NOTE(review): release is expected to be a store of 0 to *address by the | |||
| /* unlock routine, which is not part of this header - confirm. */ | |||
| static void INLINE blas_lock(volatile unsigned long *address){ | |||
|   do { | |||
|     while (*address) {YIELDING;}; | |||
|   } while (__sync_lock_test_and_set(address, 1)); | |||
| } | |||
| /* Cycle-counter stub: no hardware counter is read here, the result is always 0. */ | |||
| static inline unsigned int rpcc(void){ | |||
|   return 0; | |||
| } | |||
| /* Integer quotient x / y, returned as int. y is not checked for zero. */ | |||
| static inline int blas_quickdivide(blasint x, blasint y){ | |||
|   blasint quotient = x / y; | |||
|   return quotient; | |||
| } | |||
| #if defined(DOUBLE) /* store width follows the DOUBLE build flag; NOTE(review): presumably fetches the imaginary part of a complex return value - confirm */ | |||
| #define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory") /* store register d1 (64-bit) into res */ | |||
| #else | |||
| #define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory") /* store register s1 (32-bit) into res */ | |||
| #endif | |||
| #define GET_IMAGE_CANCEL /* expands to nothing */ | |||
| #endif | |||
| /* REALNAME is the label exported by PROLOGUE: ASMFNAME when F_INTERFACE is defined, ASMNAME otherwise. */ | |||
| #ifdef F_INTERFACE | |||
| #define REALNAME ASMFNAME | |||
| #else | |||
| #define REALNAME ASMNAME | |||
| #endif | |||
| #if defined(ASSEMBLER) && !defined(NEEDPARAM) /* assembler-only macros: PROLOGUE selects ARM mode (.arm), exports REALNAME and places its label */ | |||
| #define PROLOGUE \ | |||
| .arm ;\ | |||
| .global REALNAME ;\ | |||
| .func REALNAME ;\ | |||
| REALNAME: | |||
| #define EPILOGUE /* empty; NOTE(review): the .func in PROLOGUE has no matching .endfunc here - confirm this is intended */ | |||
| #define PROFCODE /* empty */ | |||
| #endif | |||
| #define SEEK_ADDRESS /* flag macro, defined with no value */ | |||
| #ifndef PAGESIZE /* an earlier PAGESIZE definition takes precedence */ | |||
| #define PAGESIZE ( 4 << 10) /* 4 KiB */ | |||
| #endif | |||
| #define HUGE_PAGESIZE ( 4 << 20) /* 4 MiB */ | |||
| #define BUFFER_SIZE (16 << 20) /* 16 MiB */ | |||
| #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) /* START_ADDRESS and MAX_CPU_NUMBER are not defined in this header */ | |||
| #ifndef MAP_ANONYMOUS /* fall back to MAP_ANON when MAP_ANONYMOUS is not provided */ | |||
| #define MAP_ANONYMOUS MAP_ANON | |||
| #endif | |||
| #endif | |||
| @@ -124,3 +124,9 @@ ARCH_IA64 | |||
| #if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__) | |||
| BINARY_64 | |||
| #endif | |||
| /* 32-bit ARM probe; __arm__ also catches compilers that define neither of the two architecture-level macros. */ | |||
| #if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__) || defined(__arm__) | |||
| ARCH_ARM | |||
| #endif | |||
| @@ -679,6 +679,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "generic" | |||
| #endif | |||
| /* Forced target ARMV7: fixed architecture strings and cache/TLB parameters, no runtime detection. | |||
| /* L2_SIZE is 512 KiB = 524288 bytes (the previous 512488 was a digit transposition). */ | |||
| #ifdef FORCE_ARMV7 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM" | |||
| #define SUBARCHITECTURE "ARMV7" | |||
| #define SUBDIRNAME "arm" | |||
| #define ARCHCONFIG "-DARMV7 " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
| #define LIBNAME "armv7" | |||
| #define CORENAME "ARMV7" | |||
| #endif | |||
| #ifndef FORCE | |||
| #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | |||
| @@ -1793,6 +1793,49 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #ifdef ARMV7 /* tuning constants used when the build defines ARMV7 */ | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL /* low 14 bits set */ | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 /* NOTE(review): S/D/C/Z prefixes presumably mean single, double, complex, double-complex - confirm */ | |||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||
| #define DGEMM_DEFAULT_UNROLL_M 8 /* the only unroll factor here that is not 2 */ | |||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||
| #define SGEMM_DEFAULT_P 64 /* NOTE(review): P/Q/R look like GEMM blocking sizes - confirm against the code that reads them */ | |||
| #define DGEMM_DEFAULT_P 128 | |||
| #define CGEMM_DEFAULT_P 24 | |||
| #define ZGEMM_DEFAULT_P 20 | |||
| #define SGEMM_DEFAULT_Q 192 | |||
| #define DGEMM_DEFAULT_Q 128 | |||
| #define CGEMM_DEFAULT_Q 128 | |||
| #define ZGEMM_DEFAULT_Q 64 | |||
| #define SGEMM_DEFAULT_R 512 | |||
| #define DGEMM_DEFAULT_R 2048 | |||
| #define CGEMM_DEFAULT_R 512 | |||
| #define ZGEMM_DEFAULT_R 512 | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #ifdef GENERIC | |||
| #define SNUMOPT 2 | |||