| Author | SHA1 | Message | Date |
|---|---|---|---|
|
|
034ffa93fa | Provide iaxpy and cblas_iaxpy for integer vectors. make INTEGER_PRECISION=1 | 10 years ago |
| @@ -114,6 +114,9 @@ NO_AFFINITY = 1 | |||
| # Support for IEEE quad precision (it's *real* REAL*16) (under testing) | |||
| # QUAD_PRECISION = 1 | |||
| # Support for integer matrix and vector (e.g. iaxpy) | |||
| # INTEGER_PRECISION = 1 | |||
| # Threads keep working for a while after finishing a BLAS operation | |||
| # to reduce thread activate/deactivate overhead. You can set this | |||
| # timeout to improve performance. This number should be from 4 to 30 | |||
| @@ -309,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION | |||
| NO_EXPRECISION = 1 | |||
| endif | |||
| ifdef INTEGER_PRECISION | |||
| CCOMMON_OPT += -DINTEGER_PRECISION | |||
| endif | |||
| ifneq ($(ARCH), x86) | |||
| ifneq ($(ARCH), x86_64) | |||
| NO_EXPRECISION = 1 | |||
| @@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
| CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
| ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
| XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
| IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
| COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
| @@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
| BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | |||
| endif | |||
| ifdef INTEGER_PRECISION | |||
| BLASOBJS += $(IBLASOBJS) | |||
| BLASOBJS_P += $(IBLASOBJS_P) | |||
| endif | |||
| $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | |||
| $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | |||
| $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | |||
| $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | |||
| $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | |||
| $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | |||
| $(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX | |||
| $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| $(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| @@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| $(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| $(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| $(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| $(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
| libs :: $(BLASOBJS) $(COMMONOBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| @@ -276,6 +276,11 @@ typedef int blasint; | |||
| #define SIZE 8 | |||
| #define BASE_SHIFT 3 | |||
| #define ZBASE_SHIFT 4 | |||
| #elif defined(INTEGER) //extend for integer matrix | |||
| #define FLOAT int | |||
| #define SIZE 4 | |||
| #define BASE_SHIFT 2 | |||
| #define ZBASE_SHIFT 3 | |||
| #else | |||
| #define FLOAT float | |||
| #define SIZE 4 | |||
| @@ -0,0 +1,9 @@ | |||
| #ifndef COMMON_I_H /* include guard for the integer-precision kernel name mappings */ | |||
| #define COMMON_I_H | |||
| #ifndef DYNAMIC_ARCH | |||
| #define IAXPYU_K iaxpy_k /* without DYNAMIC_ARCH the macro expands directly to the iaxpy_k symbol */ | |||
| #else | |||
| #error /* no IAXPYU_K mapping is provided when DYNAMIC_ARCH is defined; compilation stops here */ | |||
| #endif | |||
| #endif | |||
| @@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo | |||
| void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||
| void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||
| void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *); | |||
| void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||
| void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||
| void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||
| @@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double, | |||
| double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
| int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble, | |||
| xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||
| int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int, | |||
| int *, BLASLONG, int *, BLASLONG, int *, BLASLONG); | |||
| int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float, | |||
| float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
| int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double, | |||
| @@ -47,6 +47,10 @@ | |||
| #include "common_z.h" | |||
| #include "common_x.h" | |||
| #ifdef INTEGER_PRECISION | |||
| #include "common_i.h" | |||
| #endif | |||
| #ifndef COMPLEX | |||
| #ifdef XDOUBLE | |||
| @@ -635,6 +639,9 @@ | |||
| #define OMATCOPY_K_CT DOMATCOPY_K_CT | |||
| #define OMATCOPY_K_RT DOMATCOPY_K_RT | |||
| #define GEADD_K DGEADD_K | |||
| #elif defined(INTEGER) | |||
| #define AXPYU_K IAXPYU_K | |||
| #else | |||
| #define AMAX_K SAMAX_K | |||
| @@ -65,6 +65,7 @@ extern int blas_omp_linked; | |||
| #define BLAS_XDOUBLE 0x0002U | |||
| #define BLAS_REAL 0x0000U | |||
| #define BLAS_COMPLEX 0x0004U | |||
| #define BLAS_INTEGER 0x0008U | |||
| #define BLAS_TRANSA 0x0030U /* 2bit */ | |||
| #define BLAS_TRANSA_N 0x0000U | |||
| @@ -189,6 +189,20 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| args -> b, args -> ldb, | |||
| args -> c, args -> ldc, sb); | |||
| } else | |||
| #endif | |||
| #ifdef INTEGER_PRECISION | |||
| if (mode & BLAS_INTEGER){ | |||
| /* REAL / Integer */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, int, | |||
| int *, BLASLONG, int *, BLASLONG, | |||
| int *, BLASLONG, void *) = func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((int *)args -> alpha)[0], | |||
| args -> a, args -> lda, | |||
| args -> b, args -> ldb, | |||
| args -> c, args -> ldc, sb); | |||
| } else | |||
| #endif | |||
| if (mode & BLAS_DOUBLE){ | |||
| /* REAL / Double */ | |||
| @@ -253,6 +253,15 @@ XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | |||
| endif | |||
| endif | |||
| ifdef INTEGER_PRECISION | |||
| IBLAS1OBJS = \ | |||
| iaxpy.$(SUFFIX) | |||
| IBLAS2OBJS = | |||
| IBLAS3OBJS = | |||
| endif | |||
| endif | |||
| HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \ | |||
| @@ -343,6 +352,9 @@ CZBLAS3OBJS = \ | |||
| cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ | |||
| cblas_zgeadd.$(SUFFIX) | |||
| CIBLAS1OBJS = \ | |||
| cblas_iaxpy.$(SUFFIX) | |||
| ifeq ($(SUPPORT_GEMM3M), 1) | |||
| @@ -372,6 +384,10 @@ ZBLAS1OBJS += $(CZBLAS1OBJS) | |||
| ZBLAS2OBJS += $(CZBLAS2OBJS) | |||
| ZBLAS3OBJS += $(CZBLAS3OBJS) | |||
| IBLAS1OBJS += $(CIBLAS1OBJS) | |||
| IBLAS2OBJS += $(CIBLAS2OBJS) | |||
| IBLAS3OBJS += $(CIBLAS3OBJS) | |||
| endif | |||
| SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | |||
| @@ -380,6 +396,7 @@ QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) | |||
| CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | |||
| ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | |||
| XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | |||
| IBLASOBJS = $(IBLAS1OBJS) $(IBLAS2OBJS) $(IBLAS3OBJS) | |||
| #SLAPACKOBJS = \ | |||
| # sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | |||
| @@ -458,6 +475,10 @@ ifdef QUAD_PRECISION | |||
| FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
| endif | |||
| ifdef INTEGER_PRECISION | |||
| FUNCOBJS += $(IBLASOBJS) | |||
| endif | |||
| FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) | |||
| include $(TOPDIR)/Makefile.tail | |||
| @@ -476,17 +497,18 @@ endif | |||
| clean :: | |||
| @rm -f functable.h | |||
| level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) | |||
| level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(IBLAS1OBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | |||
| level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(IBLAS2OBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||
| level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(IBLAS3OBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | |||
| $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS | |||
| $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) \ | |||
| $(CIBLASOBJS) $(CIBLASOBJS_P) : override CFLAGS += -DCBLAS | |||
| srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| @@ -725,6 +747,9 @@ saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c | |||
| daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| iaxpy.$(SUFFIX) iaxpy.$(PSUFFIX) : axpy.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| @@ -1437,6 +1462,9 @@ cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c | |||
| cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| cblas_iaxpy.$(SUFFIX) cblas_iaxpy.$(PSUFFIX) : axpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| @@ -103,6 +103,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||
| mode = BLAS_XDOUBLE | BLAS_REAL; | |||
| #elif defined(DOUBLE) | |||
| mode = BLAS_DOUBLE | BLAS_REAL; | |||
| #elif defined(INTEGER) | |||
| mode = BLAS_INTEGER | BLAS_REAL; | |||
| #else | |||
| mode = BLAS_SINGLE | BLAS_REAL; | |||
| #endif | |||
| @@ -210,6 +210,10 @@ ifndef XAXPYKERNEL | |||
| XAXPYKERNEL = zaxpy.S | |||
| endif | |||
| ifndef IAXPYKERNEL | |||
| IAXPYKERNEL = ../generic/iaxpy.c | |||
| endif | |||
| ### COPY ### | |||
| ifndef SCOPYKERNEL | |||
| @@ -471,6 +475,9 @@ QBLASOBJS += \ | |||
| qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | |||
| qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) | |||
| IBLASOBJS += \ | |||
| iaxpy_k$(TSUFFIX).$(SUFFIX) | |||
| CBLASOBJS += \ | |||
| camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | |||
| casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -645,6 +652,9 @@ $(KDIR)daxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KE | |||
| $(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||
| $(KDIR)iaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)iaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(IAXPYKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DINTEGER $< -o $@ | |||
| $(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@ | |||
| @@ -0,0 +1,52 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2015, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, int da, int *x, BLASLONG inc_x, int *y, BLASLONG inc_y, int *dummy, BLASLONG dummy2) | |||
| { /* Integer AXPY kernel: y[k*inc_y] += da * x[k*inc_x] for k = 0 .. n-1; always returns 0. dummy0, dummy1, dummy and dummy2 are never read. */ | |||
| BLASLONG i=0; /* number of elements processed so far */ | |||
| BLASLONG ix,iy; /* running element offsets into x and y */ | |||
| if ( n < 0 ) return(0); /* negative length: y is left untouched */ | |||
| if ( da == 0 ) return(0); /* a zero scale factor would add nothing to y, so the loop is skipped */ | |||
| ix = 0; /* NOTE(review): offsets start at 0 for any stride sign; presumably the caller pre-adjusts x/y for negative strides - confirm */ | |||
| iy = 0; | |||
| while(i < n) | |||
| { | |||
| y[iy] += da * x[ix] ; /* plain int arithmetic; no overflow check is performed here */ | |||
| ix += inc_x ; /* step both offsets by the caller-supplied strides */ | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| return 0; | |||
| } | |||