| Author | SHA1 | Message | Date |
|---|---|---|---|
|
|
034ffa93fa | Provide iaxpy and cblas_iaxpy for integer vectors. make INTEGER_PRECISION=1 | 10 years ago |
| @@ -114,6 +114,9 @@ NO_AFFINITY = 1 | |||||
| # Support for IEEE quad precision(it's *real* REAL*16)( under testing) | # Support for IEEE quad precision(it's *real* REAL*16)( under testing) | ||||
| # QUAD_PRECISION = 1 | # QUAD_PRECISION = 1 | ||||
| # Support for integer matrix and vector (e.g. iaxpy) | |||||
| # INTEGER_PRECISION = 1 | |||||
| # Threads are still working for a while after finishing BLAS operation | # Threads are still working for a while after finishing BLAS operation | ||||
| # to reduce thread activate/deactivate overhead. You can determine | # to reduce thread activate/deactivate overhead. You can determine | ||||
| # time out to improve performance. This number should be from 4 to 30 | # time out to improve performance. This number should be from 4 to 30 | ||||
| @@ -309,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION | |||||
| NO_EXPRECISION = 1 | NO_EXPRECISION = 1 | ||||
| endif | endif | ||||
| ifdef INTEGER_PRECISION | |||||
| CCOMMON_OPT += -DINTEGER_PRECISION | |||||
| endif | |||||
| ifneq ($(ARCH), x86) | ifneq ($(ARCH), x86) | ||||
| ifneq ($(ARCH), x86_64) | ifneq ($(ARCH), x86_64) | ||||
| NO_EXPRECISION = 1 | NO_EXPRECISION = 1 | ||||
| @@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||||
| CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||||
| COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| @@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||||
| BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | ||||
| endif | endif | ||||
| ifdef INTEGER_PRECISION | |||||
| BLASOBJS += $(IBLASOBJS) | |||||
| BLASOBJS_P += $(IBLASOBJS_P) | |||||
| endif | |||||
| $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | ||||
| $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | ||||
| $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | ||||
| $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | ||||
| $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | ||||
| $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | ||||
| $(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX | |||||
| $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| @@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||||
| $(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||||
| libs :: $(BLASOBJS) $(COMMONOBJS) | libs :: $(BLASOBJS) $(COMMONOBJS) | ||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| @@ -276,6 +276,11 @@ typedef int blasint; | |||||
| #define SIZE 8 | #define SIZE 8 | ||||
| #define BASE_SHIFT 3 | #define BASE_SHIFT 3 | ||||
| #define ZBASE_SHIFT 4 | #define ZBASE_SHIFT 4 | ||||
| #elif defined(INTEGER) //extend for integer matrix | |||||
| #define FLOAT int | |||||
| #define SIZE 4 | |||||
| #define BASE_SHIFT 2 | |||||
| #define ZBASE_SHIFT 3 | |||||
| #else | #else | ||||
| #define FLOAT float | #define FLOAT float | ||||
| #define SIZE 4 | #define SIZE 4 | ||||
| @@ -0,0 +1,9 @@ | |||||
/*
 * common_i.h - kernel name mapping for the integer routines.
 *
 * IAXPYU_K is mapped directly onto the iaxpy_k symbol. This header
 * provides no mapping for DYNAMIC_ARCH builds, so including it in such
 * a build stops compilation with an explanatory diagnostic.
 */
#ifndef COMMON_I_H
#define COMMON_I_H

#ifndef DYNAMIC_ARCH

#define IAXPYU_K iaxpy_k

#else

/* A bare #error gives no hint about the cause; say what is unsupported. */
#error "common_i.h: IAXPYU_K (integer axpy kernel) has no DYNAMIC_ARCH mapping"

#endif

#endif
| @@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo | |||||
| void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | ||||
| void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | ||||
| void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *); | |||||
| void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *); | void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *); | ||||
| void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | ||||
| void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | ||||
| @@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double, | |||||
| double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
| int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble, | int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble, | ||||
| xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
| int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int, | |||||
| int *, BLASLONG, int *, BLASLONG, int *, BLASLONG); | |||||
| int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float, | int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float, | ||||
| float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
| int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double, | int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double, | ||||
| @@ -47,6 +47,10 @@ | |||||
| #include "common_z.h" | #include "common_z.h" | ||||
| #include "common_x.h" | #include "common_x.h" | ||||
| #ifdef INTEGER_PRECISION | |||||
| #include "common_i.h" | |||||
| #endif | |||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| #ifdef XDOUBLE | #ifdef XDOUBLE | ||||
| @@ -635,6 +639,9 @@ | |||||
| #define OMATCOPY_K_CT DOMATCOPY_K_CT | #define OMATCOPY_K_CT DOMATCOPY_K_CT | ||||
| #define OMATCOPY_K_RT DOMATCOPY_K_RT | #define OMATCOPY_K_RT DOMATCOPY_K_RT | ||||
| #define GEADD_K DGEADD_K | #define GEADD_K DGEADD_K | ||||
| #elif defined(INTEGER) | |||||
| #define AXPYU_K IAXPYU_K | |||||
| #else | #else | ||||
| #define AMAX_K SAMAX_K | #define AMAX_K SAMAX_K | ||||
| @@ -65,6 +65,7 @@ extern int blas_omp_linked; | |||||
| #define BLAS_XDOUBLE 0x0002U | #define BLAS_XDOUBLE 0x0002U | ||||
| #define BLAS_REAL 0x0000U | #define BLAS_REAL 0x0000U | ||||
| #define BLAS_COMPLEX 0x0004U | #define BLAS_COMPLEX 0x0004U | ||||
| #define BLAS_INTEGER 0x0008U | |||||
| #define BLAS_TRANSA 0x0030U /* 2bit */ | #define BLAS_TRANSA 0x0030U /* 2bit */ | ||||
| #define BLAS_TRANSA_N 0x0000U | #define BLAS_TRANSA_N 0x0000U | ||||
| @@ -189,6 +189,20 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||||
| args -> b, args -> ldb, | args -> b, args -> ldb, | ||||
| args -> c, args -> ldc, sb); | args -> c, args -> ldc, sb); | ||||
| } else | } else | ||||
| #endif | |||||
| #ifdef INTEGER_PRECISION | |||||
| if (mode & BLAS_INTEGER){ | |||||
| /* REAL / Integer */ | |||||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, int, | |||||
| int *, BLASLONG, int *, BLASLONG, | |||||
| int *, BLASLONG, void *) = func; | |||||
| afunc(args -> m, args -> n, args -> k, | |||||
| ((int *)args -> alpha)[0], | |||||
| args -> a, args -> lda, | |||||
| args -> b, args -> ldb, | |||||
| args -> c, args -> ldc, sb); | |||||
| } else | |||||
| #endif | #endif | ||||
| if (mode & BLAS_DOUBLE){ | if (mode & BLAS_DOUBLE){ | ||||
| /* REAL / Double */ | /* REAL / Double */ | ||||
| @@ -253,6 +253,15 @@ XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifdef INTEGER_PRECISION | |||||
| IBLAS1OBJS = \ | |||||
| iaxpy.$(SUFFIX) | |||||
| IBLAS2OBJS = | |||||
| IBLAS3OBJS = | |||||
| endif | |||||
| endif | endif | ||||
| HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \ | HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \ | ||||
| @@ -343,6 +352,9 @@ CZBLAS3OBJS = \ | |||||
| cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ | cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ | ||||
| cblas_zgeadd.$(SUFFIX) | cblas_zgeadd.$(SUFFIX) | ||||
| CIBLAS1OBJS = \ | |||||
| cblas_iaxpy.$(SUFFIX) | |||||
| ifeq ($(SUPPORT_GEMM3M), 1) | ifeq ($(SUPPORT_GEMM3M), 1) | ||||
| @@ -372,6 +384,10 @@ ZBLAS1OBJS += $(CZBLAS1OBJS) | |||||
| ZBLAS2OBJS += $(CZBLAS2OBJS) | ZBLAS2OBJS += $(CZBLAS2OBJS) | ||||
| ZBLAS3OBJS += $(CZBLAS3OBJS) | ZBLAS3OBJS += $(CZBLAS3OBJS) | ||||
| IBLAS1OBJS += $(CIBLAS1OBJS) | |||||
| IBLAS2OBJS += $(CIBLAS2OBJS) | |||||
| IBLAS3OBJS += $(CIBLAS3OBJS) | |||||
| endif | endif | ||||
| SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | ||||
| @@ -380,6 +396,7 @@ QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) | |||||
| CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | ||||
| ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | ||||
| XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | ||||
| IBLASOBJS = $(IBLAS1OBJS) $(IBLAS2OBJS) $(IBLAS3OBJS) | |||||
| #SLAPACKOBJS = \ | #SLAPACKOBJS = \ | ||||
| # sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | # sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | ||||
| @@ -458,6 +475,10 @@ ifdef QUAD_PRECISION | |||||
| FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) | FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) | ||||
| endif | endif | ||||
| ifdef INTEGER_PRECISION | |||||
| FUNCOBJS += $(IBLASOBJS) | |||||
| endif | |||||
| FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) | FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) | ||||
| include $(TOPDIR)/Makefile.tail | include $(TOPDIR)/Makefile.tail | ||||
| @@ -476,17 +497,18 @@ endif | |||||
| clean :: | clean :: | ||||
| @rm -f functable.h | @rm -f functable.h | ||||
| level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) | |||||
| level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(IBLAS1OBJS) | |||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | |||||
| level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(IBLAS2OBJS) | |||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||||
| level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(IBLAS3OBJS) | |||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | ||||
| $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS | |||||
| $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) \ | |||||
| $(CIBLASOBJS) $(CIBLASOBJS_P) : override CFLAGS += -DCBLAS | |||||
| srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | ||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | $(CC) $(CFLAGS) -c $< -o $(@F) | ||||
| @@ -725,6 +747,9 @@ saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c | |||||
| daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c | daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c | ||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | $(CC) $(CFLAGS) -c $< -o $(@F) | ||||
| iaxpy.$(SUFFIX) iaxpy.$(PSUFFIX) : axpy.c | |||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||||
| qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c | qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c | ||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | $(CC) $(CFLAGS) -c $< -o $(@F) | ||||
| @@ -1437,6 +1462,9 @@ cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c | |||||
| cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c | cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c | ||||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | ||||
| cblas_iaxpy.$(SUFFIX) cblas_iaxpy.$(PSUFFIX) : axpy.c | |||||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||||
| cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c | cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c | ||||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | ||||
| @@ -103,6 +103,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||||
| mode = BLAS_XDOUBLE | BLAS_REAL; | mode = BLAS_XDOUBLE | BLAS_REAL; | ||||
| #elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
| mode = BLAS_DOUBLE | BLAS_REAL; | mode = BLAS_DOUBLE | BLAS_REAL; | ||||
| #elif defined(INTEGER) | |||||
| mode = BLAS_INTEGER | BLAS_REAL; | |||||
| #else | #else | ||||
| mode = BLAS_SINGLE | BLAS_REAL; | mode = BLAS_SINGLE | BLAS_REAL; | ||||
| #endif | #endif | ||||
| @@ -210,6 +210,10 @@ ifndef XAXPYKERNEL | |||||
| XAXPYKERNEL = zaxpy.S | XAXPYKERNEL = zaxpy.S | ||||
| endif | endif | ||||
| ifndef IAXPYKERNEL | |||||
| IAXPYKERNEL = ../generic/iaxpy.c | |||||
| endif | |||||
| ### COPY ### | ### COPY ### | ||||
| ifndef SCOPYKERNEL | ifndef SCOPYKERNEL | ||||
| @@ -471,6 +475,9 @@ QBLASOBJS += \ | |||||
| qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | ||||
| qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) | qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) | ||||
| IBLASOBJS += \ | |||||
| iaxpy_k$(TSUFFIX).$(SUFFIX) | |||||
| CBLASOBJS += \ | CBLASOBJS += \ | ||||
| camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | ||||
| casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \ | casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \ | ||||
| @@ -645,6 +652,9 @@ $(KDIR)daxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KE | |||||
| $(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL) | $(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL) | ||||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | ||||
| $(KDIR)iaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)iaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(IAXPYKERNEL) | |||||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DINTEGER $< -o $@ | |||||
| $(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL) | $(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL) | ||||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@ | $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@ | ||||
| @@ -0,0 +1,52 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2015, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include "common.h" | |||||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, int da, int *x, BLASLONG inc_x, int *y, BLASLONG inc_y, int *dummy, BLASLONG dummy2) | |||||
| { | |||||
| BLASLONG i=0; | |||||
| BLASLONG ix,iy; | |||||
| if ( n < 0 ) return(0); | |||||
| if ( da == 0 ) return(0); | |||||
| ix = 0; | |||||
| iy = 0; | |||||
| while(i < n) | |||||
| { | |||||
| y[iy] += da * x[ix] ; | |||||
| ix += inc_x ; | |||||
| iy += inc_y ; | |||||
| i++ ; | |||||
| } | |||||
| return 0; | |||||
| } | |||||