| @@ -347,6 +347,16 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum | |||
| void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a, | |||
| OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb); | |||
| void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta, | |||
| float *c, OPENBLAS_CONST blasint cldc); | |||
| void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta, | |||
| double *c, OPENBLAS_CONST blasint cldc); | |||
| void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta, | |||
| float *c, OPENBLAS_CONST blasint cldc); | |||
| void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta, | |||
| double *c, OPENBLAS_CONST blasint cldc); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif /* __cplusplus */ | |||
| @@ -333,6 +333,16 @@ void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, bl | |||
| blasint clda, blasint cldb); | |||
| void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a, | |||
| blasint clda, blasint cldb); | |||
| void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta, | |||
| float *c, blasint cldc); | |||
| void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta, | |||
| double *c, blasint cldc); | |||
| void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta, | |||
| float *c, blasint cldc); | |||
| void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta, | |||
| double *c, blasint cldc); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif /* __cplusplus */ | |||
| @@ -220,6 +220,7 @@ | |||
| #define COMATCOPY_K_CTC comatcopy_k_ctc | |||
| #define COMATCOPY_K_RTC comatcopy_k_rtc | |||
| #define CGEADD_K cgeadd_k | |||
| #else | |||
| @@ -402,6 +403,7 @@ | |||
| #define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc | |||
| #define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc | |||
| #define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc | |||
| #define CGEADD_K gotoblas -> cgeadd_k | |||
| #endif | |||
| @@ -149,6 +149,7 @@ | |||
| #define DOMATCOPY_K_RN domatcopy_k_rn | |||
| #define DOMATCOPY_K_CT domatcopy_k_ct | |||
| #define DOMATCOPY_K_RT domatcopy_k_rt | |||
| #define DGEADD_K dgeadd_k | |||
| #else | |||
| @@ -267,6 +268,8 @@ | |||
| #define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct | |||
| #define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt | |||
| #define DGEADD_K gotoblas -> dgeadd_k | |||
| #endif | |||
| #define DGEMM_NN dgemm_nn | |||
| @@ -754,6 +754,12 @@ void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, do | |||
| void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); | |||
| void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); | |||
| void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*); | |||
| void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*); | |||
| void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*); | |||
| void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -1762,6 +1762,11 @@ int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, dou | |||
| int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | |||
| int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | |||
| int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); | |||
| int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); | |||
| int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG); | |||
| int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG); | |||
| #ifdef __CUDACC__ | |||
| } | |||
| @@ -634,7 +634,7 @@ | |||
| #define OMATCOPY_K_RN DOMATCOPY_K_RN | |||
| #define OMATCOPY_K_CT DOMATCOPY_K_CT | |||
| #define OMATCOPY_K_RT DOMATCOPY_K_RT | |||
| #define GEADD_K DGEADD_K | |||
| #else | |||
| #define AMAX_K SAMAX_K | |||
| @@ -932,6 +932,7 @@ | |||
| #define OMATCOPY_K_CT SOMATCOPY_K_CT | |||
| #define OMATCOPY_K_RT SOMATCOPY_K_RT | |||
| #define GEADD_K SGEADD_K | |||
| #endif | |||
| #else | |||
| #ifdef XDOUBLE | |||
| @@ -1746,6 +1747,7 @@ | |||
| #define OMATCOPY_K_RNC ZOMATCOPY_K_RNC | |||
| #define OMATCOPY_K_CTC ZOMATCOPY_K_CTC | |||
| #define OMATCOPY_K_RTC ZOMATCOPY_K_RTC | |||
| #define GEADD_K ZGEADD_K | |||
| #else | |||
| @@ -2159,6 +2161,8 @@ | |||
| #define OMATCOPY_K_CTC COMATCOPY_K_CTC | |||
| #define OMATCOPY_K_RTC COMATCOPY_K_RTC | |||
| #define GEADD_K CGEADD_K | |||
| #endif | |||
| #endif | |||
| @@ -855,6 +855,10 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
| int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
| int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
| int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | |||
| int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); | |||
| int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); | |||
| int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG); | |||
| } gotoblas_t; | |||
| @@ -153,6 +153,7 @@ | |||
| #define SOMATCOPY_K_CT somatcopy_k_ct | |||
| #define SOMATCOPY_K_RT somatcopy_k_rt | |||
| #define SGEADD_K sgeadd_k | |||
| #else | |||
| @@ -274,6 +275,7 @@ | |||
| #define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct | |||
| #define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt | |||
| #define SGEADD_K gotoblas -> sgeadd_k | |||
| #endif | |||
| @@ -220,6 +220,7 @@ | |||
| #define ZOMATCOPY_K_CTC zomatcopy_k_ctc | |||
| #define ZOMATCOPY_K_RTC zomatcopy_k_rtc | |||
| #define ZGEADD_K zgeadd_k | |||
| #else | |||
| @@ -403,6 +404,8 @@ | |||
| #define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc | |||
| #define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc | |||
| #define ZGEADD_K zgeadd_k | |||
| #endif | |||
| #define ZGEMM_NN zgemm_nn | |||
| @@ -23,7 +23,8 @@ | |||
| zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv, | |||
| ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, zsymv, | |||
| xerbla, | |||
| saxpby,daxpby,caxpby,zaxpby | |||
| saxpby,daxpby,caxpby,zaxpby, | |||
| sgeadd,dgeadd,cgeadd,zgeadd, | |||
| ); | |||
| @cblasobjs = ( | |||
| @@ -55,6 +56,7 @@ | |||
| cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby, | |||
| cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy, | |||
| cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy, | |||
| cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd | |||
| ); | |||
| @exblasobjs = ( | |||
| @@ -43,7 +43,8 @@ SBLAS2OBJS = \ | |||
| SBLAS3OBJS = \ | |||
| sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ | |||
| strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \ | |||
| somatcopy.$(SUFFIX) simatcopy.$(SUFFIX) | |||
| somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\ | |||
| sgeadd.$(SUFFIX) | |||
| DBLAS1OBJS = \ | |||
| @@ -68,7 +69,8 @@ DBLAS2OBJS = \ | |||
| DBLAS3OBJS = \ | |||
| dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ | |||
| dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ | |||
| domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX) | |||
| domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\ | |||
| dgeadd.$(SUFFIX) | |||
| CBLAS1OBJS = \ | |||
| caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ | |||
| @@ -96,7 +98,8 @@ CBLAS3OBJS = \ | |||
| cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \ | |||
| ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ | |||
| chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \ | |||
| comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX) | |||
| comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\ | |||
| cgeadd.$(SUFFIX) | |||
| ZBLAS1OBJS = \ | |||
| zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ | |||
| @@ -124,7 +127,8 @@ ZBLAS3OBJS = \ | |||
| zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \ | |||
| ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \ | |||
| zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ | |||
| zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) | |||
| zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\ | |||
| zgeadd.$(SUFFIX) | |||
| ifeq ($(SUPPORT_GEMM3M), 1) | |||
| @@ -269,7 +273,8 @@ CSBLAS2OBJS = \ | |||
| CSBLAS3OBJS = \ | |||
| cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \ | |||
| cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX) | |||
| cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\ | |||
| cblas_sgeadd.$(SUFFIX) | |||
| CDBLAS1OBJS = \ | |||
| cblas_idamax.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ | |||
| @@ -285,7 +290,8 @@ CDBLAS2OBJS = \ | |||
| CDBLAS3OBJS += \ | |||
| cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \ | |||
| cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) | |||
| cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \ | |||
| cblas_dgeadd.$(SUFFIX) | |||
| CCBLAS1OBJS = \ | |||
| cblas_icamax.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \ | |||
| @@ -308,7 +314,9 @@ CCBLAS3OBJS = \ | |||
| cblas_cgemm.$(SUFFIX) cblas_csymm.$(SUFFIX) cblas_ctrmm.$(SUFFIX) cblas_ctrsm.$(SUFFIX) \ | |||
| cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ | |||
| cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ | |||
| cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX) | |||
| cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ | |||
| cblas_cgeadd.$(SUFFIX) | |||
| CZBLAS1OBJS = \ | |||
| @@ -332,7 +340,9 @@ CZBLAS3OBJS = \ | |||
| cblas_zgemm.$(SUFFIX) cblas_zsymm.$(SUFFIX) cblas_ztrmm.$(SUFFIX) cblas_ztrsm.$(SUFFIX) \ | |||
| cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \ | |||
| cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\ | |||
| cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) | |||
| cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ | |||
| cblas_zgeadd.$(SUFFIX) | |||
| ifeq ($(SUPPORT_GEMM3M), 1) | |||
| @@ -2103,4 +2113,27 @@ zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c | |||
| cblas_zimatcopy.$(SUFFIX) cblas_zimatcopy.$(PSUFFIX) : zimatcopy.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| sgeadd.$(SUFFIX) sgeadd.$(PSUFFIX) : geadd.c | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| dgeadd.$(SUFFIX) dgeadd.$(PSUFFIX) : geadd.c | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| cgeadd.$(SUFFIX) cgeadd.$(PSUFFIX) : zgeadd.c | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| zgeadd.$(SUFFIX) zgeadd.$(PSUFFIX) : zgeadd.c | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| cblas_sgeadd.$(SUFFIX) cblas_sgeadd.$(PSUFFIX) : geadd.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| cblas_dgeadd.$(SUFFIX) cblas_dgeadd.$(PSUFFIX) : geadd.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(PSUFFIX) : zgeadd.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| @@ -0,0 +1,148 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #include <stdio.h> | |||
| #include "common.h" | |||
| #ifdef FUNCTION_PROFILE | |||
| #include "functable.h" | |||
| #endif | |||
| #if defined(DOUBLE) | |||
| #define ERROR_NAME "DGEADD " | |||
| #else | |||
| #define ERROR_NAME "SGEADD " | |||
| #endif | |||
| #ifndef CBLAS | |||
| void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA, | |||
| FLOAT *BETA, FLOAT *c, blasint *LDC) | |||
| { | |||
| blasint m = *M; | |||
| blasint n = *N; | |||
| blasint lda = *LDA; | |||
| blasint ldc = *LDC; | |||
| FLOAT alpha = *ALPHA; | |||
| FLOAT beta = *BETA; | |||
| blasint info; | |||
| PRINT_DEBUG_NAME; | |||
| info = 0; | |||
| if (lda < MAX(1, m)) info = 6; | |||
| if (ldc < MAX(1, m)) info = 8; | |||
| if (n < 0) info = 2; | |||
| if (m < 0) info = 1; | |||
| if (info != 0){ | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| #else | |||
| void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT alpha, FLOAT *a, blasint lda, FLOAT beta, | |||
| FLOAT *c, blasint ldc) | |||
| { | |||
| /* | |||
| void CNAME(enum CBLAS_ORDER order, | |||
| blasint m, blasint n, | |||
| FLOAT alpha, | |||
| FLOAT *a, blasint lda, | |||
| FLOAT beta, | |||
| FLOAT *c, blasint ldc){ */ | |||
| blasint info, t; | |||
| PRINT_DEBUG_CNAME; | |||
| info = 0; | |||
| if (order == CblasColMajor) { | |||
| info = -1; | |||
| if (ldc < MAX(1, m)) info = 8; | |||
| if (lda < MAX(1, m)) info = 5; | |||
| if (n < 0) info = 2; | |||
| if (m < 0) info = 1; | |||
| } | |||
| if (order == CblasRowMajor) { | |||
| info = -1; | |||
| t = n; | |||
| n = m; | |||
| m = t; | |||
| if (ldc < MAX(1, m)) info = 8; | |||
| if (lda < MAX(1, m)) info = 5; | |||
| if (n < 0) info = 2; | |||
| if (m < 0) info = 1; | |||
| } | |||
| if (info >= 0) { | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| #endif | |||
| if ((m==0) || (n==0)) return; | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| GEADD_K(m,n,alpha, a, lda, beta, c, ldc); | |||
| FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n); | |||
| IDEBUG_END; | |||
| return; | |||
| } | |||
| @@ -0,0 +1,146 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #include <stdio.h> | |||
| #include "common.h" | |||
| #ifdef FUNCTION_PROFILE | |||
| #include "functable.h" | |||
| #endif | |||
| #if defined(DOUBLE) | |||
| #define ERROR_NAME "ZGEADD " | |||
| #else | |||
| #define ERROR_NAME "CGEADD " | |||
| #endif | |||
| #ifndef CBLAS | |||
| void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA, | |||
| FLOAT *BETA, FLOAT *c, blasint *LDC) | |||
| { | |||
| blasint m = *M; | |||
| blasint n = *N; | |||
| blasint lda = *LDA; | |||
| blasint ldc = *LDC; | |||
| blasint info; | |||
| PRINT_DEBUG_NAME; | |||
| info = 0; | |||
| if (lda < MAX(1, m)) info = 6; | |||
| if (ldc < MAX(1, m)) info = 8; | |||
| if (n < 0) info = 2; | |||
| if (m < 0) info = 1; | |||
| if (info != 0){ | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| #else | |||
| void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT *ALPHA, FLOAT *a, blasint lda, FLOAT *BETA, | |||
| FLOAT *c, blasint ldc) | |||
| { | |||
| /* | |||
| void CNAME(enum CBLAS_ORDER order, | |||
| blasint m, blasint n, | |||
| FLOAT alpha, | |||
| FLOAT *a, blasint lda, | |||
| FLOAT beta, | |||
| FLOAT *c, blasint ldc){ */ | |||
| blasint info, t; | |||
| PRINT_DEBUG_CNAME; | |||
| info = 0; | |||
| if (order == CblasColMajor) { | |||
| info = -1; | |||
| if (ldc < MAX(1, m)) info = 8; | |||
| if (lda < MAX(1, m)) info = 5; | |||
| if (n < 0) info = 2; | |||
| if (m < 0) info = 1; | |||
| } | |||
| if (order == CblasRowMajor) { | |||
| info = -1; | |||
| t = n; | |||
| n = m; | |||
| m = t; | |||
| if (ldc < MAX(1, m)) info = 8; | |||
| if (lda < MAX(1, m)) info = 5; | |||
| if (n < 0) info = 2; | |||
| if (m < 0) info = 1; | |||
| } | |||
| if (info >= 0) { | |||
| BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
| return; | |||
| } | |||
| #endif | |||
| if ((m==0) || (n==0)) return; | |||
| IDEBUG_START; | |||
| FUNCTION_PROFILE_START(); | |||
| GEADD_K(m,n,ALPHA[0],ALPHA[1], a, lda, BETA[0], BETA[1], c, ldc); | |||
| FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n); | |||
| IDEBUG_END; | |||
| return; | |||
| } | |||
| @@ -329,23 +329,27 @@ endif | |||
| ###### BLAS extensions ##### | |||
| SBLASOBJS += \ | |||
| somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) | |||
| somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| sgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| DBLASOBJS += \ | |||
| domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) | |||
| domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| dgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| CBLASOBJS += \ | |||
| comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ | |||
| comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) | |||
| comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | |||
| cgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| ZBLASOBJS += \ | |||
| zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ | |||
| zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) | |||
| zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | |||
| zgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| @@ -3440,3 +3444,31 @@ $(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC) | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ | |||
| ifndef SGEADD_K | |||
| SGEADD_K = ../arm/geadd.c | |||
| endif | |||
| $(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ | |||
| ifndef DGEADD_K | |||
| DGEADD_K = ../arm/geadd.c | |||
| endif | |||
| $(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ | |||
| ifndef CGEADD_K | |||
| CGEADD_K = ../arm/zgeadd.c | |||
| endif | |||
| $(KDIR)cgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEADD_K) | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM $< -o $@ | |||
| ifndef ZGEADD_K | |||
| ZGEADD_K = ../arm/zgeadd.c | |||
| endif | |||
| $(KDIR)zgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEADD_K) | |||
| $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM $< -o $@ | |||
| @@ -0,0 +1,64 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT beta, FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i; | |||
| FLOAT *aptr,*bptr; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| bptr = b; | |||
| if ( alpha == 0.0 ) | |||
| { | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| SCAL_K(rows, 0,0, beta, bptr, 1, NULL, 0,NULL,0); | |||
| bptr+=ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| for (i = 0; i < cols; i++) { | |||
| AXPBY_K(rows, alpha, aptr, 1, beta, bptr, 1); | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,65 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alphar, FLOAT alphai, FLOAT *a, BLASLONG lda, FLOAT betar, FLOAT betai , FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i; | |||
| FLOAT *aptr,*bptr; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| bptr = b; | |||
| lda *= 2; | |||
| ldb *= 2; | |||
| if ( alphar == 0.0 && alphai == 0.0 ) | |||
| { | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| SCAL_K(rows, 0,0, betar, betai, bptr, 1, NULL, 0,NULL,0); | |||
| bptr+=ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| for (i = 0; i < cols; i++) { | |||
| AXPBY_K(rows, alphar, alphai, aptr, 1, betar, betai, bptr, 1); | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||