Browse Source

Bug fixes and improvements for the [z]imatcopy interface.

tags/v0.3.24
Ken Ho 3 years ago
parent
commit
33ab415f68
2 changed files with 57 additions and 66 deletions
  1. +17
    -17
      interface/imatcopy.c
  2. +40
    -49
      interface/zimatcopy.c

+ 17
- 17
interface/imatcopy.c View File

@@ -120,17 +120,20 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return; return;
} }

#ifdef NEW_IMATCOPY #ifdef NEW_IMATCOPY
if ( *lda == *ldb && *rows == *cols) {
if ( *lda == *ldb ) {
if ( order == BlasColMajor ) if ( order == BlasColMajor )
{ {
if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda ); IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda );
return;
} }
else
else if ( *rows == *cols )
{ {
IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda ); IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda );
return;
} }
} }
else else
@@ -138,21 +141,18 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda ); IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda );
return;
} }
else
else if ( *rows == *cols )
{ {
IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda ); IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda );
return;
} }
} }
return;
} }

#endif #endif


if ( *lda > *ldb )
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT);
else
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT);
msize = (size_t)(*rows) * (*cols) * sizeof(FLOAT);


b = malloc(msize); b = malloc(msize);
if ( b == NULL ) if ( b == NULL )
@@ -165,26 +165,26 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
{ {
if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *ldb, a, *ldb );
OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *rows );
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *rows, a, *ldb );
} }
else else
{ {
OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *cols );
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *cols, a, *ldb );
} }
} }
else else
{ {
if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb );
OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *cols );
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *cols, a, *ldb );
} }
else else
{ {
OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *rows );
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *rows, a, *ldb );
} }
} }




+ 40
- 49
interface/zimatcopy.c View File

@@ -125,27 +125,33 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return; return;
} }

#ifdef NEW_IMATCOPY #ifdef NEW_IMATCOPY
if (*lda == *ldb && *cols == *rows) {
if (*lda == *ldb ) {
if ( order == BlasColMajor ) if ( order == BlasColMajor )
{ {


if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
IMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }
if ( trans == BlasConj ) if ( trans == BlasConj )
{ {
IMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }
if ( trans == BlasTrans )
if ( trans == BlasTrans && *rows == *cols )
{ {
IMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }
if ( trans == BlasTransConj )
if ( trans == BlasTransConj && *rows == *cols )
{ {
IMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }

} }
else else
{ {
@@ -153,67 +159,59 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
IMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }
if ( trans == BlasConj ) if ( trans == BlasConj )
{ {
IMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }
if ( trans == BlasTrans )
if ( trans == BlasTrans && *rows == *cols )
{ {
IMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }
if ( trans == BlasTransConj )
if ( trans == BlasTransConj && *rows == *cols )
{ {
IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda ); IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
return;
} }

} }
return;
} }
#endif #endif


if ( *lda > *ldb )
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2;
else
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2;

b = malloc(msize);
if ( b == NULL )
{
printf("Memory alloc failed in zimatcopy\n");
exit(1);
}
msize = (size_t)(*rows) * (*cols) * sizeof(FLOAT) * 2;


b = malloc(msize);
if ( b == NULL )
{
printf("Memory alloc failed\n");
exit(1);
}


if ( order == BlasColMajor ) if ( order == BlasColMajor )
{ {


if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
} }
if ( trans == BlasConj ) if ( trans == BlasConj )
{ {
OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
} }
if ( trans == BlasTrans ) if ( trans == BlasTrans )
{ {
OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
} }
if ( trans == BlasTransConj ) if ( trans == BlasTransConj )
{ {
OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
} }


} }
@@ -222,34 +220,27 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,


if ( trans == BlasNoTrans ) if ( trans == BlasNoTrans )
{ {
OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
} }
if ( trans == BlasConj ) if ( trans == BlasConj )
{ {
OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
} }
if ( trans == BlasTrans ) if ( trans == BlasTrans )
{ {
OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
} }
if ( trans == BlasTransConj ) if ( trans == BlasTransConj )
{ {
OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
free(b);
return;
OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
} }


} }

free(b); free(b);
return; return;




Loading…
Cancel
Save