Browse Source

Eliminate 2-8 dead increment code

tags/v0.3.0
Andrew 8 years ago
parent
commit
1236dbe5a6
27 changed files with 147 additions and 147 deletions
  1. +6
    -6
      kernel/generic/trmm_lncopy_2.c
  2. +4
    -4
      kernel/generic/trmm_lncopy_8.c
  3. +5
    -5
      kernel/generic/trmm_ltcopy_2.c
  4. +8
    -8
      kernel/generic/trmm_ltcopy_8.c
  5. +4
    -4
      kernel/generic/trmm_uncopy_2.c
  6. +4
    -4
      kernel/generic/trmm_uncopy_8.c
  7. +4
    -4
      kernel/generic/trmm_utcopy_2.c
  8. +8
    -8
      kernel/generic/trmm_utcopy_8.c
  9. +4
    -4
      kernel/generic/trsm_ltcopy_8.c
  10. +3
    -3
      kernel/generic/trsm_uncopy_8.c
  11. +4
    -4
      kernel/generic/trsm_utcopy_8.c
  12. +2
    -2
      kernel/generic/zgemm3m_tcopy_8.c
  13. +4
    -4
      kernel/generic/zgemm_ncopy_4.c
  14. +2
    -2
      kernel/generic/zgemm_tcopy_2.c
  15. +5
    -5
      kernel/generic/zgemm_tcopy_4.c
  16. +4
    -4
      kernel/generic/ztrmm_lncopy_2.c
  17. +9
    -9
      kernel/generic/ztrmm_lncopy_4.c
  18. +4
    -4
      kernel/generic/ztrmm_ltcopy_2.c
  19. +10
    -10
      kernel/generic/ztrmm_ltcopy_4.c
  20. +3
    -3
      kernel/generic/ztrmm_uncopy_2.c
  21. +11
    -11
      kernel/generic/ztrmm_uncopy_4.c
  22. +3
    -3
      kernel/generic/ztrmm_utcopy_2.c
  23. +6
    -6
      kernel/generic/ztrmm_utcopy_4.c
  24. +8
    -8
      kernel/generic/ztrsm_lncopy_4.c
  25. +7
    -7
      kernel/generic/ztrsm_ltcopy_4.c
  26. +8
    -8
      kernel/generic/ztrsm_uncopy_4.c
  27. +7
    -7
      kernel/generic/ztrsm_utcopy_4.c

+ 6
- 6
kernel/generic/trmm_lncopy_2.c View File

@@ -121,12 +121,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;


ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X < posY) { if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;
#endif #endif
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} }
} }
@@ -191,7 +191,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 4
- 4
kernel/generic/trmm_lncopy_8.c View File

@@ -608,16 +608,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X < posY) { if (X < posY) {
if (m & 4) { if (m & 4) {
ao1 += 4 * lda;
/* ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda; ao3 += 4 * lda;
ao4 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 16; b += 16;
} }


@@ -1018,7 +1018,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X < posY) { if (X < posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 8; b += 8;
} }




+ 5
- 5
kernel/generic/trmm_ltcopy_2.c View File

@@ -116,8 +116,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {


if (X > posY) { if (X > posY) {
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X < posY) { if (X < posY) {
@@ -126,7 +126,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -141,7 +141,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
#endif #endif
ao1 += 2;
// ao1 += 2;
b += 2; b += 2;
} }
} }
@@ -190,7 +190,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 8
- 8
kernel/generic/trmm_ltcopy_8.c View File

@@ -443,27 +443,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) { if (X > posY) {


if (m & 4) { if (m & 4) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4; ao4 += 4;
ao5 += 4; ao5 += 4;
ao6 += 4; ao6 += 4;
ao7 += 4; ao7 += 4;
ao8 += 4;
ao8 += 4; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2; ao4 += 2;
ao5 += 2; ao5 += 2;
ao6 += 2; ao6 += 2;
ao7 += 2; ao7 += 2;
ao8 += 2;
ao8 += 2; */


b += 16; b += 16;
} }
@@ -548,8 +548,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


ao1 += 4 * lda; ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
/* ao3 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }
@@ -964,10 +964,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) { if (X > posY) {


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */


b += 8; b += 8;
} }


+ 4
- 4
kernel/generic/trmm_uncopy_2.c View File

@@ -122,12 +122,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;


ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -142,7 +142,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;
#endif #endif
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} }
} }


+ 4
- 4
kernel/generic/trmm_uncopy_8.c View File

@@ -610,16 +610,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X > posY) { if (X > posY) {
if (m & 4) { if (m & 4) {
ao1 += 4 * lda;
/* ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda; ao3 += 4 * lda;
ao4 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 16; b += 16;
} }


@@ -1019,7 +1019,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X > posY) { if (X > posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 8; b += 8;
} }




+ 4
- 4
kernel/generic/trmm_utcopy_2.c View File

@@ -117,8 +117,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {


if (X < posY) { if (X < posY) {
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
@@ -127,7 +127,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -139,7 +139,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = ZERO; b[ 1] = ZERO;
#endif #endif
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} }
} }


+ 8
- 8
kernel/generic/trmm_utcopy_8.c View File

@@ -501,27 +501,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {


if (m & 4) { if (m & 4) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4; ao4 += 4;
ao5 += 4; ao5 += 4;
ao6 += 4; ao6 += 4;
ao7 += 4; ao7 += 4;
ao8 += 4;
ao8 += 4; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2; ao4 += 2;
ao5 += 2; ao5 += 2;
ao6 += 2; ao6 += 2;
ao7 += 2; ao7 += 2;
ao8 += 2;
ao8 += 2; */


b += 16; b += 16;
} }
@@ -606,8 +606,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


ao1 += 4 * lda; ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
/* ao3 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }
@@ -1022,10 +1022,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */


b += 8; b += 8;
} }


+ 4
- 4
kernel/generic/trsm_ltcopy_8.c View File

@@ -487,8 +487,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT


a1 += 4 * lda; a1 += 4 * lda;
a2 += 4 * lda; a2 += 4 * lda;
a3 += 4 * lda;
a4 += 4 * lda;
/* a3 += 4 * lda;
a4 += 4 * lda; */
b += 32; b += 32;


ii += 4; ii += 4;
@@ -574,7 +574,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;


ii += 2; ii += 2;
@@ -779,7 +779,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8; b += 8;
ii += 2; ii += 2;
} }


+ 3
- 3
kernel/generic/trsm_uncopy_8.c View File

@@ -646,7 +646,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data57; *(b + 7) = data57;
} }
b += 8; b += 8;
ii += 1;
// ii += 1;
} }


a += 8 * lda; a += 8 * lda;
@@ -835,7 +835,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data25; *(b + 3) = data25;
} }
b += 4; b += 4;
ii += 1;
// ii += 1;
} }


a += 4 * lda; a += 4 * lda;
@@ -908,7 +908,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 1) = data09; *(b + 1) = data09;
} }
b += 2; b += 2;
ii += 1;
// ii += 1;
} }


a += 2 * lda; a += 2 * lda;


+ 4
- 4
kernel/generic/trsm_utcopy_8.c View File

@@ -453,8 +453,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT


a1 += 4 * lda; a1 += 4 * lda;
a2 += 4 * lda; a2 += 4 * lda;
a3 += 4 * lda;
a4 += 4 * lda;
/* a3 += 4 * lda;
a4 += 4 * lda; */
b += 32; b += 32;
ii += 4; ii += 4;
} }
@@ -513,7 +513,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;
ii += 2; ii += 2;
} }
@@ -680,7 +680,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8; b += 8;
ii += 2; ii += 2;
} }


+ 2
- 2
kernel/generic/zgemm3m_tcopy_8.c View File

@@ -1044,7 +1044,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda,
*(b_offset2 + 3) = CMULT(a7, a8); *(b_offset2 + 3) = CMULT(a7, a8);


a_offset1 += 8; a_offset1 += 8;
b_offset2 += 4;
// b_offset2 += 4;
} }


if (n & 2){ if (n & 2){
@@ -1057,7 +1057,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda,
*(b_offset3 + 1) = CMULT(a3, a4); *(b_offset3 + 1) = CMULT(a3, a4);


a_offset1 += 4; a_offset1 += 4;
b_offset3 += 2;
// b_offset3 += 2;
} }


if (n & 1){ if (n & 1){


+ 4
- 4
kernel/generic/zgemm_ncopy_4.c View File

@@ -225,10 +225,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset + 6) = ctemp07; *(boffset + 6) = ctemp07;
*(boffset + 7) = ctemp08; *(boffset + 7) = ctemp08;


aoffset1 += 2;
/* aoffset1 += 2;
aoffset2 += 2; aoffset2 += 2;
aoffset3 += 2; aoffset3 += 2;
aoffset4 += 2;
aoffset4 += 2; */
boffset += 8; boffset += 8;
} }
j--; j--;
@@ -323,8 +323,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset + 2) = ctemp03; *(boffset + 2) = ctemp03;
*(boffset + 3) = ctemp04; *(boffset + 3) = ctemp04;


aoffset1 += 2;
aoffset2 += 2;
/* aoffset1 += 2;
aoffset2 += 2; */
boffset += 4; boffset += 4;
} }
} }


+ 2
- 2
kernel/generic/zgemm_tcopy_2.c View File

@@ -140,7 +140,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset1 + 6) = ctemp11; *(b_offset1 + 6) = ctemp11;
*(b_offset1 + 7) = ctemp12; *(b_offset1 + 7) = ctemp12;


b_offset1 += m * 4;
// b_offset1 += m * 4;
a_offset1 += 4; a_offset1 += 4;
a_offset2 += 4; a_offset2 += 4;
} }
@@ -204,7 +204,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset + 2) = ctemp3; *(b_offset + 2) = ctemp3;
*(b_offset + 3) = ctemp4; *(b_offset + 3) = ctemp4;


b_offset += m * 4;
// b_offset += m * 4;
a_offset += 4; a_offset += 4;
} }




+ 5
- 5
kernel/generic/zgemm_tcopy_4.c View File

@@ -233,10 +233,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 6) = ctemp07; *(boffset3 + 6) = ctemp07;
*(boffset3 + 7) = ctemp08; *(boffset3 + 7) = ctemp08;


aoffset1 += 2;
/* aoffset1 += 2;
aoffset2 += 2; aoffset2 += 2;
aoffset3 += 2; aoffset3 += 2;
aoffset4 += 2;
aoffset4 += 2; */


boffset3 += 8; boffset3 += 8;
} }
@@ -338,8 +338,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 2) = ctemp03; *(boffset3 + 2) = ctemp03;
*(boffset3 + 3) = ctemp04; *(boffset3 + 3) = ctemp04;


aoffset1 += 2;
aoffset2 += 2;
/* aoffset1 += 2;
aoffset2 += 2; */
boffset3 += 4; boffset3 += 4;
} }
} }
@@ -387,7 +387,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset2 + 3) = ctemp04; *(boffset2 + 3) = ctemp04;


aoffset1 += 4; aoffset1 += 4;
boffset2 += 4;
// boffset2 += 4;
} }


if (n & 1){ if (n & 1){


+ 4
- 4
kernel/generic/ztrmm_lncopy_2.c View File

@@ -148,12 +148,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X < posY) { if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -224,7 +224,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }
return 0; return 0;
} }

+ 9
- 9
kernel/generic/ztrmm_lncopy_4.c View File

@@ -355,23 +355,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data25; b[ 6] = data25;
b[ 7] = data26; b[ 7] = data26;


ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


} else } else
if (X < posY) { if (X < posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }


@@ -586,12 +586,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data09; b[ 2] = data09;
b[ 3] = data10; b[ 3] = data10;


ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X < posY) { if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -657,7 +657,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 4
- 4
kernel/generic/ztrmm_ltcopy_2.c View File

@@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {


if (X > posY) { if (X > posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;


} else } else
@@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data3; b[ 2] = data3;
b[ 3] = data4; b[ 3] = data4;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -233,7 +233,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 10
- 10
kernel/generic/ztrmm_ltcopy_4.c View File

@@ -292,18 +292,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) { if (X > posY) {


if (m & 2) { if (m & 2) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4;
ao4 += 4; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


@@ -347,7 +347,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[15] = data16; b[15] = data16;


ao1 += 2 * lda; ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;


b += 16; b += 16;
} }
@@ -371,7 +371,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data07; b[ 6] = data07;
b[ 7] = data08; b[ 7] = data08;


ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }


@@ -588,8 +588,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i) { if (i) {


if (X > posY) { if (X > posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */


b += 4; b += 4;
} else } else
@@ -604,7 +604,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;


} else { } else {
@@ -678,7 +678,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 3
- 3
kernel/generic/ztrmm_uncopy_2.c View File

@@ -153,12 +153,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X > posY) { if (X > posY) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT


+ 11
- 11
kernel/generic/ztrmm_uncopy_4.c View File

@@ -354,23 +354,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data25; b[ 6] = data25;
b[ 7] = data26; b[ 7] = data26;


ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


} else } else
if (X > posY) { if (X > posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }


@@ -596,13 +596,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 1] = data02; b[ 1] = data02;
b[ 2] = data09; b[ 2] = data09;
b[ 3] = data10; b[ 3] = data10;
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X > posY) { if (X > posY) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -624,8 +624,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data09; b[ 2] = data09;
b[ 3] = data10; b[ 3] = data10;
#endif #endif
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} }
} }


+ 3
- 3
kernel/generic/ztrmm_utcopy_2.c View File

@@ -142,8 +142,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


if (m & 1) { if (m & 1) {
if (X < posY) { if (X < posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X > posY) { if (X > posY) {
@@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data3; b[ 2] = data3;
b[ 3] = data4; b[ 3] = data4;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;


} else { } else {


+ 6
- 6
kernel/generic/ztrmm_utcopy_4.c View File

@@ -294,18 +294,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {


if (m & 2) { if (m & 2) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4;
ao4 += 4; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


@@ -349,7 +349,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[15] = data16; b[15] = data16;


ao1 += 2 * lda; ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;
b += 16; b += 16;
} }


@@ -372,7 +372,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data07; b[ 6] = data07;
b[ 7] = data08; b[ 7] = data08;


ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }




+ 8
- 8
kernel/generic/ztrsm_lncopy_4.c View File

@@ -313,13 +313,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data26; *(b + 7) = data26;
} }


a1 += 2;
/* a1 += 2;
a2 += 2; a2 += 2;
a3 += 2; a3 += 2;
a4 += 2;
a4 += 2; */
b += 8; b += 8;


ii += 1;
// ii += 1;
} }
a += 4 * lda; a += 4 * lda;
jj += 4; jj += 4;
@@ -410,11 +410,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data10; *(b + 3) = data10;
} }


a1 += 2;
a2 += 2;
/* a1 += 2;
a2 += 2; */
b += 4; b += 4;


ii += 1;
// ii += 1;
} }
a += 2 * lda; a += 2 * lda;
jj += 2; jj += 2;
@@ -451,8 +451,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += lda;
jj += 1;
// a += lda;
// jj += 1;
} }


return 0; return 0;


+ 7
- 7
kernel/generic/ztrsm_ltcopy_4.c View File

@@ -286,7 +286,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;


ii += 2; ii += 2;
@@ -335,9 +335,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data08; *(b + 7) = data08;
} }


a1 += lda;
// a1 += lda;
b += 8; b += 8;
ii += 1;
// ii += 1;
} }


a += 8; a += 8;
@@ -430,9 +430,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data04; *(b + 3) = data04;
} }


a1 += lda;
// a1 += lda;
b += 4; b += 4;
ii += 1;
// ii += 1;
} }


a += 4; a += 4;
@@ -471,8 +471,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += 2;
jj += 1;
// a += 2;
// jj += 1;
} }


return 0; return 0;


+ 8
- 8
kernel/generic/ztrsm_uncopy_4.c View File

@@ -344,13 +344,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data26; *(b + 7) = data26;
} }


a1 += 2;
/* a1 += 2;
a2 += 2; a2 += 2;
a3 += 2; a3 += 2;
a4 += 2;
a4 += 2; */
b += 8; b += 8;


ii += 1;
// ii += 1;
} }


a += 4 * lda; a += 4 * lda;
@@ -444,11 +444,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data10; *(b + 3) = data10;
} }


a1 += 2;
a2 += 2;
/* a1 += 2;
a2 += 2; */
b += 4; b += 4;


ii += 1;
// ii += 1;
} }


a += 2 *lda; a += 2 *lda;
@@ -488,8 +488,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += lda;
jj += 1;
// a += lda;
// jj += 1;
} }


return 0; return 0;


+ 7
- 7
kernel/generic/ztrsm_utcopy_4.c View File

@@ -266,7 +266,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;


ii += 2; ii += 2;
@@ -303,10 +303,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data08; *(b + 7) = data08;
} }


a1 += lda;
// a1 += lda;
b += 8; b += 8;


ii += 1;
// ii += 1;
} }


a += 8; a += 8;
@@ -394,10 +394,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data04; *(b + 3) = data04;
} }


a1 += lda;
// a1 += lda;
b += 4; b += 4;


ii += 1;
// ii += 1;
} }


a += 4; a += 4;
@@ -436,8 +436,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += 2;
jj += 1;
// a += 2;
// jj += 1;
} }


return 0; return 0;


Loading…
Cancel
Save