Browse Source

Eliminate dead increment code (2-8)

tags/v0.3.0
Andrew 8 years ago
parent
commit
1236dbe5a6
27 changed files with 147 additions and 147 deletions
  1. +6
    -6
      kernel/generic/trmm_lncopy_2.c
  2. +4
    -4
      kernel/generic/trmm_lncopy_8.c
  3. +5
    -5
      kernel/generic/trmm_ltcopy_2.c
  4. +8
    -8
      kernel/generic/trmm_ltcopy_8.c
  5. +4
    -4
      kernel/generic/trmm_uncopy_2.c
  6. +4
    -4
      kernel/generic/trmm_uncopy_8.c
  7. +4
    -4
      kernel/generic/trmm_utcopy_2.c
  8. +8
    -8
      kernel/generic/trmm_utcopy_8.c
  9. +4
    -4
      kernel/generic/trsm_ltcopy_8.c
  10. +3
    -3
      kernel/generic/trsm_uncopy_8.c
  11. +4
    -4
      kernel/generic/trsm_utcopy_8.c
  12. +2
    -2
      kernel/generic/zgemm3m_tcopy_8.c
  13. +4
    -4
      kernel/generic/zgemm_ncopy_4.c
  14. +2
    -2
      kernel/generic/zgemm_tcopy_2.c
  15. +5
    -5
      kernel/generic/zgemm_tcopy_4.c
  16. +4
    -4
      kernel/generic/ztrmm_lncopy_2.c
  17. +9
    -9
      kernel/generic/ztrmm_lncopy_4.c
  18. +4
    -4
      kernel/generic/ztrmm_ltcopy_2.c
  19. +10
    -10
      kernel/generic/ztrmm_ltcopy_4.c
  20. +3
    -3
      kernel/generic/ztrmm_uncopy_2.c
  21. +11
    -11
      kernel/generic/ztrmm_uncopy_4.c
  22. +3
    -3
      kernel/generic/ztrmm_utcopy_2.c
  23. +6
    -6
      kernel/generic/ztrmm_utcopy_4.c
  24. +8
    -8
      kernel/generic/ztrsm_lncopy_4.c
  25. +7
    -7
      kernel/generic/ztrsm_ltcopy_4.c
  26. +8
    -8
      kernel/generic/ztrsm_uncopy_4.c
  27. +7
    -7
      kernel/generic/ztrsm_utcopy_4.c

+ 6
- 6
kernel/generic/trmm_lncopy_2.c View File

@@ -121,12 +121,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01;
b[ 1] = data03;

ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2;
} else
if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 2;
} else {
#ifdef UNIT
@@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01;
b[ 1] = data03;
#endif
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2;
}
}
@@ -191,7 +191,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0);
}

posY += 1;
// posY += 1;
}

return 0;


+ 4
- 4
kernel/generic/trmm_lncopy_8.c View File

@@ -608,16 +608,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else
if (X < posY) {
if (m & 4) {
ao1 += 4 * lda;
/* ao1 += 4 * lda;
ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
ao4 += 4 * lda; */

b += 32;
}

if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 16;
}

@@ -1018,7 +1018,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else
if (X < posY) {
if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 8;
}



+ 5
- 5
kernel/generic/trmm_ltcopy_2.c View File

@@ -116,8 +116,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) {

if (X > posY) {
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2;
} else
if (X < posY) {
@@ -126,7 +126,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON

b[ 0] = data01;
b[ 1] = data02;
ao1 += lda;
// ao1 += lda;
b += 2;
} else {
#ifdef UNIT
@@ -141,7 +141,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01;
b[ 1] = data02;
#endif
ao1 += 2;
// ao1 += 2;
b += 2;
}
}
@@ -190,7 +190,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0);
}

posY += 1;
// posY += 1;
}

return 0;


+ 8
- 8
kernel/generic/trmm_ltcopy_8.c View File

@@ -443,27 +443,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) {

if (m & 4) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4;
ao3 += 4;
ao4 += 4;
ao5 += 4;
ao6 += 4;
ao7 += 4;
ao8 += 4;
ao8 += 4; */

b += 32;
}

if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao5 += 2;
ao6 += 2;
ao7 += 2;
ao8 += 2;
ao8 += 2; */

b += 16;
}
@@ -548,8 +548,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON

ao1 += 4 * lda;
ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
/* ao3 += 4 * lda;
ao4 += 4 * lda; */

b += 32;
}
@@ -964,10 +964,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) {

if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao4 += 2; */

b += 8;
}


+ 4
- 4
kernel/generic/trmm_uncopy_2.c View File

@@ -122,12 +122,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01;
b[ 1] = data03;

ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2;
} else
if (X > posY) {
ao1 += lda;
// ao1 += lda;
b += 2;
} else {
#ifdef UNIT
@@ -142,7 +142,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01;
b[ 1] = data03;
#endif
ao1 += lda;
// ao1 += lda;
b += 2;
}
}


+ 4
- 4
kernel/generic/trmm_uncopy_8.c View File

@@ -610,16 +610,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else
if (X > posY) {
if (m & 4) {
ao1 += 4 * lda;
/* ao1 += 4 * lda;
ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
ao4 += 4 * lda; */

b += 32;
}

if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 16;
}

@@ -1019,7 +1019,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else
if (X > posY) {
if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 8;
}



+ 4
- 4
kernel/generic/trmm_utcopy_2.c View File

@@ -117,8 +117,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) {

if (X < posY) {
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2;
} else
if (X > posY) {
@@ -127,7 +127,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON

b[ 0] = data01;
b[ 1] = data02;
ao1 += lda;
// ao1 += lda;
b += 2;
} else {
#ifdef UNIT
@@ -139,7 +139,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01;
b[ 1] = ZERO;
#endif
ao1 += lda;
// ao1 += lda;
b += 2;
}
}


+ 8
- 8
kernel/generic/trmm_utcopy_8.c View File

@@ -501,27 +501,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) {

if (m & 4) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4;
ao3 += 4;
ao4 += 4;
ao5 += 4;
ao6 += 4;
ao7 += 4;
ao8 += 4;
ao8 += 4; */

b += 32;
}

if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao5 += 2;
ao6 += 2;
ao7 += 2;
ao8 += 2;
ao8 += 2; */

b += 16;
}
@@ -606,8 +606,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON

ao1 += 4 * lda;
ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
/* ao3 += 4 * lda;
ao4 += 4 * lda; */

b += 32;
}
@@ -1022,10 +1022,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) {

if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao4 += 2; */

b += 8;
}


+ 4
- 4
kernel/generic/trsm_ltcopy_8.c View File

@@ -487,8 +487,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT

a1 += 4 * lda;
a2 += 4 * lda;
a3 += 4 * lda;
a4 += 4 * lda;
/* a3 += 4 * lda;
a4 += 4 * lda; */
b += 32;

ii += 4;
@@ -574,7 +574,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
}

a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16;

ii += 2;
@@ -779,7 +779,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
}

a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8;
ii += 2;
}


+ 3
- 3
kernel/generic/trsm_uncopy_8.c View File

@@ -646,7 +646,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data57;
}
b += 8;
ii += 1;
// ii += 1;
}

a += 8 * lda;
@@ -835,7 +835,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data25;
}
b += 4;
ii += 1;
// ii += 1;
}

a += 4 * lda;
@@ -908,7 +908,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 1) = data09;
}
b += 2;
ii += 1;
// ii += 1;
}

a += 2 * lda;


+ 4
- 4
kernel/generic/trsm_utcopy_8.c View File

@@ -453,8 +453,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT

a1 += 4 * lda;
a2 += 4 * lda;
a3 += 4 * lda;
a4 += 4 * lda;
/* a3 += 4 * lda;
a4 += 4 * lda; */
b += 32;
ii += 4;
}
@@ -513,7 +513,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
}

a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16;
ii += 2;
}
@@ -680,7 +680,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
}

a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8;
ii += 2;
}


+ 2
- 2
kernel/generic/zgemm3m_tcopy_8.c View File

@@ -1044,7 +1044,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda,
*(b_offset2 + 3) = CMULT(a7, a8);

a_offset1 += 8;
b_offset2 += 4;
// b_offset2 += 4;
}

if (n & 2){
@@ -1057,7 +1057,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda,
*(b_offset3 + 1) = CMULT(a3, a4);

a_offset1 += 4;
b_offset3 += 2;
// b_offset3 += 2;
}

if (n & 1){


+ 4
- 4
kernel/generic/zgemm_ncopy_4.c View File

@@ -225,10 +225,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset + 6) = ctemp07;
*(boffset + 7) = ctemp08;

aoffset1 += 2;
/* aoffset1 += 2;
aoffset2 += 2;
aoffset3 += 2;
aoffset4 += 2;
aoffset4 += 2; */
boffset += 8;
}
j--;
@@ -323,8 +323,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset + 2) = ctemp03;
*(boffset + 3) = ctemp04;

aoffset1 += 2;
aoffset2 += 2;
/* aoffset1 += 2;
aoffset2 += 2; */
boffset += 4;
}
}


+ 2
- 2
kernel/generic/zgemm_tcopy_2.c View File

@@ -140,7 +140,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset1 + 6) = ctemp11;
*(b_offset1 + 7) = ctemp12;

b_offset1 += m * 4;
// b_offset1 += m * 4;
a_offset1 += 4;
a_offset2 += 4;
}
@@ -204,7 +204,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset + 2) = ctemp3;
*(b_offset + 3) = ctemp4;

b_offset += m * 4;
// b_offset += m * 4;
a_offset += 4;
}



+ 5
- 5
kernel/generic/zgemm_tcopy_4.c View File

@@ -233,10 +233,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 6) = ctemp07;
*(boffset3 + 7) = ctemp08;

aoffset1 += 2;
/* aoffset1 += 2;
aoffset2 += 2;
aoffset3 += 2;
aoffset4 += 2;
aoffset4 += 2; */

boffset3 += 8;
}
@@ -338,8 +338,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 2) = ctemp03;
*(boffset3 + 3) = ctemp04;

aoffset1 += 2;
aoffset2 += 2;
/* aoffset1 += 2;
aoffset2 += 2; */
boffset3 += 4;
}
}
@@ -387,7 +387,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset2 + 3) = ctemp04;

aoffset1 += 4;
boffset2 += 4;
// boffset2 += 4;
}

if (n & 1){


+ 4
- 4
kernel/generic/ztrmm_lncopy_2.c View File

@@ -148,12 +148,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03;
b[ 3] = data04;

ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4;
} else
if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 4;
} else {
#ifdef UNIT
@@ -224,7 +224,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0);
}

posY += 1;
// posY += 1;
}
return 0;
}

+ 9
- 9
kernel/generic/ztrmm_lncopy_4.c View File

@@ -355,23 +355,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data25;
b[ 7] = data26;

ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8;
}

} else
if (X < posY) {
if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 16;
}

if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 8;
}

@@ -586,12 +586,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data09;
b[ 3] = data10;

ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4;
} else
if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 4;
} else {
#ifdef UNIT
@@ -657,7 +657,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0);
}

posY += 1;
// posY += 1;
}

return 0;


+ 4
- 4
kernel/generic/ztrmm_ltcopy_2.c View File

@@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) {

if (X > posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4;

} else
@@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data3;
b[ 3] = data4;

ao1 += lda;
// ao1 += lda;
b += 4;
} else {
#ifdef UNIT
@@ -233,7 +233,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0);
}

posY += 1;
// posY += 1;
}

return 0;


+ 10
- 10
kernel/generic/ztrmm_ltcopy_4.c View File

@@ -292,18 +292,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) {

if (m & 2) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4;
ao3 += 4;
ao4 += 4;
ao4 += 4; */
b += 16;
}

if (m & 1) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8;
}

@@ -347,7 +347,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[15] = data16;

ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;

b += 16;
}
@@ -371,7 +371,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data07;
b[ 7] = data08;

ao1 += lda;
// ao1 += lda;
b += 8;
}

@@ -588,8 +588,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i) {

if (X > posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */

b += 4;
} else
@@ -604,7 +604,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03;
b[ 3] = data04;

ao1 += lda;
// ao1 += lda;
b += 4;

} else {
@@ -678,7 +678,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0);
}

posY += 1;
// posY += 1;
}

return 0;


+ 3
- 3
kernel/generic/ztrmm_uncopy_2.c View File

@@ -153,12 +153,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03;
b[ 3] = data04;

ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4;
} else
if (X > posY) {
ao1 += lda;
// ao1 += lda;
b += 4;
} else {
#ifdef UNIT


+ 11
- 11
kernel/generic/ztrmm_uncopy_4.c View File

@@ -354,23 +354,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data25;
b[ 7] = data26;

ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8;
}

} else
if (X > posY) {
if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 16;
}

if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 8;
}

@@ -596,13 +596,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 1] = data02;
b[ 2] = data09;
b[ 3] = data10;
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4;
} else
if (X > posY) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 4;
} else {
#ifdef UNIT
@@ -624,8 +624,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data09;
b[ 3] = data10;
#endif
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4;
}
}


+ 3
- 3
kernel/generic/ztrmm_utcopy_2.c View File

@@ -142,8 +142,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON

if (m & 1) {
if (X < posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4;
} else
if (X > posY) {
@@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data3;
b[ 3] = data4;

ao1 += lda;
// ao1 += lda;
b += 4;

} else {


+ 6
- 6
kernel/generic/ztrmm_utcopy_4.c View File

@@ -294,18 +294,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) {

if (m & 2) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4;
ao3 += 4;
ao4 += 4;
ao4 += 4; */
b += 16;
}

if (m & 1) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2;
ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8;
}

@@ -349,7 +349,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[15] = data16;

ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;
b += 16;
}

@@ -372,7 +372,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data07;
b[ 7] = data08;

ao1 += lda;
// ao1 += lda;
b += 8;
}



+ 8
- 8
kernel/generic/ztrsm_lncopy_4.c View File

@@ -313,13 +313,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data26;
}

a1 += 2;
/* a1 += 2;
a2 += 2;
a3 += 2;
a4 += 2;
a4 += 2; */
b += 8;

ii += 1;
// ii += 1;
}
a += 4 * lda;
jj += 4;
@@ -410,11 +410,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data10;
}

a1 += 2;
a2 += 2;
/* a1 += 2;
a2 += 2; */
b += 4;

ii += 1;
// ii += 1;
}
a += 2 * lda;
jj += 2;
@@ -451,8 +451,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1;
}

a += lda;
jj += 1;
// a += lda;
// jj += 1;
}

return 0;


+ 7
- 7
kernel/generic/ztrsm_ltcopy_4.c View File

@@ -286,7 +286,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
}

a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16;

ii += 2;
@@ -335,9 +335,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data08;
}

a1 += lda;
// a1 += lda;
b += 8;
ii += 1;
// ii += 1;
}

a += 8;
@@ -430,9 +430,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data04;
}

a1 += lda;
// a1 += lda;
b += 4;
ii += 1;
// ii += 1;
}

a += 4;
@@ -471,8 +471,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1;
}

a += 2;
jj += 1;
// a += 2;
// jj += 1;
}

return 0;


+ 8
- 8
kernel/generic/ztrsm_uncopy_4.c View File

@@ -344,13 +344,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data26;
}

a1 += 2;
/* a1 += 2;
a2 += 2;
a3 += 2;
a4 += 2;
a4 += 2; */
b += 8;

ii += 1;
// ii += 1;
}

a += 4 * lda;
@@ -444,11 +444,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data10;
}

a1 += 2;
a2 += 2;
/* a1 += 2;
a2 += 2; */
b += 4;

ii += 1;
// ii += 1;
}

a += 2 *lda;
@@ -488,8 +488,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1;
}

a += lda;
jj += 1;
// a += lda;
// jj += 1;
}

return 0;


+ 7
- 7
kernel/generic/ztrsm_utcopy_4.c View File

@@ -266,7 +266,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
}

a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16;

ii += 2;
@@ -303,10 +303,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data08;
}

a1 += lda;
// a1 += lda;
b += 8;

ii += 1;
// ii += 1;
}

a += 8;
@@ -394,10 +394,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data04;
}

a1 += lda;
// a1 += lda;
b += 4;

ii += 1;
// ii += 1;
}

a += 4;
@@ -436,8 +436,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1;
}

a += 2;
jj += 1;
// a += 2;
// jj += 1;
}

return 0;


Loading…
Cancel
Save