Browse Source

Merge pull request #1368 from brada4/develop

Eliminate warnings
tags/v0.3.0
Martin Kroeker GitHub 8 years ago
parent
commit
d8b3c3c7db
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
53 changed files with 240 additions and 239 deletions
  1. +1
    -1
      driver/level2/tbmv_L.c
  2. +1
    -1
      driver/level2/tbmv_U.c
  3. +1
    -1
      driver/level2/tbsv_L.c
  4. +1
    -1
      driver/level2/tbsv_U.c
  5. +1
    -1
      driver/level2/tpmv_L.c
  6. +1
    -1
      driver/level2/tpmv_U.c
  7. +1
    -1
      driver/level2/ztbmv_L.c
  8. +1
    -1
      driver/level2/ztbmv_U.c
  9. +1
    -1
      driver/level2/ztbsv_L.c
  10. +1
    -1
      driver/level2/ztbsv_U.c
  11. +1
    -1
      driver/level2/ztpsv_L.c
  12. +3
    -2
      driver/level3/level3_gemm3m_thread.c
  13. +4
    -4
      kernel/generic/gemm_tcopy_8.c
  14. +6
    -6
      kernel/generic/trmm_lncopy_2.c
  15. +9
    -9
      kernel/generic/trmm_lncopy_4.c
  16. +4
    -4
      kernel/generic/trmm_lncopy_8.c
  17. +5
    -5
      kernel/generic/trmm_ltcopy_2.c
  18. +10
    -10
      kernel/generic/trmm_ltcopy_4.c
  19. +8
    -8
      kernel/generic/trmm_ltcopy_8.c
  20. +4
    -4
      kernel/generic/trmm_uncopy_2.c
  21. +11
    -11
      kernel/generic/trmm_uncopy_4.c
  22. +4
    -4
      kernel/generic/trmm_uncopy_8.c
  23. +4
    -4
      kernel/generic/trmm_utcopy_2.c
  24. +8
    -8
      kernel/generic/trmm_utcopy_4.c
  25. +8
    -8
      kernel/generic/trmm_utcopy_8.c
  26. +1
    -1
      kernel/generic/trsm_ltcopy_4.c
  27. +4
    -4
      kernel/generic/trsm_ltcopy_8.c
  28. +3
    -3
      kernel/generic/trsm_uncopy_8.c
  29. +1
    -1
      kernel/generic/trsm_utcopy_4.c
  30. +4
    -4
      kernel/generic/trsm_utcopy_8.c
  31. +2
    -2
      kernel/generic/zgemm3m_tcopy_8.c
  32. +4
    -4
      kernel/generic/zgemm_ncopy_4.c
  33. +2
    -2
      kernel/generic/zgemm_tcopy_2.c
  34. +5
    -5
      kernel/generic/zgemm_tcopy_4.c
  35. +2
    -2
      kernel/generic/zgemm_tcopy_8.c
  36. +4
    -4
      kernel/generic/ztrmm_lncopy_2.c
  37. +9
    -9
      kernel/generic/ztrmm_lncopy_4.c
  38. +8
    -8
      kernel/generic/ztrmm_lncopy_8.c
  39. +4
    -4
      kernel/generic/ztrmm_ltcopy_2.c
  40. +10
    -10
      kernel/generic/ztrmm_ltcopy_4.c
  41. +9
    -9
      kernel/generic/ztrmm_ltcopy_8.c
  42. +3
    -3
      kernel/generic/ztrmm_uncopy_2.c
  43. +11
    -11
      kernel/generic/ztrmm_uncopy_4.c
  44. +8
    -8
      kernel/generic/ztrmm_uncopy_8.c
  45. +3
    -3
      kernel/generic/ztrmm_utcopy_2.c
  46. +6
    -6
      kernel/generic/ztrmm_utcopy_4.c
  47. +4
    -4
      kernel/generic/ztrmm_utcopy_8.c
  48. +8
    -8
      kernel/generic/ztrsm_lncopy_4.c
  49. +7
    -7
      kernel/generic/ztrsm_ltcopy_4.c
  50. +8
    -8
      kernel/generic/ztrsm_uncopy_4.c
  51. +7
    -7
      kernel/generic/ztrsm_utcopy_4.c
  52. +2
    -2
      kernel/x86_64/zgemv_t_4.c
  53. +2
    -2
      kernel/zarch/zgemv_t_4.c

+ 1
- 1
driver/level2/tbmv_L.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/tbmv_U.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/tbsv_L.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/tbsv_U.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/tpmv_L.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/tpmv_U.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/ztbmv_L.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/ztbmv_U.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/ztbsv_L.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/ztbsv_U.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dp1 = 1.;
// const static FLOAT dp1 = 1.;


int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){




+ 1
- 1
driver/level2/ztpsv_L.c View File

@@ -40,7 +40,7 @@
#include <ctype.h> #include <ctype.h>
#include "common.h" #include "common.h"


const static FLOAT dm1 = -1.;
// const static FLOAT dm1 = -1.;


int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){




+ 3
- 2
driver/level3/level3_gemm3m_thread.c View File

@@ -974,7 +974,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){


BLASLONG m = args -> m; BLASLONG m = args -> m;
BLASLONG n = args -> n;
// BLASLONG n = args -> n;
BLASLONG nthreads = args -> nthreads; BLASLONG nthreads = args -> nthreads;
BLASLONG divN, divT; BLASLONG divN, divT;
int mode; int mode;
@@ -985,13 +985,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


m = m_to - m_from; m = m_to - m_from;
} }
/*
if (range_n) { if (range_n) {
BLASLONG n_from = *(((BLASLONG *)range_n) + 0); BLASLONG n_from = *(((BLASLONG *)range_n) + 0);
BLASLONG n_to = *(((BLASLONG *)range_n) + 1); BLASLONG n_to = *(((BLASLONG *)range_n) + 1);


n = n_to - n_from; n = n_to - n_from;
} }
*/


if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) { if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) {
GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0); GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0);


+ 4
- 4
kernel/generic/gemm_tcopy_8.c View File

@@ -719,10 +719,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){


if (m & 1){ if (m & 1){
aoffset1 = aoffset; aoffset1 = aoffset;
aoffset += lda;
// aoffset += lda;


boffset1 = boffset; boffset1 = boffset;
boffset += 8;
// boffset += 8;


i = (n >> 3); i = (n >> 3);
if (i > 0){ if (i > 0){
@@ -762,7 +762,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset2 + 1) = ctemp02; *(boffset2 + 1) = ctemp02;
*(boffset2 + 2) = ctemp03; *(boffset2 + 2) = ctemp03;
*(boffset2 + 3) = ctemp04; *(boffset2 + 3) = ctemp04;
boffset2 += 4;
// boffset2 += 4;
} }


if (n & 2){ if (n & 2){
@@ -772,7 +772,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){


*(boffset3 + 0) = ctemp01; *(boffset3 + 0) = ctemp01;
*(boffset3 + 1) = ctemp02; *(boffset3 + 1) = ctemp02;
boffset3 += 2;
// boffset3 += 2;
} }


if (n & 1){ if (n & 1){


+ 6
- 6
kernel/generic/trmm_lncopy_2.c View File

@@ -121,12 +121,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;


ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X < posY) { if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;
#endif #endif
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} }
} }
@@ -191,7 +191,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 9
- 9
kernel/generic/trmm_lncopy_4.c View File

@@ -237,24 +237,24 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += 1;
/* ao1 += 1;
ao2 += 1; ao2 += 1;
ao3 += 1; ao3 += 1;
ao4 += 1;
ao4 += 1; */
b += 4; b += 4;
} }


} else } else
if (X < posY) { if (X < posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */


b += 8; b += 8;
} }


if (m & 1) { if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} }


@@ -414,12 +414,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;


ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X < posY) { if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -477,7 +477,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 4
- 4
kernel/generic/trmm_lncopy_8.c View File

@@ -608,16 +608,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X < posY) { if (X < posY) {
if (m & 4) { if (m & 4) {
ao1 += 4 * lda;
/* ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda; ao3 += 4 * lda;
ao4 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 16; b += 16;
} }


@@ -1018,7 +1018,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X < posY) { if (X < posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 8; b += 8;
} }




+ 5
- 5
kernel/generic/trmm_ltcopy_2.c View File

@@ -116,8 +116,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {


if (X > posY) { if (X > posY) {
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X < posY) { if (X < posY) {
@@ -126,7 +126,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -141,7 +141,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
#endif #endif
ao1 += 2;
// ao1 += 2;
b += 2; b += 2;
} }
} }
@@ -190,7 +190,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 10
- 10
kernel/generic/trmm_ltcopy_4.c View File

@@ -204,18 +204,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) { if (X > posY) {


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


if (m & 1) { if (m & 1) {
ao1 += 1;
/* ao1 += 1;
ao2 += 1; ao2 += 1;
ao3 += 1; ao3 += 1;
ao4 += 1;
ao4 += 1; */
b += 4; b += 4;
} }


@@ -241,7 +241,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 7] = data08; b[ 7] = data08;


ao1 += 2 * lda; ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;


b += 8; b += 8;
} }
@@ -257,7 +257,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} }


@@ -412,8 +412,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i) { if (i) {


if (X > posY) { if (X > posY) {
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */


b += 2; b += 2;
} else } else
@@ -423,7 +423,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -481,7 +481,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 8
- 8
kernel/generic/trmm_ltcopy_8.c View File

@@ -443,27 +443,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) { if (X > posY) {


if (m & 4) { if (m & 4) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4; ao4 += 4;
ao5 += 4; ao5 += 4;
ao6 += 4; ao6 += 4;
ao7 += 4; ao7 += 4;
ao8 += 4;
ao8 += 4; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2; ao4 += 2;
ao5 += 2; ao5 += 2;
ao6 += 2; ao6 += 2;
ao7 += 2; ao7 += 2;
ao8 += 2;
ao8 += 2; */


b += 16; b += 16;
} }
@@ -548,8 +548,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


ao1 += 4 * lda; ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
/* ao3 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }
@@ -964,10 +964,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) { if (X > posY) {


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */


b += 8; b += 8;
} }


+ 4
- 4
kernel/generic/trmm_uncopy_2.c View File

@@ -122,12 +122,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;


ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -142,7 +142,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data03; b[ 1] = data03;
#endif #endif
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} }
} }


+ 11
- 11
kernel/generic/trmm_uncopy_4.c View File

@@ -241,23 +241,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data05; b[ 2] = data05;
b[ 3] = data07; b[ 3] = data07;


ao1 += 1;
/* ao1 += 1;
ao2 += 1; ao2 += 1;
ao3 += 1; ao3 += 1;
ao4 += 1;
ao4 += 1; */
b += 4; b += 4;
} }


} else } else
if (X > posY) { if (X > posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 8; b += 8;
} }


if (m & 1) { if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} }


@@ -418,13 +418,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


b[ 0] = data01; b[ 0] = data01;
b[ 1] = data05; b[ 1] = data05;
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
ao1 += lda;
ao2 += lda;
/* ao1 += lda;
ao2 += lda; */
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -438,8 +438,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data05; b[ 1] = data05;
#endif #endif
ao1 += lda;
ao2 += lda;
/* ao1 += lda;
ao2 += lda; */
b += 2; b += 2;
} }
} }


+ 4
- 4
kernel/generic/trmm_uncopy_8.c View File

@@ -610,16 +610,16 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X > posY) { if (X > posY) {
if (m & 4) { if (m & 4) {
ao1 += 4 * lda;
/* ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda; ao3 += 4 * lda;
ao4 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 16; b += 16;
} }


@@ -1019,7 +1019,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} else } else
if (X > posY) { if (X > posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
// ao1 += 2 * lda;
b += 8; b += 8;
} }




+ 4
- 4
kernel/generic/trmm_utcopy_2.c View File

@@ -117,8 +117,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {


if (X < posY) { if (X < posY) {
ao1 += 1;
ao2 += 1;
/* ao1 += 1;
ao2 += 1; */
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
@@ -127,7 +127,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -139,7 +139,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = ZERO; b[ 1] = ZERO;
#endif #endif
ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} }
} }


+ 8
- 8
kernel/generic/trmm_utcopy_4.c View File

@@ -201,18 +201,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


if (m & 1) { if (m & 1) {
ao1 += 1;
/* ao1 += 1;
ao2 += 1; ao2 += 1;
ao3 += 1; ao3 += 1;
ao4 += 1;
ao4 += 1; */
b += 4; b += 4;
} }


@@ -238,7 +238,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 7] = data08; b[ 7] = data08;


ao1 += 2 * lda; ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;
b += 8; b += 8;
} }


@@ -253,7 +253,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} }


@@ -401,7 +401,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i) { if (i) {


if (X < posY) { if (X < posY) {
ao1 += 2;
// ao1 += 2;
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
@@ -411,7 +411,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;


ao1 += lda;
// ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT


+ 8
- 8
kernel/generic/trmm_utcopy_8.c View File

@@ -501,27 +501,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {


if (m & 4) { if (m & 4) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4; ao4 += 4;
ao5 += 4; ao5 += 4;
ao6 += 4; ao6 += 4;
ao7 += 4; ao7 += 4;
ao8 += 4;
ao8 += 4; */


b += 32; b += 32;
} }


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2; ao4 += 2;
ao5 += 2; ao5 += 2;
ao6 += 2; ao6 += 2;
ao7 += 2; ao7 += 2;
ao8 += 2;
ao8 += 2; */


b += 16; b += 16;
} }
@@ -606,8 +606,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


ao1 += 4 * lda; ao1 += 4 * lda;
ao2 += 4 * lda; ao2 += 4 * lda;
ao3 += 4 * lda;
ao4 += 4 * lda;
/* ao3 += 4 * lda;
ao4 += 4 * lda; */


b += 32; b += 32;
} }
@@ -1022,10 +1022,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {


if (m & 2) { if (m & 2) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */


b += 8; b += 8;
} }


+ 1
- 1
kernel/generic/trsm_ltcopy_4.c View File

@@ -206,7 +206,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8; b += 8;


ii += 2; ii += 2;


+ 4
- 4
kernel/generic/trsm_ltcopy_8.c View File

@@ -487,8 +487,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT


a1 += 4 * lda; a1 += 4 * lda;
a2 += 4 * lda; a2 += 4 * lda;
a3 += 4 * lda;
a4 += 4 * lda;
/* a3 += 4 * lda;
a4 += 4 * lda; */
b += 32; b += 32;


ii += 4; ii += 4;
@@ -574,7 +574,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;


ii += 2; ii += 2;
@@ -779,7 +779,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8; b += 8;
ii += 2; ii += 2;
} }


+ 3
- 3
kernel/generic/trsm_uncopy_8.c View File

@@ -646,7 +646,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data57; *(b + 7) = data57;
} }
b += 8; b += 8;
ii += 1;
// ii += 1;
} }


a += 8 * lda; a += 8 * lda;
@@ -835,7 +835,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data25; *(b + 3) = data25;
} }
b += 4; b += 4;
ii += 1;
// ii += 1;
} }


a += 4 * lda; a += 4 * lda;
@@ -908,7 +908,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 1) = data09; *(b + 1) = data09;
} }
b += 2; b += 2;
ii += 1;
// ii += 1;
} }


a += 2 * lda; a += 2 * lda;


+ 1
- 1
kernel/generic/trsm_utcopy_4.c View File

@@ -194,7 +194,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8; b += 8;


ii += 2; ii += 2;


+ 4
- 4
kernel/generic/trsm_utcopy_8.c View File

@@ -453,8 +453,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT


a1 += 4 * lda; a1 += 4 * lda;
a2 += 4 * lda; a2 += 4 * lda;
a3 += 4 * lda;
a4 += 4 * lda;
/* a3 += 4 * lda;
a4 += 4 * lda; */
b += 32; b += 32;
ii += 4; ii += 4;
} }
@@ -513,7 +513,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;
ii += 2; ii += 2;
} }
@@ -680,7 +680,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 8; b += 8;
ii += 2; ii += 2;
} }


+ 2
- 2
kernel/generic/zgemm3m_tcopy_8.c View File

@@ -1044,7 +1044,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda,
*(b_offset2 + 3) = CMULT(a7, a8); *(b_offset2 + 3) = CMULT(a7, a8);


a_offset1 += 8; a_offset1 += 8;
b_offset2 += 4;
// b_offset2 += 4;
} }


if (n & 2){ if (n & 2){
@@ -1057,7 +1057,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda,
*(b_offset3 + 1) = CMULT(a3, a4); *(b_offset3 + 1) = CMULT(a3, a4);


a_offset1 += 4; a_offset1 += 4;
b_offset3 += 2;
// b_offset3 += 2;
} }


if (n & 1){ if (n & 1){


+ 4
- 4
kernel/generic/zgemm_ncopy_4.c View File

@@ -225,10 +225,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset + 6) = ctemp07; *(boffset + 6) = ctemp07;
*(boffset + 7) = ctemp08; *(boffset + 7) = ctemp08;


aoffset1 += 2;
/* aoffset1 += 2;
aoffset2 += 2; aoffset2 += 2;
aoffset3 += 2; aoffset3 += 2;
aoffset4 += 2;
aoffset4 += 2; */
boffset += 8; boffset += 8;
} }
j--; j--;
@@ -323,8 +323,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset + 2) = ctemp03; *(boffset + 2) = ctemp03;
*(boffset + 3) = ctemp04; *(boffset + 3) = ctemp04;


aoffset1 += 2;
aoffset2 += 2;
/* aoffset1 += 2;
aoffset2 += 2; */
boffset += 4; boffset += 4;
} }
} }


+ 2
- 2
kernel/generic/zgemm_tcopy_2.c View File

@@ -140,7 +140,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset1 + 6) = ctemp11; *(b_offset1 + 6) = ctemp11;
*(b_offset1 + 7) = ctemp12; *(b_offset1 + 7) = ctemp12;


b_offset1 += m * 4;
// b_offset1 += m * 4;
a_offset1 += 4; a_offset1 += 4;
a_offset2 += 4; a_offset2 += 4;
} }
@@ -204,7 +204,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset + 2) = ctemp3; *(b_offset + 2) = ctemp3;
*(b_offset + 3) = ctemp4; *(b_offset + 3) = ctemp4;


b_offset += m * 4;
// b_offset += m * 4;
a_offset += 4; a_offset += 4;
} }




+ 5
- 5
kernel/generic/zgemm_tcopy_4.c View File

@@ -233,10 +233,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 6) = ctemp07; *(boffset3 + 6) = ctemp07;
*(boffset3 + 7) = ctemp08; *(boffset3 + 7) = ctemp08;


aoffset1 += 2;
/* aoffset1 += 2;
aoffset2 += 2; aoffset2 += 2;
aoffset3 += 2; aoffset3 += 2;
aoffset4 += 2;
aoffset4 += 2; */


boffset3 += 8; boffset3 += 8;
} }
@@ -338,8 +338,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 2) = ctemp03; *(boffset3 + 2) = ctemp03;
*(boffset3 + 3) = ctemp04; *(boffset3 + 3) = ctemp04;


aoffset1 += 2;
aoffset2 += 2;
/* aoffset1 += 2;
aoffset2 += 2; */
boffset3 += 4; boffset3 += 4;
} }
} }
@@ -387,7 +387,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset2 + 3) = ctemp04; *(boffset2 + 3) = ctemp04;


aoffset1 += 4; aoffset1 += 4;
boffset2 += 4;
// boffset2 += 4;
} }


if (n & 1){ if (n & 1){


+ 2
- 2
kernel/generic/zgemm_tcopy_8.c View File

@@ -324,7 +324,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
if (n & 1){ if (n & 1){
aoffset1 = aoffset; aoffset1 = aoffset;
aoffset2 = aoffset + lda; aoffset2 = aoffset + lda;
aoffset += 2;
// aoffset += 2;


i = (m >> 1); i = (m >> 1);
if (i > 0){ if (i > 0){
@@ -353,7 +353,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){


*(boffset + 0) = ctemp01; *(boffset + 0) = ctemp01;
*(boffset + 1) = ctemp02; *(boffset + 1) = ctemp02;
boffset += 2;
// boffset += 2;
} }
} }




+ 4
- 4
kernel/generic/ztrmm_lncopy_2.c View File

@@ -148,12 +148,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X < posY) { if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -224,7 +224,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }
return 0; return 0;
} }

+ 9
- 9
kernel/generic/ztrmm_lncopy_4.c View File

@@ -355,23 +355,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data25; b[ 6] = data25;
b[ 7] = data26; b[ 7] = data26;


ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


} else } else
if (X < posY) { if (X < posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }


@@ -586,12 +586,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data09; b[ 2] = data09;
b[ 3] = data10; b[ 3] = data10;


ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X < posY) { if (X < posY) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -657,7 +657,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 8
- 8
kernel/generic/ztrmm_lncopy_8.c View File

@@ -350,14 +350,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} }
} else } else
if (X < posY) { if (X < posY) {
ao1 += i * lda;
/* ao1 += i * lda;
ao2 += i * lda; ao2 += i * lda;
ao3 += i * lda; ao3 += i * lda;
ao4 += i * lda; ao4 += i * lda;
ao5 += i * lda; ao5 += i * lda;
ao6 += i * lda; ao6 += i * lda;
ao7 += i * lda; ao7 += i * lda;
ao8 += i * lda;
ao8 += i * lda; */
b += 16 * i; b += 16 * i;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -675,10 +675,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} }
} else } else
if (X < posY) { if (X < posY) {
ao1 += i * lda;
/* ao1 += i * lda;
ao2 += i * lda; ao2 += i * lda;
ao3 += i * lda; ao3 += i * lda;
ao4 += i * lda;
ao4 += i * lda; */
b += 8 * i; b += 8 * i;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -804,13 +804,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 1] = *(ao1 + 1); b[ 1] = *(ao1 + 1);
b[ 2] = *(ao2 + 0); b[ 2] = *(ao2 + 0);
b[ 3] = *(ao2 + 1); b[ 3] = *(ao2 + 1);
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X < posY) { if (X < posY) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT


+ 4
- 4
kernel/generic/ztrmm_ltcopy_2.c View File

@@ -141,8 +141,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {


if (X > posY) { if (X > posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;


} else } else
@@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data3; b[ 2] = data3;
b[ 3] = data4; b[ 3] = data4;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -233,7 +233,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 10
- 10
kernel/generic/ztrmm_ltcopy_4.c View File

@@ -292,18 +292,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X > posY) { if (X > posY) {


if (m & 2) { if (m & 2) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4;
ao4 += 4; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


@@ -347,7 +347,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[15] = data16; b[15] = data16;


ao1 += 2 * lda; ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;


b += 16; b += 16;
} }
@@ -371,7 +371,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data07; b[ 6] = data07;
b[ 7] = data08; b[ 7] = data08;


ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }


@@ -588,8 +588,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i) { if (i) {


if (X > posY) { if (X > posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */


b += 4; b += 4;
} else } else
@@ -604,7 +604,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;


} else { } else {
@@ -678,7 +678,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }


posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 9
- 9
kernel/generic/ztrmm_ltcopy_8.c View File

@@ -317,14 +317,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 7); i = (m & 7);
if (i > 0) { if (i > 0) {
if (X > posY) { if (X > posY) {
a01 += 2 * i;
/* a01 += 2 * i;
a02 += 2 * i; a02 += 2 * i;
a03 += 2 * i; a03 += 2 * i;
a04 += 2 * i; a04 += 2 * i;
a05 += 2 * i; a05 += 2 * i;
a06 += 2 * i; a06 += 2 * i;
a07 += 2 * i; a07 += 2 * i;
a08 += 2 * i;
a08 += 2 * i; */
b += 16 * i; b += 16 * i;
} else } else
if (X < posY) { if (X < posY) {
@@ -661,10 +661,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 3); i = (m & 3);
if (i > 0) { if (i > 0) {
if (X > posY) { if (X > posY) {
a01 += 2 * i;
/* a01 += 2 * i;
a02 += 2 * i; a02 += 2 * i;
a03 += 2 * i; a03 += 2 * i;
a04 += 2 * i;
a04 += 2 * i; */
b += 8 * i; b += 8 * i;
} else } else
if (X < posY) { if (X < posY) {
@@ -802,8 +802,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 1); i = (m & 1);
if (i > 0) { if (i > 0) {
if (X > posY) { if (X > posY) {
a01 += 2;
a02 += 2;
/* a01 += 2;
a02 += 2; */
b += 4; b += 4;
} else } else
if (X < posY) { if (X < posY) {
@@ -812,8 +812,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = *(a01 + 2); b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3); b[ 3] = *(a01 + 3);


a01 += lda;
a02 += lda;
/* a01 += lda;
a02 += lda; */
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -869,7 +869,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i --; i --;
} while (i > 0); } while (i > 0);
} }
posY += 1;
// posY += 1;
} }


return 0; return 0;


+ 3
- 3
kernel/generic/ztrmm_uncopy_2.c View File

@@ -153,12 +153,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;


ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X > posY) { if (X > posY) {
ao1 += lda;
// ao1 += lda;
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT


+ 11
- 11
kernel/generic/ztrmm_uncopy_4.c View File

@@ -354,23 +354,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data25; b[ 6] = data25;
b[ 7] = data26; b[ 7] = data26;


ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


} else } else
if (X > posY) { if (X > posY) {
if (m & 2) { if (m & 2) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }


@@ -596,13 +596,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 1] = data02; b[ 1] = data02;
b[ 2] = data09; b[ 2] = data09;
b[ 3] = data10; b[ 3] = data10;
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X > posY) { if (X > posY) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -624,8 +624,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data09; b[ 2] = data09;
b[ 3] = data10; b[ 3] = data10;
#endif #endif
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} }
} }


+ 8
- 8
kernel/generic/ztrmm_uncopy_8.c View File

@@ -350,14 +350,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} }
} else } else
if (X > posY) { if (X > posY) {
ao1 += i * lda;
/* ao1 += i * lda;
ao2 += i * lda; ao2 += i * lda;
ao3 += i * lda; ao3 += i * lda;
ao4 += i * lda; ao4 += i * lda;
ao5 += i * lda; ao5 += i * lda;
ao6 += i * lda; ao6 += i * lda;
ao7 += i * lda; ao7 += i * lda;
ao8 += i * lda;
ao8 += i * lda; */
b += 16 * i; b += 16 * i;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -677,10 +677,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} }
} else } else
if (X > posY) { if (X > posY) {
ao1 += i * lda;
/* ao1 += i * lda;
ao2 += i * lda; ao2 += i * lda;
ao3 += i * lda; ao3 += i * lda;
ao4 += i * lda;
ao4 += i * lda; */
b += 8 * i; b += 8 * i;
} else { } else {
#ifdef UNIT #ifdef UNIT
@@ -807,13 +807,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 1] = *(ao1 + 1); b[ 1] = *(ao1 + 1);
b[ 2] = *(ao2 + 0); b[ 2] = *(ao2 + 0);
b[ 3] = *(ao2 + 1); b[ 3] = *(ao2 + 1);
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X > posY) { if (X > posY) {
ao1 += 2 * lda;
ao2 += 2 * lda;
/* ao1 += 2 * lda;
ao2 += 2 * lda; */
b += 4; b += 4;
} else { } else {
#ifdef UNIT #ifdef UNIT


+ 3
- 3
kernel/generic/ztrmm_utcopy_2.c View File

@@ -142,8 +142,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


if (m & 1) { if (m & 1) {
if (X < posY) { if (X < posY) {
ao1 += 2;
ao2 += 2;
/* ao1 += 2;
ao2 += 2; */
b += 4; b += 4;
} else } else
if (X > posY) { if (X > posY) {
@@ -157,7 +157,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data3; b[ 2] = data3;
b[ 3] = data4; b[ 3] = data4;


ao1 += lda;
// ao1 += lda;
b += 4; b += 4;


} else { } else {


+ 6
- 6
kernel/generic/ztrmm_utcopy_4.c View File

@@ -294,18 +294,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {


if (m & 2) { if (m & 2) {
ao1 += 4;
/* ao1 += 4;
ao2 += 4; ao2 += 4;
ao3 += 4; ao3 += 4;
ao4 += 4;
ao4 += 4; */
b += 16; b += 16;
} }


if (m & 1) { if (m & 1) {
ao1 += 2;
/* ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2;
ao4 += 2; */
b += 8; b += 8;
} }


@@ -349,7 +349,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[15] = data16; b[15] = data16;


ao1 += 2 * lda; ao1 += 2 * lda;
ao2 += 2 * lda;
// ao2 += 2 * lda;
b += 16; b += 16;
} }


@@ -372,7 +372,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 6] = data07; b[ 6] = data07;
b[ 7] = data08; b[ 7] = data08;


ao1 += lda;
// ao1 += lda;
b += 8; b += 8;
} }




+ 4
- 4
kernel/generic/ztrmm_utcopy_8.c View File

@@ -320,14 +320,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON


if (X < posY) { if (X < posY) {


a01 += 2 * i;
/* a01 += 2 * i;
a02 += 2 * i; a02 += 2 * i;
a03 += 2 * i; a03 += 2 * i;
a04 += 2 * i; a04 += 2 * i;
a05 += 2 * i; a05 += 2 * i;
a06 += 2 * i; a06 += 2 * i;
a07 += 2 * i; a07 += 2 * i;
a08 += 2 * i;
a08 += 2 * i; */
b += 16 * i; b += 16 * i;
} else } else
if (X > posY) { if (X > posY) {
@@ -664,10 +664,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i) { if (i) {


if (X < posY) { if (X < posY) {
a01 += 2 * i;
/* a01 += 2 * i;
a02 += 2 * i; a02 += 2 * i;
a03 += 2 * i; a03 += 2 * i;
a04 += 2 * i;
a04 += 2 * i; */
b += 8 * i; b += 8 * i;
} else } else
if (X > posY) { if (X > posY) {


+ 8
- 8
kernel/generic/ztrsm_lncopy_4.c View File

@@ -313,13 +313,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data26; *(b + 7) = data26;
} }


a1 += 2;
/* a1 += 2;
a2 += 2; a2 += 2;
a3 += 2; a3 += 2;
a4 += 2;
a4 += 2; */
b += 8; b += 8;


ii += 1;
// ii += 1;
} }
a += 4 * lda; a += 4 * lda;
jj += 4; jj += 4;
@@ -410,11 +410,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data10; *(b + 3) = data10;
} }


a1 += 2;
a2 += 2;
/* a1 += 2;
a2 += 2; */
b += 4; b += 4;


ii += 1;
// ii += 1;
} }
a += 2 * lda; a += 2 * lda;
jj += 2; jj += 2;
@@ -451,8 +451,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += lda;
jj += 1;
// a += lda;
// jj += 1;
} }


return 0; return 0;


+ 7
- 7
kernel/generic/ztrsm_ltcopy_4.c View File

@@ -286,7 +286,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;


ii += 2; ii += 2;
@@ -335,9 +335,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data08; *(b + 7) = data08;
} }


a1 += lda;
// a1 += lda;
b += 8; b += 8;
ii += 1;
// ii += 1;
} }


a += 8; a += 8;
@@ -430,9 +430,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data04; *(b + 3) = data04;
} }


a1 += lda;
// a1 += lda;
b += 4; b += 4;
ii += 1;
// ii += 1;
} }


a += 4; a += 4;
@@ -471,8 +471,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += 2;
jj += 1;
// a += 2;
// jj += 1;
} }


return 0; return 0;


+ 8
- 8
kernel/generic/ztrsm_uncopy_4.c View File

@@ -344,13 +344,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data26; *(b + 7) = data26;
} }


a1 += 2;
/* a1 += 2;
a2 += 2; a2 += 2;
a3 += 2; a3 += 2;
a4 += 2;
a4 += 2; */
b += 8; b += 8;


ii += 1;
// ii += 1;
} }


a += 4 * lda; a += 4 * lda;
@@ -444,11 +444,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data10; *(b + 3) = data10;
} }


a1 += 2;
a2 += 2;
/* a1 += 2;
a2 += 2; */
b += 4; b += 4;


ii += 1;
// ii += 1;
} }


a += 2 *lda; a += 2 *lda;
@@ -488,8 +488,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += lda;
jj += 1;
// a += lda;
// jj += 1;
} }


return 0; return 0;


+ 7
- 7
kernel/generic/ztrsm_utcopy_4.c View File

@@ -266,7 +266,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }


a1 += 2 * lda; a1 += 2 * lda;
a2 += 2 * lda;
// a2 += 2 * lda;
b += 16; b += 16;


ii += 2; ii += 2;
@@ -303,10 +303,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 7) = data08; *(b + 7) = data08;
} }


a1 += lda;
// a1 += lda;
b += 8; b += 8;


ii += 1;
// ii += 1;
} }


a += 8; a += 8;
@@ -394,10 +394,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
*(b + 3) = data04; *(b + 3) = data04;
} }


a1 += lda;
// a1 += lda;
b += 4; b += 4;


ii += 1;
// ii += 1;
} }


a += 4; a += 4;
@@ -436,8 +436,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
ii += 1; ii += 1;
} }


a += 2;
jj += 1;
// a += 2;
// jj += 1;
} }


return 0; return 0;


+ 2
- 2
kernel/x86_64/zgemv_t_4.c View File

@@ -313,7 +313,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,


for( i = 0; i < n1 ; i++) for( i = 0; i < n1 ; i++)
{ {
memset(ybuffer,0,64);
memset(ybuffer,0,sizeof(ybuffer));
zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer,alpha); zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer,alpha);
ap[0] += lda4; ap[0] += lda4;
ap[1] += lda4; ap[1] += lda4;
@@ -338,7 +338,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,


for( i = 0; i < n2 ; i++) for( i = 0; i < n2 ; i++)
{ {
memset(ybuffer,0,64);
memset(ybuffer,0,sizeof(ybuffer));
zgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer,alpha); zgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer,alpha);
a_ptr += lda; a_ptr += lda;
y_ptr[0] += ybuffer[0]; y_ptr[0] += ybuffer[0];


+ 2
- 2
kernel/zarch/zgemv_t_4.c View File

@@ -518,7 +518,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,


for( i = 0; i < n1 ; i++) for( i = 0; i < n1 ; i++)
{ {
memset(ybuffer,0,64);
memset(ybuffer,0,sizeof(ybuffer));
zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer,alpha); zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer,alpha);
ap[0] += lda4; ap[0] += lda4;
ap[1] += lda4; ap[1] += lda4;
@@ -543,7 +543,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,


for( i = 0; i < n2 ; i++) for( i = 0; i < n2 ; i++)
{ {
memset(ybuffer,0,64);
memset(ybuffer,0,sizeof(ybuffer));
zgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer,alpha); zgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer,alpha);
a_ptr += lda; a_ptr += lda;
y_ptr[0] += ybuffer[0]; y_ptr[0] += ybuffer[0];


Loading…
Cancel
Save