Browse Source

Disambiguate whilelt calls by casting SV_WHILE arguments to uint64_t

pull/4816/head
Chris Sidebottom 1 year ago
parent
commit
3c3d439721
2 changed files with 8 additions and 8 deletions
  1. kernel/arm64/gemv_n_sve.c (+5, -5)
  2. kernel/arm64/gemv_t_sve.c (+3, -3)

kernel/arm64/gemv_n_sve.c (+5, -5) View File

@@ -95,7 +95,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
}

for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec1 = svld1(pg, a + i);
SV_TYPE a_vec2 = svld1(pg, a + i + lda);
SV_TYPE a_vec3 = svld1(pg, a + i + lda * 2);
@@ -138,7 +138,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
}

for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec1 = svld1(pg, a + i);
SV_TYPE a_vec2 = svld1(pg, a + i + lda);
SV_TYPE a_vec3 = svld1(pg, a + i + lda * 2);
@@ -174,7 +174,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
svst1(pg_true, y + i, y_vec);
}
for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec1 = svld1(pg, a + i);
SV_TYPE a_vec2 = svld1(pg, a + i + lda);
SV_TYPE a_vec3 = svld1(pg, a + i + lda * 2);
@@ -203,7 +203,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
svst1(pg_true, y + i, y_vec);
}
for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec1 = svld1(pg, a + i);
SV_TYPE a_vec2 = svld1(pg, a + i + lda);
SV_TYPE y_vec = svld1(pg, y + i);
@@ -230,7 +230,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
svst1(pg_true, y + i, y_vec);
}
for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec = svld1(pg, a + i);
SV_TYPE y_vec = svld1(pg, y + i);
y_vec = svmla_x(pg, y_vec, temp_vec1, a_vec);


kernel/arm64/gemv_t_sve.c (+3, -3) View File

@@ -82,7 +82,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
}

for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec1 = svld1(pg, a + i);
SV_TYPE a_vec2 = svld1(pg, a + i + lda);
SV_TYPE a_vec3 = svld1(pg, a + i + lda * 2);
@@ -120,7 +120,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
temp_vec2 = svmla_x(pg_true, temp_vec2, a_vec2, x_vec);
}
for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec1 = svld1(pg, a + i);
SV_TYPE a_vec2 = svld1(pg, a + lda + i);
SV_TYPE x_vec = svld1(pg, x + i);
@@ -156,7 +156,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
temp_vec = svmla_x(pg_true, temp_vec, a_vec, x_vec);
}
for (; i < M; i += v_size) {
- svbool_t pg = SV_WHILE(i, M);
+ svbool_t pg = SV_WHILE((uint64_t)i, (uint64_t)M);
SV_TYPE a_vec = svld1(pg, a + i);
SV_TYPE x_vec = svld1(pg, x + i);
temp_vec = svmla_x(pg, temp_vec, a_vec, x_vec);


Loading…
Cancel
Save